
Commit a916b4c

Encapsulate FieldMetadata
1 parent 1daa5ed commit a916b4c


8 files changed (+150, -52 lines)


datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 2 additions & 1 deletion
@@ -40,6 +40,7 @@ use datafusion_common::{
     assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, not_impl_err,
     plan_err, DFSchema, DataFusionError, Result, ScalarValue,
 };
+use datafusion_expr::expr::FieldMetadata;
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
     lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody,
@@ -1535,7 +1536,7 @@ async fn test_metadata_based_udf_with_literal() -> Result<()> {
     let df = ctx.sql("select 0;").await?.select(vec![
         lit(5u64).alias_with_metadata("lit_with_doubling", Some(input_metadata.clone())),
         lit(5u64).alias("lit_no_doubling"),
-        lit_with_metadata(5u64, Some(input_metadata))
+        lit_with_metadata(5u64, Some(FieldMetadata::from(input_metadata)))
             .alias("lit_with_double_no_alias_metadata"),
     ])?;

datafusion/expr/src/expr.rs

Lines changed: 113 additions & 3 deletions
@@ -30,7 +30,7 @@ use crate::logical_plan::Subquery;
 use crate::Volatility;
 use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};

-use arrow::datatypes::{DataType, FieldRef};
+use arrow::datatypes::{DataType, Field, FieldRef};
 use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
 use datafusion_common::tree_node::{
     Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion,
@@ -284,8 +284,8 @@ pub enum Expr {
     Column(Column),
     /// A named reference to a variable in a registry.
     ScalarVariable(DataType, Vec<String>),
-    /// A constant value along with associated metadata
-    Literal(ScalarValue, Option<BTreeMap<String, String>>),
+    /// A constant value along with associated [`FieldMetadata`].
+    Literal(ScalarValue, Option<FieldMetadata>),
     /// A binary expression such as "age > 21"
     BinaryExpr(BinaryExpr),
     /// LIKE expression
@@ -413,6 +413,116 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr {
     }
 }

+/// Literal metadata
+///
+/// This structure stores metadata associated with a literal expression and is
+/// designed to be cheap to `clone`.
+///
+/// It corresponds to the `metadata` field on [`FieldRef`].
+#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
+pub struct FieldMetadata {
+    /// The inner metadata of a literal expression, which is a map of string
+    /// keys to string values.
+    ///
+    /// Note this is not a `HashMap` because `HashMap` does not provide
+    /// implementations for traits like `Debug` and `Hash`.
+    inner: Arc<BTreeMap<String, String>>,
+}
+
+impl FieldMetadata {
+    /// Create a new empty metadata instance.
+    pub fn new_empty() -> Self {
+        Self {
+            inner: Arc::new(BTreeMap::new()),
+        }
+    }
+
+    /// Create a new metadata instance from a `Field`'s metadata.
+    pub fn new_from_field(field: &Field) -> Self {
+        let inner = field
+            .metadata()
+            .iter()
+            .map(|(k, v)| (k.to_string(), v.to_string()))
+            .collect();
+        Self {
+            inner: Arc::new(inner),
+        }
+    }
+
+    /// Create a new metadata instance from a map of string keys to string values.
+    pub fn new(inner: BTreeMap<String, String>) -> Self {
+        Self {
+            inner: Arc::new(inner),
+        }
+    }
+
+    /// Get the inner metadata as a reference to a `BTreeMap`.
+    pub fn inner(&self) -> &BTreeMap<String, String> {
+        &self.inner
+    }
+
+    /// Return the inner metadata.
+    pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
+        self.inner
+    }
+
+    /// Adds metadata from `other` into `self`, overwriting any existing keys.
+    pub fn extend(&mut self, other: Self) {
+        let other = Arc::unwrap_or_clone(other.into_inner());
+        Arc::make_mut(&mut self.inner).extend(other);
+    }
+
+    /// Returns true if the metadata is empty.
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    /// Returns the number of key-value pairs in the metadata.
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// Updates the metadata on the `Field` with this metadata.
+    pub fn add_to_field(&self, field: Field) -> Field {
+        field.with_metadata(
+            self.inner
+                .iter()
+                .map(|(k, v)| (k.clone(), v.clone()))
+                .collect(),
+        )
+    }
+}
+
+impl From<&Field> for FieldMetadata {
+    fn from(field: &Field) -> Self {
+        Self::new_from_field(field)
+    }
+}
+
+impl From<BTreeMap<String, String>> for FieldMetadata {
+    fn from(inner: BTreeMap<String, String>) -> Self {
+        Self::new(inner)
+    }
+}
+
+impl From<std::collections::HashMap<String, String>> for FieldMetadata {
+    fn from(map: std::collections::HashMap<String, String>) -> Self {
+        Self::new(map.into_iter().collect())
+    }
+}
+
+/// From a reference to a `HashMap`.
+impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
+    fn from(map: &std::collections::HashMap<String, String>) -> Self {
+        let inner = map
+            .iter()
+            .map(|(k, v)| (k.to_string(), v.to_string()))
+            .collect();
+        Self::new(inner)
+    }
+}
+
 /// UNNEST expression.
 #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
 pub struct Unnest {
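
As a rough illustration of the new API (not part of the commit; the "unit"/"ms" key and the field name are made-up placeholders), constructing a FieldMetadata and copying it onto an Arrow Field might look like this:

    use std::collections::BTreeMap;

    use arrow::datatypes::{DataType, Field};
    use datafusion_expr::expr::FieldMetadata;

    fn field_metadata_sketch() {
        // FieldMetadata keeps the map behind an Arc, so cloning it is cheap.
        let metadata = FieldMetadata::new(BTreeMap::from([(
            "unit".to_string(),
            "ms".to_string(),
        )]));

        // Copy the metadata onto an Arrow Field, as expr_schema.rs now does.
        let field = metadata.add_to_field(Field::new("latency", DataType::UInt64, false));
        assert_eq!(field.metadata().get("unit"), Some(&"ms".to_string()));

        // Round-trip the metadata back from the Field via the From<&Field> impl.
        assert_eq!(FieldMetadata::from(&field), metadata);
    }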

datafusion/expr/src/expr_rewriter/mod.rs

Lines changed: 1 addition & 5 deletions
@@ -390,11 +390,7 @@ mod test {
             } else {
                 utf8_val
             };
-            Ok(Transformed::yes(lit_with_metadata(
-                utf8_val,
-                metadata
-                    .map(|m| m.into_iter().collect::<HashMap<String, String>>()),
-            )))
+            Ok(Transformed::yes(lit_with_metadata(utf8_val, metadata)))
         }
         // otherwise, return None
         _ => Ok(Transformed::no(expr)),

datafusion/expr/src/expr_schema.rs

Lines changed: 1 addition & 6 deletions
@@ -423,12 +423,7 @@ impl ExprSchemable for Expr {
             Expr::Literal(l, metadata) => {
                 let mut field = Field::new(&schema_name, l.data_type(), l.is_null());
                 if let Some(metadata) = metadata {
-                    field = field.with_metadata(
-                        metadata
-                            .iter()
-                            .map(|(k, v)| (k.clone(), v.clone()))
-                            .collect(),
-                    );
+                    field = metadata.add_to_field(field);
                 }
                 Ok(Arc::new(field))
             }

datafusion/expr/src/literal.rs

Lines changed: 3 additions & 8 deletions
@@ -17,34 +17,29 @@

 //! Literal module contains foundational types that are used to represent literals in DataFusion.

+use crate::expr::FieldMetadata;
 use crate::Expr;
 use datafusion_common::ScalarValue;
-use std::collections::HashMap;

 /// Create a literal expression
 pub fn lit<T: Literal>(n: T) -> Expr {
     n.lit()
 }

-pub fn lit_with_metadata<T: Literal>(
-    n: T,
-    metadata: impl Into<Option<HashMap<String, String>>>,
-) -> Expr {
-    let metadata = metadata.into();
+pub fn lit_with_metadata<T: Literal>(n: T, metadata: Option<FieldMetadata>) -> Expr {
     let Some(metadata) = metadata else {
         return n.lit();
     };

     let Expr::Literal(sv, prior_metadata) = n.lit() else {
         unreachable!();
     };
-
     let new_metadata = match prior_metadata {
         Some(mut prior) => {
             prior.extend(metadata);
             prior
         }
-        None => metadata.into_iter().collect(),
+        None => metadata,
     };

     Expr::Literal(sv, Some(new_metadata))
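
A minimal sketch of calling the updated lit_with_metadata signature (the "doubling" key is an arbitrary example value, mirroring the test above):

    use std::collections::HashMap;

    use datafusion_expr::expr::FieldMetadata;
    use datafusion_expr::{lit, lit_with_metadata, Expr};

    fn literal_sketch() -> (Expr, Expr) {
        // A HashMap (or BTreeMap, or &Field) converts into FieldMetadata
        // through the From impls added in expr.rs.
        let md = FieldMetadata::from(HashMap::from([(
            "doubling".to_string(),
            "1".to_string(),
        )]));

        // None keeps the plain literal path; Some(md) attaches the metadata.
        (lit(5u64), lit_with_metadata(5u64, Some(md)))
    }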

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 5 additions & 6 deletions
@@ -18,7 +18,7 @@
 //! Expression simplification API

 use std::borrow::Cow;
-use std::collections::{BTreeMap, HashSet};
+use std::collections::HashSet;
 use std::ops::Not;

 use arrow::{
@@ -58,6 +58,7 @@ use crate::{
     analyzer::type_coercion::TypeCoercionRewriter,
     simplify_expressions::unwrap_cast::try_cast_literal_to_type,
 };
+use datafusion_expr::expr::FieldMetadata;
 use indexmap::IndexSet;
 use regex::Regex;

@@ -523,9 +524,9 @@ struct ConstEvaluator<'a> {
 #[allow(clippy::large_enum_variant)]
 enum ConstSimplifyResult {
     // Expr was simplified and contains the new expression
-    Simplified(ScalarValue, Option<BTreeMap<String, String>>),
+    Simplified(ScalarValue, Option<FieldMetadata>),
     // Expr was not simplified and original value is returned
-    NotSimplified(ScalarValue, Option<BTreeMap<String, String>>),
+    NotSimplified(ScalarValue, Option<FieldMetadata>),
     // Evaluation encountered an error, contains the original expression
     SimplifyRuntimeError(DataFusionError, Expr),
 }
@@ -682,9 +683,7 @@ impl<'a> ConstEvaluator<'a> {
             let m = f.metadata();
             match m.is_empty() {
                 true => None,
-                false => {
-                    Some(m.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
-                }
+                false => Some(FieldMetadata::from(m)),
             }
         });
         let col_val = match phys_expr.evaluate(&self.input_batch) {

datafusion/physical-expr/src/expressions/literal.rs

Lines changed: 3 additions & 4 deletions
@@ -18,7 +18,6 @@
 //! Literal expressions for physical operations

 use std::any::Any;
-use std::collections::HashMap;
 use std::hash::Hash;
 use std::sync::Arc;

@@ -30,6 +29,7 @@ use arrow::{
     record_batch::RecordBatch,
 };
 use datafusion_common::{Result, ScalarValue};
+use datafusion_expr::expr::FieldMetadata;
 use datafusion_expr::Expr;
 use datafusion_expr_common::columnar_value::ColumnarValue;
 use datafusion_expr_common::interval_arithmetic::Interval;
@@ -64,14 +64,13 @@ impl Literal {
     /// Create a literal value expression
     pub fn new_with_metadata(
         value: ScalarValue,
-        metadata: impl Into<Option<HashMap<String, String>>>,
+        metadata: Option<FieldMetadata>,
     ) -> Self {
-        let metadata = metadata.into();
         let mut field =
             Field::new(format!("{value}"), value.data_type(), value.is_null());

         if let Some(metadata) = metadata {
-            field = field.with_metadata(metadata);
+            field = metadata.add_to_field(field);
         }

         Self {
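
For completeness, a sketch of constructing the physical literal with the new parameter type; it assumes the usual datafusion_physical_expr::expressions::Literal re-export and uses a made-up "source" key:

    use std::collections::BTreeMap;

    use datafusion_common::ScalarValue;
    use datafusion_expr::expr::FieldMetadata;
    use datafusion_physical_expr::expressions::Literal;

    fn physical_literal_sketch() -> Literal {
        let md = FieldMetadata::new(BTreeMap::from([(
            "source".to_string(),
            "example".to_string(),
        )]));
        // The metadata ends up on the Field describing the literal's output column.
        Literal::new_with_metadata(ScalarValue::UInt64(Some(5)), Some(md))
    }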

datafusion/physical-expr/src/planner.rs

Lines changed: 22 additions & 19 deletions
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.

-use std::collections::HashMap;
 use std::sync::Arc;

 use crate::ScalarFunctionExpr;
@@ -29,7 +28,9 @@ use datafusion_common::{
     exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema,
 };
 use datafusion_expr::execution_props::ExecutionProps;
-use datafusion_expr::expr::{Alias, Cast, InList, Placeholder, ScalarFunction};
+use datafusion_expr::expr::{
+    Alias, Cast, FieldMetadata, InList, Placeholder, ScalarFunction,
+};
 use datafusion_expr::var_provider::is_system_variables;
 use datafusion_expr::var_provider::VarType;
 use datafusion_expr::{
@@ -114,22 +115,26 @@ pub fn create_physical_expr(
     match e {
         Expr::Alias(Alias { expr, metadata, .. }) => {
             if let Expr::Literal(v, prior_metadata) = expr.as_ref() {
-                let mut new_metadata = prior_metadata
-                    .as_ref()
-                    .map(|m| {
-                        m.iter()
-                            .map(|(k, v)| (k.clone(), v.clone()))
-                            .collect::<HashMap<String, String>>()
-                    })
-                    .unwrap_or_default();
-                if let Some(metadata) = metadata {
-                    new_metadata.extend(metadata.clone());
-                }
-                let new_metadata = match new_metadata.is_empty() {
-                    true => None,
-                    false => Some(new_metadata),
+                let metadata = metadata.as_ref().map(|m| FieldMetadata::from(m.clone()));
+                let new_metadata = match (prior_metadata.as_ref(), metadata) {
+                    (Some(m), Some(n)) => {
+                        let mut m = m.clone();
+                        m.extend(n);
+                        Some(m)
+                    }
+                    (Some(m), None) => Some(m.clone()),
+                    (None, Some(n)) => Some(n),
+                    (None, None) => None,
                 };

+                let new_metadata = new_metadata.and_then(|new_metadata| {
+                    if new_metadata.is_empty() {
+                        None
+                    } else {
+                        Some(new_metadata)
+                    }
+                });
+
                 Ok(Arc::new(Literal::new_with_metadata(
                     v.clone(),
                     new_metadata,
@@ -144,9 +149,7 @@ pub fn create_physical_expr(
         }
         Expr::Literal(value, metadata) => Ok(Arc::new(Literal::new_with_metadata(
            value.clone(),
-            metadata
-                .as_ref()
-                .map(|m| m.iter().map(|(k, v)| (k.clone(), v.clone())).collect()),
+            metadata.clone(),
         ))),
         Expr::ScalarVariable(_, variable_names) => {
             if is_system_variables(variable_names) {
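
The (Some(m), Some(n)) arm above relies on FieldMetadata::extend overwriting duplicate keys, so alias metadata wins over the literal's own metadata on conflicts. A small sketch of that behavior (the "origin"/"unit" keys are illustrative only):

    use std::collections::BTreeMap;

    use datafusion_expr::expr::FieldMetadata;

    fn merge_sketch() {
        let mut literal_md = FieldMetadata::new(BTreeMap::from([
            ("origin".to_string(), "literal".to_string()),
            ("unit".to_string(), "ms".to_string()),
        ]));
        let alias_md = FieldMetadata::new(BTreeMap::from([(
            "origin".to_string(),
            "alias".to_string(),
        )]));

        // extend overwrites existing keys, mirroring the merge in create_physical_expr.
        literal_md.extend(alias_md);
        assert_eq!(literal_md.inner().get("origin"), Some(&"alias".to_string()));
        assert_eq!(literal_md.inner().get("unit"), Some(&"ms".to_string()));
    }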
