Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ use datafusion_common::{
use datafusion_common_runtime::SpawnedTask;
use datafusion_execution::config::SessionConfig;
use datafusion_execution::runtime_env::RuntimeEnv;
use datafusion_expr::expr::{GroupingSet, Sort, WindowFunction};
use datafusion_expr::expr::{FieldMetadata, GroupingSet, Sort, WindowFunction};
use datafusion_expr::var_provider::{VarProvider, VarType};
use datafusion_expr::{
cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder,
Expand Down Expand Up @@ -5676,6 +5676,7 @@ async fn test_alias() -> Result<()> {
async fn test_alias_with_metadata() -> Result<()> {
let mut metadata = HashMap::new();
metadata.insert(String::from("k"), String::from("v"));
let metadata = FieldMetadata::from(metadata);
let df = create_test_table("test")
.await?
.select(vec![col("a").alias_with_metadata("b", Some(metadata))])?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1533,10 +1533,11 @@ async fn test_metadata_based_udf_with_literal() -> Result<()> {
[("modify_values".to_string(), "double_output".to_string())]
.into_iter()
.collect();
let input_metadata = FieldMetadata::from(input_metadata);
let df = ctx.sql("select 0;").await?.select(vec![
lit(5u64).alias_with_metadata("lit_with_doubling", Some(input_metadata.clone())),
lit(5u64).alias("lit_no_doubling"),
lit_with_metadata(5u64, Some(FieldMetadata::from(input_metadata)))
lit_with_metadata(5u64, Some(input_metadata))
.alias("lit_with_double_no_alias_metadata"),
])?;

Expand Down
57 changes: 41 additions & 16 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,9 @@ impl FieldMetadata {

/// Adds metadata from `other` into `self`, overwriting any existing keys.
pub fn extend(&mut self, other: Self) {
if other.is_empty() {
return;
}
let other = Arc::unwrap_or_clone(other.into_inner());
Arc::make_mut(&mut self.inner).extend(other);
}
Expand All @@ -531,18 +534,21 @@ impl FieldMetadata {
self.inner.len()
}

/// Convert this `FieldMetadata` into a `HashMap<String, String>`
pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
self.inner
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect()
}

/// Updates the metadata on the Field with this metadata, if it is not empty.
pub fn add_to_field(&self, field: Field) -> Field {
if self.inner.is_empty() {
return field;
}

field.with_metadata(
self.inner
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect(),
)
field.with_metadata(self.to_hashmap())
}
}

Expand Down Expand Up @@ -575,6 +581,24 @@ impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
}
}

/// From hashbrown map
impl From<HashMap<String, String>> for FieldMetadata {
fn from(map: HashMap<String, String>) -> Self {
let inner = map.into_iter().collect();
Self::new(inner)
}
}

impl From<&HashMap<String, String>> for FieldMetadata {
fn from(map: &HashMap<String, String>) -> Self {
let inner = map
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
Self::new(inner)
}
}

/// UNNEST expression.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Unnest {
Expand All @@ -601,7 +625,7 @@ pub struct Alias {
pub expr: Box<Expr>,
pub relation: Option<TableReference>,
pub name: String,
pub metadata: Option<std::collections::HashMap<String, String>>,
pub metadata: Option<FieldMetadata>,
Copy link
Contributor Author

@alamb alamb Jun 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is the point of the PR is to use the FieldMetadata struct introduced in this PR everywhere:

The rest of the changes in this PR are consequences of this change

}

impl Hash for Alias {
Expand Down Expand Up @@ -641,10 +665,7 @@ impl Alias {
}
}

pub fn with_metadata(
mut self,
metadata: Option<std::collections::HashMap<String, String>>,
) -> Self {
pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
self.metadata = metadata;
self
}
Expand Down Expand Up @@ -1591,15 +1612,17 @@ impl Expr {
/// # Example
/// ```
/// # use datafusion_expr::col;
/// use std::collections::HashMap;
/// # use std::collections::HashMap;
/// # use datafusion_expr::expr::FieldMetadata;
/// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
/// let metadata = FieldMetadata::from(metadata);
/// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
/// ```
///
pub fn alias_with_metadata(
self,
name: impl Into<String>,
metadata: Option<std::collections::HashMap<String, String>>,
metadata: Option<FieldMetadata>,
) -> Expr {
Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata))
}
Expand All @@ -1621,16 +1644,18 @@ impl Expr {
/// # Example
/// ```
/// # use datafusion_expr::col;
/// use std::collections::HashMap;
/// # use std::collections::HashMap;
/// # use datafusion_expr::expr::FieldMetadata;
/// let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
/// let metadata = FieldMetadata::from(metadata);
/// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata));
/// ```
///
pub fn alias_qualified_with_metadata(
self,
relation: Option<impl Into<TableReference>>,
name: impl Into<String>,
metadata: Option<std::collections::HashMap<String, String>>,
metadata: Option<FieldMetadata>,
) -> Expr {
Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata))
}
Expand Down Expand Up @@ -3819,7 +3844,7 @@ mod test {
// If this test fails when you change `Expr`, please try
// `Box`ing the fields to make `Expr` smaller
// See https://github.com/apache/datafusion/issues/16199 for details
assert_eq!(size_of::<Expr>(), 144);
assert_eq!(size_of::<Expr>(), 128);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And Expr gets smaller!

assert_eq!(size_of::<ScalarValue>(), 64);
assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
assert_eq!(size_of::<Vec<Expr>>(), 24);
Expand Down
31 changes: 14 additions & 17 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

use super::{Between, Expr, Like};
use crate::expr::{
AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, InList,
InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction,
AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, FieldMetadata,
InList, InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction,
WindowFunctionParams,
};
use crate::type_coercion::functions::{
Expand All @@ -34,7 +34,6 @@ use datafusion_common::{
};
use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use std::collections::HashMap;
use std::sync::Arc;

/// Trait to allow expr to typable with respect to a schema
Expand All @@ -46,7 +45,7 @@ pub trait ExprSchemable {
fn nullable(&self, input_schema: &dyn ExprSchema) -> Result<bool>;

/// Given a schema, return the expr's optional metadata
fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, String>>;
fn metadata(&self, schema: &dyn ExprSchema) -> Result<FieldMetadata>;

/// Convert to a field with respect to a schema
fn to_field(
Expand Down Expand Up @@ -346,9 +345,9 @@ impl ExprSchemable for Expr {
}
}

fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, String>> {
fn metadata(&self, schema: &dyn ExprSchema) -> Result<FieldMetadata> {
self.to_field(schema)
.map(|(_, field)| field.metadata().clone())
.map(|(_, field)| FieldMetadata::from(field.metadata()))
}

/// Returns the datatype and nullability of the expression based on [ExprSchema].
Expand Down Expand Up @@ -405,12 +404,10 @@ impl ExprSchemable for Expr {

let mut combined_metadata = expr.metadata(schema)?;
if let Some(metadata) = metadata {
if !metadata.is_empty() {
combined_metadata.extend(metadata.clone());
}
combined_metadata.extend(metadata.clone());
}

Ok(Arc::new(field.with_metadata(combined_metadata)))
Ok(Arc::new(combined_metadata.add_to_field(field)))
}
Expr::Negative(expr) => expr.to_field(schema).map(|(_, f)| f),
Expr::Column(c) => schema.field_from_column(c).map(|f| Arc::new(f.clone())),
Expand Down Expand Up @@ -736,7 +733,7 @@ mod tests {
use super::*;
use crate::{col, lit};

use datafusion_common::{internal_err, DFSchema, ScalarValue};
use datafusion_common::{internal_err, DFSchema, HashMap, ScalarValue};

macro_rules! test_is_expr_nullable {
($EXPR_TYPE:ident) => {{
Expand Down Expand Up @@ -842,6 +839,7 @@ mod tests {
fn test_expr_metadata() {
let mut meta = HashMap::new();
meta.insert("bar".to_string(), "buzz".to_string());
let meta = FieldMetadata::from(meta);
let expr = col("foo");
let schema = MockExprSchema::new()
.with_data_type(DataType::Int32)
Expand All @@ -860,14 +858,13 @@ mod tests {
);

let schema = DFSchema::from_unqualified_fields(
vec![Field::new("foo", DataType::Int32, true).with_metadata(meta.clone())]
.into(),
HashMap::new(),
vec![meta.add_to_field(Field::new("foo", DataType::Int32, true))].into(),
std::collections::HashMap::new(),
)
.unwrap();

// verify to_field method populates metadata
assert_eq!(&meta, expr.to_field(&schema).unwrap().1.metadata());
assert_eq!(meta, expr.metadata(&schema).unwrap());
}

#[derive(Debug)]
Expand Down Expand Up @@ -899,8 +896,8 @@ mod tests {
self
}

fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
self.field = self.field.with_metadata(metadata);
fn with_metadata(mut self, metadata: FieldMetadata) -> Self {
self.field = metadata.add_to_field(self.field);
self
}
}
Expand Down
1 change: 0 additions & 1 deletion datafusion/physical-expr/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ pub fn create_physical_expr(
match e {
Expr::Alias(Alias { expr, metadata, .. }) => {
if let Expr::Literal(v, prior_metadata) = expr.as_ref() {
let metadata = metadata.as_ref().map(|m| FieldMetadata::from(m.clone()));
let new_metadata = FieldMetadata::merge_options(
prior_metadata.as_ref(),
metadata.as_ref(),
Expand Down
5 changes: 4 additions & 1 deletion datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,10 @@ pub fn serialize_expr(
.map(|r| vec![r.into()])
.unwrap_or(vec![]),
alias: name.to_owned(),
metadata: metadata.to_owned().unwrap_or(HashMap::new()),
metadata: metadata
.as_ref()
.map(|m| m.to_hashmap())
.unwrap_or(HashMap::new()),
});
protobuf::LogicalExprNode {
expr_type: Some(ExprType::Alias(alias)),
Expand Down
29 changes: 29 additions & 0 deletions docs/source/library-user-guide/upgrading.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,35 @@

# Upgrade Guides

## DataFusion `49.0.0`

### Metadata is now represented by `FieldMetadata`

Metadata from the Arrow `Field` is now stored using the `FieldMetadata`
structure. In prior versions it was stored as both a `HashMap<String, String>`
and a `BTreeMap<String, String>`. `FieldMetadata` is a easier to work with and
is more efficient.

To create `FieldMetadata` from a `Field`:

```rust
# /* comment to avoid running
let metadata = FieldMetadata::from(&field);
# */
```

To add metadata to a `Field`, use the `add_to_field` method:

```rust
# /* comment to avoid running
let updated_field = metadata.add_to_field(field);
# */
```

See [#16317] for details.

[#16317]: https://github.com/apache/datafusion/pull/16317

## DataFusion `48.0.0`

### `Expr::Literal` has optional metadata
Expand Down