Skip to content

Commit 82327f4

Browse files
committed
Add support for Arrow Dictionary type in Substrait
This commit adds support for the Arrow Dictionary type in Substrait plans. Resolves #16273
1 parent d66d6b9 commit 82327f4

File tree

3 files changed

+55
-27
lines changed

3 files changed

+55
-27
lines changed

datafusion/substrait/src/logical_plan/consumer/types.rs

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ use crate::variation_const::{
2222
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2323
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
2424
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
25-
DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
26-
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
27-
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
28-
LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
29-
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
30-
TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
31-
VIEW_CONTAINER_TYPE_VARIATION_REF,
25+
DEFAULT_TYPE_VARIATION_REF, DICTIONARY_CONTAINER_TYPE_VARIATION_REF,
26+
DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF,
27+
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
28+
INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF,
29+
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
30+
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
31+
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
3232
};
3333
use datafusion::arrow::datatypes::{
3434
DataType, Field, Fields, IntervalUnit, Schema, TimeUnit,
@@ -181,24 +181,32 @@ pub fn from_substrait_type(
181181
let value_type = map.value.as_ref().ok_or_else(|| {
182182
substrait_datafusion_err!("Map type must have value type")
183183
})?;
184-
let key_field = Arc::new(Field::new(
185-
"key",
186-
from_substrait_type(consumer, key_type, dfs_names, name_idx)?,
187-
false,
188-
));
189-
let value_field = Arc::new(Field::new(
190-
"value",
191-
from_substrait_type(consumer, value_type, dfs_names, name_idx)?,
192-
true,
193-
));
194-
Ok(DataType::Map(
195-
Arc::new(Field::new_struct(
196-
"entries",
197-
[key_field, value_field],
198-
false, // The inner map field is always non-nullable (Arrow #1697),
184+
let key_type =
185+
from_substrait_type(consumer, key_type, dfs_names, name_idx)?;
186+
let value_type =
187+
from_substrait_type(consumer, value_type, dfs_names, name_idx)?;
188+
189+
match map.type_variation_reference {
190+
DEFAULT_CONTAINER_TYPE_VARIATION_REF => {
191+
let key_field = Arc::new(Field::new("key", key_type, false));
192+
let value_field = Arc::new(Field::new("value", value_type, true));
193+
Ok(DataType::Map(
194+
Arc::new(Field::new_struct(
195+
"entries",
196+
[key_field, value_field],
197+
false, // The inner map field is always non-nullable (Arrow #1697),
198+
)),
199+
false, // whether keys are sorted
200+
))
201+
}
202+
DICTIONARY_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::Dictionary(
203+
Box::new(key_type),
204+
Box::new(value_type),
199205
)),
200-
false, // whether keys are sorted
201-
))
206+
v => not_impl_err!(
207+
"Unsupported Substrait type variation {v} of type {s_kind:?}"
208+
),
209+
}
202210
}
203211
r#type::Kind::Decimal(d) => match d.type_variation_reference {
204212
DECIMAL_128_TYPE_VARIATION_REF => {

datafusion/substrait/src/logical_plan/producer/types.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ use crate::variation_const::{
2020
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2121
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
2222
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
23-
DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
24-
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
25-
VIEW_CONTAINER_TYPE_VARIATION_REF,
23+
DEFAULT_TYPE_VARIATION_REF, DICTIONARY_CONTAINER_TYPE_VARIATION_REF,
24+
DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF,
25+
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
2626
};
2727
use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
2828
use datafusion::common::{internal_err, not_impl_err, plan_err, DFSchemaRef};
@@ -271,6 +271,18 @@ pub(crate) fn to_substrait_type(
271271
}
272272
_ => plan_err!("Map fields must contain a Struct with exactly 2 fields"),
273273
},
274+
DataType::Dictionary(key_type, value_type) => {
275+
let key_type = to_substrait_type(key_type, nullable)?;
276+
let value_type = to_substrait_type(value_type, nullable)?;
277+
Ok(substrait::proto::Type {
278+
kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
279+
key: Some(Box::new(key_type)),
280+
value: Some(Box::new(value_type)),
281+
type_variation_reference: DICTIONARY_CONTAINER_TYPE_VARIATION_REF,
282+
nullability,
283+
}))),
284+
})
285+
}
274286
DataType::Struct(fields) => {
275287
let field_types = fields
276288
.iter()
@@ -391,6 +403,10 @@ mod tests {
391403
.into(),
392404
false,
393405
))?;
406+
round_trip_type(DataType::Dictionary(
407+
Box::new(DataType::Utf8),
408+
Box::new(DataType::Int32),
409+
))?;
394410

395411
round_trip_type(DataType::Struct(
396412
vec![

datafusion/substrait/src/variation_const.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ pub const DATE_64_TYPE_VARIATION_REF: u32 = 1;
5353
pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0;
5454
pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1;
5555
pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2;
56+
/// Used for the arrow type [`DataType::Dictionary`].
57+
///
58+
/// [`DataType::Dictionary`]: datafusion::arrow::datatypes::DataType::Dictionary
59+
pub const DICTIONARY_CONTAINER_TYPE_VARIATION_REF: u32 = 3;
5660
pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0;
5761
pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;
5862
/// Used for the arrow type [`DataType::Interval`] with [`IntervalUnit::DayTime`].

0 commit comments

Comments
 (0)