Skip to content

Commit 2e616f1

Browse files
jkosh44 authored and LiaCastaneda committed
Add support for Arrow Dictionary type in Substrait (apache#16608)
* Add support for Arrow Dictionary type in Substrait

  This commit adds support for the Arrow Dictionary type in Substrait plans.

  Resolves apache#16273

* Add more specific type variation consts

(cherry picked from commit d359d64)
1 parent fc37f3b commit 2e616f1

File tree

3 files changed

+48
-20
lines changed

3 files changed

+48
-20
lines changed

datafusion/substrait/src/logical_plan/consumer/types.rs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ use super::SubstraitConsumer;
2121
use crate::variation_const::{
2222
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2323
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
24-
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
24+
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF,
25+
DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF,
2526
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
2627
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
2728
LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
@@ -180,24 +181,32 @@ pub fn from_substrait_type(
180181
let value_type = map.value.as_ref().ok_or_else(|| {
181182
substrait_datafusion_err!("Map type must have value type")
182183
})?;
183-
let key_field = Arc::new(Field::new(
184-
"key",
185-
from_substrait_type(consumer, key_type, dfs_names, name_idx)?,
186-
false,
187-
));
188-
let value_field = Arc::new(Field::new(
189-
"value",
190-
from_substrait_type(consumer, value_type, dfs_names, name_idx)?,
191-
true,
192-
));
193-
Ok(DataType::Map(
194-
Arc::new(Field::new_struct(
195-
"entries",
196-
[key_field, value_field],
197-
false, // The inner map field is always non-nullable (Arrow #1697),
184+
let key_type =
185+
from_substrait_type(consumer, key_type, dfs_names, name_idx)?;
186+
let value_type =
187+
from_substrait_type(consumer, value_type, dfs_names, name_idx)?;
188+
189+
match map.type_variation_reference {
190+
DEFAULT_MAP_TYPE_VARIATION_REF => {
191+
let key_field = Arc::new(Field::new("key", key_type, false));
192+
let value_field = Arc::new(Field::new("value", value_type, true));
193+
Ok(DataType::Map(
194+
Arc::new(Field::new_struct(
195+
"entries",
196+
[key_field, value_field],
197+
false, // The inner map field is always non-nullable (Arrow #1697),
198+
)),
199+
false, // whether keys are sorted
200+
))
201+
}
202+
DICTIONARY_MAP_TYPE_VARIATION_REF => Ok(DataType::Dictionary(
203+
Box::new(key_type),
204+
Box::new(value_type),
198205
)),
199-
false, // whether keys are sorted
200-
))
206+
v => not_impl_err!(
207+
"Unsupported Substrait type variation {v} of type {s_kind:?}"
208+
),
209+
}
201210
}
202211
r#type::Kind::Decimal(d) => match d.type_variation_reference {
203212
DECIMAL_128_TYPE_VARIATION_REF => {

datafusion/substrait/src/logical_plan/producer/types.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ use crate::variation_const::TIMESTAMP_NANO_TYPE_VARIATION_REF;
2121
use crate::variation_const::{
2222
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2323
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
24-
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
24+
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF,
25+
DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF,
2526
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
2627
VIEW_CONTAINER_TYPE_VARIATION_REF,
2728
};
@@ -235,13 +236,25 @@ pub(crate) fn to_substrait_type(
235236
kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
236237
key: Some(Box::new(key_type)),
237238
value: Some(Box::new(value_type)),
238-
type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF,
239+
type_variation_reference: DEFAULT_MAP_TYPE_VARIATION_REF,
239240
nullability,
240241
}))),
241242
})
242243
}
243244
_ => plan_err!("Map fields must contain a Struct with exactly 2 fields"),
244245
},
246+
DataType::Dictionary(key_type, value_type) => {
247+
let key_type = to_substrait_type(key_type, nullable)?;
248+
let value_type = to_substrait_type(value_type, nullable)?;
249+
Ok(substrait::proto::Type {
250+
kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
251+
key: Some(Box::new(key_type)),
252+
value: Some(Box::new(value_type)),
253+
type_variation_reference: DICTIONARY_MAP_TYPE_VARIATION_REF,
254+
nullability,
255+
}))),
256+
})
257+
}
245258
DataType::Struct(fields) => {
246259
let field_types = fields
247260
.iter()
@@ -365,6 +378,10 @@ mod tests {
365378
.into(),
366379
false,
367380
))?;
381+
round_trip_type(DataType::Dictionary(
382+
Box::new(DataType::Utf8),
383+
Box::new(DataType::Int32),
384+
))?;
368385

369386
round_trip_type(DataType::Struct(
370387
vec![

datafusion/substrait/src/variation_const.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ pub const DATE_64_TYPE_VARIATION_REF: u32 = 1;
5353
pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0;
5454
pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1;
5555
pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2;
56+
/// Type variation reference for a regular Substrait map type; the consumer
/// decodes a map carrying this variation as an Arrow `DataType::Map`.
pub const DEFAULT_MAP_TYPE_VARIATION_REF: u32 = 0;
57+
/// Type variation reference marking a Substrait map type that encodes an Arrow
/// `DataType::Dictionary`: the map's key type carries the dictionary key type
/// and the map's value type carries the dictionary value type.
pub const DICTIONARY_MAP_TYPE_VARIATION_REF: u32 = 1;
5658
pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0;
5759
pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;
5860

0 commit comments

Comments
 (0)