Skip to content

Commit caeb4d2

Browse files
authored
feat: Improve DataType display for RunEndEncoded (#8596)
## Which issue does this PR close? - Closes #8351 ## Rationale for this change ## What changes are included in this PR? This PR refactors and improves the `Display` formatting for `DataType` by: - **Improving Union type display** - now shows field information with parentheses for clarity: `Union(Sparse, 0: ('a': Int32), 1: ('b': nullable Utf8))` - **Improving RunEndEncoded type display** - now properly formats both run_ends and values fields: `RunEndEncoded('run_ends': UInt32, 'values': nullable Int32)` ## Are these changes tested? Yes ## Are there any user-facing changes?
1 parent 11d8660 commit caeb4d2

File tree

1 file changed

+104
-24
lines changed

1 file changed

+104
-24
lines changed

arrow-schema/src/datatype_display.rs

Lines changed: 104 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ impl fmt::Display for DataType {
2929
}
3030
}
3131

32+
fn format_field(field: &crate::Field) -> String {
33+
let name = field.name();
34+
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
35+
let data_type = field.data_type();
36+
let metadata_str = format_metadata(field.metadata());
37+
format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
38+
}
39+
3240
// A lot of these can still be improved a lot.
3341
// _Some_ of these can be parsed with `FromStr`, but not all (YET!).
3442
// The goal is that the formatting should always be
@@ -122,13 +130,7 @@ impl fmt::Display for DataType {
122130
if !fields.is_empty() {
123131
let fields_str = fields
124132
.iter()
125-
.map(|field| {
126-
let name = field.name();
127-
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
128-
let data_type = field.data_type();
129-
let metadata_str = format_metadata(field.metadata());
130-
format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
131-
})
133+
.map(|field| format_field(field))
132134
.collect::<Vec<_>>()
133135
.join(", ");
134136
write!(f, "{fields_str}")?;
@@ -143,11 +145,8 @@ impl fmt::Display for DataType {
143145
.iter()
144146
.map(|v| {
145147
let type_id = v.0;
146-
let field = v.1;
147-
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
148-
let data_type = field.data_type();
149-
let metadata_str = format_metadata(field.metadata());
150-
format!("{type_id:?}: {maybe_nullable}{data_type}{metadata_str}")
148+
let field_str = format_field(v.1);
149+
format!("{type_id:?}: ({field_str})")
151150
})
152151
.collect::<Vec<_>>()
153152
.join(", ");
@@ -165,20 +164,19 @@ impl fmt::Display for DataType {
165164
Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
166165
Self::Map(field, sorted) => {
167166
write!(f, "Map(")?;
168-
let name = field.name();
169-
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
170-
let data_type = field.data_type();
171-
let metadata_str = format_metadata(field.metadata());
167+
let map_field_str = format_field(field);
172168
let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };
173169

174-
write!(
175-
f,
176-
"\"{name}\": {maybe_nullable}{data_type}{metadata_str}, {keys_are_sorted})"
177-
)?;
170+
write!(f, "{map_field_str}, {keys_are_sorted})")?;
178171
Ok(())
179172
}
180173
Self::RunEndEncoded(run_ends_field, values_field) => {
181-
write!(f, "RunEndEncoded({run_ends_field}, {values_field})")
174+
write!(f, "RunEndEncoded(")?;
175+
let run_ends_str = format_field(run_ends_field);
176+
let values_str = format_field(values_field);
177+
178+
write!(f, "{run_ends_str}, {values_str})")?;
179+
Ok(())
182180
}
183181
}
184182
}
@@ -391,7 +389,7 @@ mod tests {
391389

392390
let union_data_type = DataType::Union(union_fields, crate::UnionMode::Sparse);
393391
let union_data_type_string = union_data_type.to_string();
394-
let expected_string = "Union(Sparse, 0: Int32, 1: nullable Utf8)";
392+
let expected_string = "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8))";
395393
assert_eq!(union_data_type_string, expected_string);
396394

397395
// Test with metadata
@@ -407,8 +405,7 @@ mod tests {
407405
let union_data_type_with_metadata =
408406
DataType::Union(union_fields_with_metadata, crate::UnionMode::Sparse);
409407
let union_data_type_with_metadata_string = union_data_type_with_metadata.to_string();
410-
let expected_string_with_metadata =
411-
"Union(Sparse, 0: Int32, 1: nullable Utf8, metadata: {\"key\": \"value\"})";
408+
let expected_string_with_metadata = "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8, metadata: {\"key\": \"value\"}))";
412409
assert_eq!(
413410
union_data_type_with_metadata_string,
414411
expected_string_with_metadata
@@ -456,4 +453,87 @@ mod tests {
456453
expected_string_with_metadata
457454
);
458455
}
456+
457+
#[test]
458+
fn test_display_run_end_encoded() {
459+
let run_ends_field = Arc::new(Field::new("run_ends", DataType::UInt32, false));
460+
let values_field = Arc::new(Field::new("values", DataType::Int32, true));
461+
let ree_data_type = DataType::RunEndEncoded(run_ends_field.clone(), values_field.clone());
462+
let ree_data_type_string = ree_data_type.to_string();
463+
let expected_string = "RunEndEncoded(\"run_ends\": UInt32, \"values\": nullable Int32)";
464+
assert_eq!(ree_data_type_string, expected_string);
465+
466+
// Test with metadata
467+
let mut run_ends_field_with_metadata = Field::new("run_ends", DataType::UInt32, false);
468+
let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
469+
run_ends_field_with_metadata.set_metadata(metadata);
470+
let ree_data_type_with_metadata =
471+
DataType::RunEndEncoded(Arc::new(run_ends_field_with_metadata), values_field.clone());
472+
let ree_data_type_with_metadata_string = ree_data_type_with_metadata.to_string();
473+
let expected_string_with_metadata = "RunEndEncoded(\"run_ends\": UInt32, metadata: {\"key\": \"value\"}, \"values\": nullable Int32)";
474+
assert_eq!(
475+
ree_data_type_with_metadata_string,
476+
expected_string_with_metadata
477+
);
478+
}
479+
480+
#[test]
481+
fn test_display_dictionary() {
482+
let dict_data_type =
483+
DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
484+
let dict_data_type_string = dict_data_type.to_string();
485+
let expected_string = "Dictionary(Int8, Utf8)";
486+
assert_eq!(dict_data_type_string, expected_string);
487+
488+
// Test with complex index and value types
489+
let complex_dict_data_type = DataType::Dictionary(
490+
Box::new(DataType::Int16),
491+
Box::new(DataType::Struct(
492+
vec![
493+
Field::new("a", DataType::Int32, false),
494+
Field::new("b", DataType::Utf8, true),
495+
]
496+
.into(),
497+
)),
498+
);
499+
let complex_dict_data_type_string = complex_dict_data_type.to_string();
500+
let expected_complex_string =
501+
"Dictionary(Int16, Struct(\"a\": Int32, \"b\": nullable Utf8))";
502+
assert_eq!(complex_dict_data_type_string, expected_complex_string);
503+
}
504+
505+
#[test]
506+
fn test_display_interval() {
507+
let interval_year_month = DataType::Interval(crate::IntervalUnit::YearMonth);
508+
let interval_year_month_string = interval_year_month.to_string();
509+
let expected_year_month_string = "Interval(YearMonth)";
510+
assert_eq!(interval_year_month_string, expected_year_month_string);
511+
512+
let interval_day_time = DataType::Interval(crate::IntervalUnit::DayTime);
513+
let interval_day_time_string = interval_day_time.to_string();
514+
let expected_day_time_string = "Interval(DayTime)";
515+
assert_eq!(interval_day_time_string, expected_day_time_string);
516+
517+
let interval_month_day_nano = DataType::Interval(crate::IntervalUnit::MonthDayNano);
518+
let interval_month_day_nano_string = interval_month_day_nano.to_string();
519+
let expected_month_day_nano_string = "Interval(MonthDayNano)";
520+
assert_eq!(
521+
interval_month_day_nano_string,
522+
expected_month_day_nano_string
523+
);
524+
}
525+
526+
#[test]
527+
fn test_display_timestamp() {
528+
let timestamp_without_tz = DataType::Timestamp(crate::TimeUnit::Microsecond, None);
529+
let timestamp_without_tz_string = timestamp_without_tz.to_string();
530+
let expected_without_tz_string = "Timestamp(µs)";
531+
assert_eq!(timestamp_without_tz_string, expected_without_tz_string);
532+
533+
let timestamp_with_tz =
534+
DataType::Timestamp(crate::TimeUnit::Nanosecond, Some(Arc::from("UTC")));
535+
let timestamp_with_tz_string = timestamp_with_tz.to_string();
536+
let expected_with_tz_string = "Timestamp(ns, \"UTC\")";
537+
assert_eq!(timestamp_with_tz_string, expected_with_tz_string);
538+
}
459539
}

0 commit comments

Comments
 (0)