diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 67c517ddbc4f..f68334ec24ca 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -22,13 +22,13 @@ use arrow::datatypes::i256; use arrow::{array::ArrayRef, datatypes::DataType}; use arrow_array::{ - new_null_array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, - Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, - StringArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + new_empty_array, new_null_array, BinaryArray, BooleanArray, Date32Array, Date64Array, + Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, + LargeStringArray, StringArray, Time32MillisecondArray, Time32SecondArray, + Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; use arrow_schema::{Field, FieldRef, Schema, TimeUnit}; use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result}; @@ -873,6 +873,34 @@ macro_rules! get_data_page_statistics { Decimal128Array::from_iter([<$stat_type_prefix Decimal128DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), Some(DataType::Decimal256(precision, scale)) => Ok(Arc::new( Decimal256Array::from_iter([<$stat_type_prefix Decimal256DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), + Some(DataType::Time32(unit)) => { + Ok(match unit { + TimeUnit::Second => Arc::new(Time32SecondArray::from_iter( + [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten(), + )), + TimeUnit::Millisecond => Arc::new(Time32MillisecondArray::from_iter( + [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten(), + )), + _ => { + // don't know how to extract statistics, so return an empty array + new_empty_array(&DataType::Time32(unit.clone())) + } + }) + } + Some(DataType::Time64(unit)) => { + Ok(match unit { + TimeUnit::Microsecond => Arc::new(Time64MicrosecondArray::from_iter( + [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(), + )), + TimeUnit::Nanosecond => Arc::new(Time64NanosecondArray::from_iter( + [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(), + )), + _ => { + // don't know how to extract statistics, so return an empty array + new_empty_array(&DataType::Time64(unit.clone())) + } + }) + } _ => unimplemented!() } } diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 47f079063d3c..b68ebffa1045 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -1204,7 +1204,7 @@ async fn test_time32_second_diff_rg_sizes() { expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), column_name: "second", - check: Check::RowGroup, + check: Check::Both, } .run(); } @@ -1231,7 +1231,7 @@ async fn test_time32_millisecond_diff_rg_sizes() { expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), column_name: "millisecond", - check: Check::RowGroup, + check: Check::Both, } .run(); } @@ -1264,7 +1264,7 @@ async fn test_time64_microsecond_diff_rg_sizes() { expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), column_name: "microsecond", - check: Check::RowGroup, + check: Check::Both, } .run(); } @@ -1297,7 +1297,7 @@ async fn test_time64_nanosecond_diff_rg_sizes() { expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), column_name: "nanosecond", - check: Check::RowGroup, + check: Check::Both, } .run(); }