1919
2020// TODO: potentially move this to arrow-rs: https://github.com/apache/arrow-rs/issues/4328
2121
22+ use arrow:: array:: builder:: FixedSizeBinaryBuilder ;
2223use arrow:: datatypes:: i256;
2324use arrow:: { array:: ArrayRef , datatypes:: DataType } ;
2425use arrow_array:: {
@@ -600,6 +601,31 @@ make_data_page_stats_iterator!(
600601 Index :: DOUBLE ,
601602 f64
602603) ;
604+ make_data_page_stats_iterator ! (
605+ MinByteArrayDataPageStatsIterator ,
606+ |x: & PageIndex <ByteArray >| { x. min. clone( ) } ,
607+ Index :: BYTE_ARRAY ,
608+ ByteArray
609+ ) ;
610+ make_data_page_stats_iterator ! (
611+ MaxByteArrayDataPageStatsIterator ,
612+ |x: & PageIndex <ByteArray >| { x. max. clone( ) } ,
613+ Index :: BYTE_ARRAY ,
614+ ByteArray
615+ ) ;
616+ make_data_page_stats_iterator ! (
617+ MaxFixedLenByteArrayDataPageStatsIterator ,
618+ |x: & PageIndex <FixedLenByteArray >| { x. max. clone( ) } ,
619+ Index :: FIXED_LEN_BYTE_ARRAY ,
620+ FixedLenByteArray
621+ ) ;
622+
623+ make_data_page_stats_iterator ! (
624+ MinFixedLenByteArrayDataPageStatsIterator ,
625+ |x: & PageIndex <FixedLenByteArray >| { x. min. clone( ) } ,
626+ Index :: FIXED_LEN_BYTE_ARRAY ,
627+ FixedLenByteArray
628+ ) ;
603629
604630macro_rules! get_decimal_page_stats_iterator {
605631 ( $iterator_type: ident, $func: ident, $stat_value_type: ident, $convert_func: ident) => {
@@ -634,9 +660,7 @@ macro_rules! get_decimal_page_stats_iterator {
634660 . indexes
635661 . iter( )
636662 . map( |x| {
637- Some ( $stat_value_type:: from(
638- x. $func. unwrap_or_default( ) ,
639- ) )
663+ x. $func. and_then( |x| Some ( $stat_value_type:: from( x) ) )
640664 } )
641665 . collect:: <Vec <_>>( ) ,
642666 ) ,
@@ -645,9 +669,7 @@ macro_rules! get_decimal_page_stats_iterator {
645669 . indexes
646670 . iter( )
647671 . map( |x| {
648- Some ( $stat_value_type:: from(
649- x. $func. unwrap_or_default( ) ,
650- ) )
672+ x. $func. and_then( |x| Some ( $stat_value_type:: from( x) ) )
651673 } )
652674 . collect:: <Vec <_>>( ) ,
653675 ) ,
@@ -656,9 +678,9 @@ macro_rules! get_decimal_page_stats_iterator {
656678 . indexes
657679 . iter( )
658680 . map( |x| {
659- Some ( $convert_func (
660- x . clone ( ) . $func. unwrap_or_default ( ) . data ( ) ,
661- ) )
681+ x . clone ( )
682+ . $func
683+ . and_then ( |x| Some ( $convert_func ( x . data ( ) ) ) )
662684 } )
663685 . collect:: <Vec <_>>( ) ,
664686 ) ,
@@ -667,9 +689,9 @@ macro_rules! get_decimal_page_stats_iterator {
667689 . indexes
668690 . iter( )
669691 . map( |x| {
670- Some ( $convert_func (
671- x . clone ( ) . $func. unwrap_or_default ( ) . data ( ) ,
672- ) )
692+ x . clone ( )
693+ . $func
694+ . and_then ( |x| Some ( $convert_func ( x . data ( ) ) ) )
673695 } )
674696 . collect:: <Vec <_>>( ) ,
675697 ) ,
@@ -713,32 +735,6 @@ get_decimal_page_stats_iterator!(
713735 i256,
714736 from_bytes_to_i256
715737) ;
716- make_data_page_stats_iterator ! (
717- MinByteArrayDataPageStatsIterator ,
718- |x: & PageIndex <ByteArray >| { x. min. clone( ) } ,
719- Index :: BYTE_ARRAY ,
720- ByteArray
721- ) ;
722- make_data_page_stats_iterator ! (
723- MaxByteArrayDataPageStatsIterator ,
724- |x: & PageIndex <ByteArray >| { x. max. clone( ) } ,
725- Index :: BYTE_ARRAY ,
726- ByteArray
727- ) ;
728-
729- make_data_page_stats_iterator ! (
730- MaxFixedLenByteArrayDataPageStatsIterator ,
731- |x: & PageIndex <FixedLenByteArray >| { x. max. clone( ) } ,
732- Index :: FIXED_LEN_BYTE_ARRAY ,
733- FixedLenByteArray
734- ) ;
735-
736- make_data_page_stats_iterator ! (
737- MinFixedLenByteArrayDataPageStatsIterator ,
738- |x: & PageIndex <FixedLenByteArray >| { x. min. clone( ) } ,
739- Index :: FIXED_LEN_BYTE_ARRAY ,
740- FixedLenByteArray
741- ) ;
742738
743739macro_rules! get_data_page_statistics {
744740 ( $stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
@@ -757,7 +753,7 @@ macro_rules! get_data_page_statistics {
757753 UInt8Array :: from_iter(
758754 [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
759755 . map( |x| {
760- x. into_iter( ) . filter_map ( |x| {
756+ x. into_iter( ) . map ( |x| {
761757 x. and_then( |x| u8 :: try_from( x) . ok( ) )
762758 } )
763759 } )
@@ -768,7 +764,7 @@ macro_rules! get_data_page_statistics {
768764 UInt16Array :: from_iter(
769765 [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
770766 . map( |x| {
771- x. into_iter( ) . filter_map ( |x| {
767+ x. into_iter( ) . map ( |x| {
772768 x. and_then( |x| u16 :: try_from( x) . ok( ) )
773769 } )
774770 } )
@@ -779,7 +775,7 @@ macro_rules! get_data_page_statistics {
779775 UInt32Array :: from_iter(
780776 [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
781777 . map( |x| {
782- x. into_iter( ) . filter_map ( |x| {
778+ x. into_iter( ) . map ( |x| {
783779 x. and_then( |x| Some ( x as u32 ) )
784780 } )
785781 } )
@@ -789,7 +785,7 @@ macro_rules! get_data_page_statistics {
789785 UInt64Array :: from_iter(
790786 [ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator)
791787 . map( |x| {
792- x. into_iter( ) . filter_map ( |x| {
788+ x. into_iter( ) . map ( |x| {
793789 x. and_then( |x| Some ( x as u64 ) )
794790 } )
795791 } )
@@ -799,7 +795,7 @@ macro_rules! get_data_page_statistics {
799795 Int8Array :: from_iter(
800796 [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
801797 . map( |x| {
802- x. into_iter( ) . filter_map ( |x| {
798+ x. into_iter( ) . map ( |x| {
803799 x. and_then( |x| i8 :: try_from( x) . ok( ) )
804800 } )
805801 } )
@@ -810,7 +806,7 @@ macro_rules! get_data_page_statistics {
810806 Int16Array :: from_iter(
811807 [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
812808 . map( |x| {
813- x. into_iter( ) . filter_map ( |x| {
809+ x. into_iter( ) . map ( |x| {
814810 x. and_then( |x| i16 :: try_from( x) . ok( ) )
815811 } )
816812 } )
@@ -823,8 +819,8 @@ macro_rules! get_data_page_statistics {
823819 Float16Array :: from_iter(
824820 [ <$stat_type_prefix Float16DataPageStatsIterator >] :: new( $iterator)
825821 . map( |x| {
826- x. into_iter( ) . filter_map ( |x| {
827- x. and_then( |x| Some ( from_bytes_to_f16( x. data( ) ) ) )
822+ x. into_iter( ) . map ( |x| {
823+ x. and_then( |x| from_bytes_to_f16( x. data( ) ) )
828824 } )
829825 } )
830826 . flatten( )
@@ -836,7 +832,7 @@ macro_rules! get_data_page_statistics {
836832 Some ( DataType :: LargeBinary ) => Ok ( Arc :: new( LargeBinaryArray :: from_iter( [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
837833 Some ( DataType :: Utf8 ) => Ok ( Arc :: new( StringArray :: from(
838834 [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . map( |x| {
839- x. into_iter( ) . filter_map ( |x| {
835+ x. into_iter( ) . map ( |x| {
840836 x. and_then( |x| {
841837 let res = std:: str :: from_utf8( x. data( ) ) . map( |s| s. to_string( ) ) . ok( ) ;
842838 if res. is_none( ) {
@@ -849,7 +845,7 @@ macro_rules! get_data_page_statistics {
849845 ) ) ) ,
850846 Some ( DataType :: LargeUtf8 ) => Ok ( Arc :: new( LargeStringArray :: from(
851847 [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . map( |x| {
852- x. into_iter( ) . filter_map ( |x| {
848+ x. into_iter( ) . map ( |x| {
853849 x. and_then( |x| {
854850 let res = std:: str :: from_utf8( x. data( ) ) . map( |s| s. to_string( ) ) . ok( ) ;
855851 if res. is_none( ) {
@@ -878,10 +874,10 @@ macro_rules! get_data_page_statistics {
878874 Date64Array :: from( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
879875 . map( |x| {
880876 x. into_iter( )
881- . filter_map ( |x| {
877+ . map ( |x| {
882878 x. and_then( |x| i64 :: try_from( x) . ok( ) )
879+ . map( |x| x * 24 * 60 * 60 * 1000 )
883880 } )
884- . map( |x| x * 24 * 60 * 60 * 1000 )
885881 } ) . flatten( ) . collect:: <Vec <_>>( )
886882 )
887883 )
@@ -919,16 +915,28 @@ macro_rules! get_data_page_statistics {
919915 } )
920916 } ,
921917 Some ( DataType :: FixedSizeBinary ( size) ) => {
922- Ok ( Arc :: new(
923- FixedSizeBinaryArray :: try_from_iter(
924- [ <$stat_type_prefix FixedLenByteArrayDataPageStatsIterator >] :: new( $iterator)
925- . flat_map( |x| x. into_iter( ) )
926- . filter_map( |x| x)
927- ) . unwrap_or_else( |e| {
928- log:: debug!( "FixedSizeBinary statistics is invalid: {}" , e) ;
929- FixedSizeBinaryArray :: new( * size, vec![ ] . into( ) , None )
930- } )
931- ) )
918+ let mut builder = FixedSizeBinaryBuilder :: new( * size) ;
919+ let iterator = [ <$stat_type_prefix FixedLenByteArrayDataPageStatsIterator >] :: new( $iterator) ;
920+ for x in iterator {
921+ for x in x. into_iter( ) {
922+ let Some ( x) = x else {
923+ builder. append_null( ) ; // no statistics value
924+ continue ;
925+ } ;
926+
927+ if x. len( ) == * size as usize {
928+ let _ = builder. append_value( x. data( ) ) ;
929+ } else {
930+ log:: debug!(
931+ "FixedSizeBinary({}) statistics is a binary of size {}, ignoring it." ,
932+ size,
933+ x. len( ) ,
934+ ) ;
935+ builder. append_null( ) ;
936+ }
937+ }
938+ }
939+ Ok ( Arc :: new( builder. finish( ) ) )
932940 } ,
933941 _ => unimplemented!( )
934942 }
0 commit comments