@@ -78,6 +78,26 @@ pub struct ParquetMetaDataReader {
7878 file_decryption_properties : Option < FileDecryptionProperties > ,
7979}
8080
/// Describes how the footer metadata is stored
///
/// This is parsed from the last 8 bytes of the Parquet file, see
/// [`ParquetMetaDataReader::decode_footer_tail`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FooterTail {
    /// Length in bytes of the footer metadata, as recorded in the tail.
    metadata_length: usize,
    /// `true` when the tail carried the encrypted-footer magic.
    encrypted_footer: bool,
}

impl FooterTail {
    /// The length of the footer metadata in bytes
    pub fn metadata_length(&self) -> usize {
        self.metadata_length
    }

    /// Whether the footer metadata is encrypted
    pub fn encrypted_footer(&self) -> bool {
        self.encrypted_footer
    }
}
100+
81101impl ParquetMetaDataReader {
82102 /// Create a new [`ParquetMetaDataReader`]
83103 pub fn new ( ) -> Self {
@@ -366,6 +386,7 @@ impl ParquetMetaDataReader {
366386 & mut fetch,
367387 file_size,
368388 self . get_prefetch_size ( ) ,
389+ #[ cfg( feature = "encryption" ) ]
369390 self . file_decryption_properties . as_ref ( ) ,
370391 )
371392 . await ?;
@@ -520,7 +541,8 @@ impl ParquetMetaDataReader {
520541 . get_read ( file_size - 8 ) ?
521542 . read_exact ( & mut footer) ?;
522543
523- let metadata_len = Self :: decode_footer ( & footer) ?;
544+ let footer = Self :: decode_footer_tail ( & footer) ?;
545+ let metadata_len = footer. metadata_length ( ) ;
524546 let footer_metadata_len = FOOTER_SIZE + metadata_len;
525547 self . metadata_size = Some ( footer_metadata_len) ;
526548
@@ -536,6 +558,8 @@ impl ParquetMetaDataReader {
536558 chunk_reader. get_bytes ( start, metadata_len) ?. as_ref ( ) ,
537559 #[ cfg( feature = "encryption" ) ]
538560 self . file_decryption_properties . as_ref ( ) ,
561+ #[ cfg( feature = "encryption" ) ]
562+ footer. encrypted_footer ( ) ,
539563 )
540564 }
541565
@@ -557,7 +581,9 @@ impl ParquetMetaDataReader {
557581 fetch : & mut F ,
558582 file_size : usize ,
559583 prefetch : usize ,
560- file_decryption_properties : Option < & FileDecryptionProperties > ,
584+ #[ cfg( feature = "encryption" ) ] file_decryption_properties : Option <
585+ & FileDecryptionProperties ,
586+ > ,
561587 ) -> Result < ( ParquetMetaData , Option < ( usize , Bytes ) > ) > {
562588 if file_size < FOOTER_SIZE {
563589 return Err ( eof_err ! ( "file size of {} is less than footer" , file_size) ) ;
@@ -582,7 +608,8 @@ impl ParquetMetaDataReader {
582608 let mut footer = [ 0 ; FOOTER_SIZE ] ;
583609 footer. copy_from_slice ( & suffix[ suffix_len - FOOTER_SIZE ..suffix_len] ) ;
584610
585- let length = Self :: decode_footer ( & footer) ?;
611+ let footer = Self :: decode_footer_tail ( & footer) ?;
612+ let length = footer. metadata_length ( ) ;
586613
587614 if file_size < length + FOOTER_SIZE {
588615 return Err ( eof_err ! (
@@ -597,22 +624,34 @@ impl ParquetMetaDataReader {
597624 let metadata_start = file_size - length - FOOTER_SIZE ;
598625 let meta = fetch. fetch ( metadata_start..file_size - FOOTER_SIZE ) . await ?;
599626 Ok ( (
600- Self :: decode_metadata ( & meta, file_decryption_properties) ?,
627+ Self :: decode_metadata (
628+ & meta,
629+ #[ cfg( feature = "encryption" ) ]
630+ file_decryption_properties,
631+ #[ cfg( feature = "encryption" ) ]
632+ footer. encrypted_footer ( ) ,
633+ ) ?,
601634 None ,
602635 ) )
603636 } else {
604637 let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
605638 let slice = & suffix[ metadata_start..suffix_len - FOOTER_SIZE ] ;
606639 Ok ( (
607- Self :: decode_metadata ( slice, file_decryption_properties) ?,
640+ Self :: decode_metadata (
641+ slice,
642+ #[ cfg( feature = "encryption" ) ]
643+ file_decryption_properties,
644+ #[ cfg( feature = "encryption" ) ]
645+ footer. encrypted_footer ( ) ,
646+ ) ?,
608647 Some ( ( footer_start, suffix. slice ( ..metadata_start) ) ) ,
609648 ) )
610649 }
611650 }
612651
613- /// Decodes the Parquet footer returning the metadata length in bytes
652+ /// Decodes the end of the Parquet footer
614653 ///
615- /// A parquet footer is 8 bytes long and has the following layout:
654+ /// There are 8 bytes at the end of the Parquet footer with the following layout:
616655 /// * 4 bytes for the metadata length
617656 /// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
618657 ///
@@ -621,16 +660,28 @@ impl ParquetMetaDataReader {
621660 /// | len | 'PAR1' or 'PARE' |
622661 /// +-----+------------------+
623662 /// ```
624- pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
625- // check this is indeed a parquet file
626- if slice[ 4 ..] != PARQUET_MAGIC && slice[ 4 ..] != PARQUET_MAGIC_ENCR_FOOTER {
663+ pub fn decode_footer_tail ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < FooterTail > {
664+ let magic = & slice[ 4 ..] ;
665+ let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
666+ true
667+ } else if magic == PARQUET_MAGIC {
668+ false
669+ } else {
627670 return Err ( general_err ! ( "Invalid Parquet file. Corrupt footer" ) ) ;
628- }
629-
671+ } ;
630672 // get the metadata length from the footer
631673 let metadata_len = u32:: from_le_bytes ( slice[ ..4 ] . try_into ( ) . unwrap ( ) ) ;
632- // u32 won't be larger than usize in most cases
633- Ok ( metadata_len as usize )
674+ Ok ( FooterTail {
675+ // u32 won't be larger than usize in most cases
676+ metadata_length : metadata_len as usize ,
677+ encrypted_footer,
678+ } )
679+ }
680+
681+ /// Decodes the Parquet footer, returning the metadata length in bytes
682+ #[ deprecated( note = "use decode_footer_tail instead" ) ]
683+ pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
684+ Self :: decode_footer_tail ( slice) . map ( |f| f. metadata_length )
634685 }
635686
636687 /// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -645,18 +696,29 @@ impl ParquetMetaDataReader {
645696 #[ cfg( feature = "encryption" ) ] file_decryption_properties : Option <
646697 & FileDecryptionProperties ,
647698 > ,
699+ #[ cfg( feature = "encryption" ) ] encrypted_footer : bool ,
648700 ) -> Result < ParquetMetaData > {
649701 let mut prot = TCompactSliceInputProtocol :: new ( buf) ;
650702
703+ #[ cfg( not( feature = "encryption" ) ) ]
704+ if encrypted_footer ( ) {
705+ return Err ( general_err ! (
706+ "Parquet file has an encrypted footer but the encryption feature is disabled"
707+ ) ) ;
708+ }
709+
651710 #[ cfg( feature = "encryption" ) ]
652711 let mut file_decryptor = None ;
653712 #[ cfg( feature = "encryption" ) ]
654713 let decrypted_fmd_buf;
655714
656715 #[ cfg( feature = "encryption" ) ]
657- if file_decryption_properties. is_some ( )
658- && file_decryption_properties. unwrap ( ) . has_footer_key ( )
659- {
716+ if encrypted_footer {
717+ if file_decryption_properties. is_none ( ) {
718+ return Err ( general_err ! ( "Parquet file has an encrypted footer but no decryption properties were provided" ) ) ;
719+ } ;
720+ let file_decryption_properties = file_decryption_properties. unwrap ( ) ;
721+
660722 let t_file_crypto_metadata: TFileCryptoMetaData =
661723 TFileCryptoMetaData :: read_from_in_protocol ( & mut prot)
662724 . map_err ( |e| general_err ! ( "Could not parse crypto metadata: {}" , e) ) ?;
@@ -678,7 +740,7 @@ impl ParquetMetaDataReader {
678740 let aad_prefix: Vec < u8 > = aes_gcm_algo. aad_prefix . unwrap_or_default ( ) ;
679741
680742 file_decryptor = Some ( FileDecryptor :: new (
681- file_decryption_properties. unwrap ( ) ,
743+ file_decryption_properties,
682744 aad_file_unique. clone ( ) ,
683745 aad_prefix. clone ( ) ,
684746 ) ) ;
0 commit comments