Skip to content

Commit 026356b

Browse files
authored
Include footer key metadata when writing encrypted Parquet with a plaintext footer (#7600)
# Which issue does this PR close?

Closes #7599.

# Rationale for this change

Files written with a plaintext footer are missing `footer_signing_key_metadata`; see the proposed test for a reproduction. Files written with an encrypted (non-plaintext) footer should not include `encryption_algorithm` in the footer; see the proposed test for a reproduction.

# What changes are included in this PR?

`footer_signing_key_metadata` is now included in files written with a plaintext footer, and `encryption_algorithm` is no longer included in the footer when the footer is encrypted (non-plaintext).

# Are there any user-facing changes?

No. This does not change the user-facing API.
1 parent ef91857 commit 026356b

File tree

2 files changed

+105
-9
lines changed

2 files changed

+105
-9
lines changed

parquet/src/file/metadata/writer.rs

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,12 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
140140
// in any Statistics or ColumnIndex object in the whole file.
141141
// But for simplicity we always set this field.
142142
let column_orders = Some(column_orders);
143-
144143
let (row_groups, unencrypted_row_groups) = self
145144
.object_writer
146145
.apply_row_group_encryption(self.row_groups)?;
147146

147+
let (encryption_algorithm, footer_signing_key_metadata) =
148+
self.object_writer.get_plaintext_footer_crypto_metadata();
148149
let mut file_metadata = FileMetaData {
149150
num_rows,
150151
row_groups,
@@ -153,8 +154,8 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
153154
schema: types::to_thrift(self.schema.as_ref())?,
154155
created_by: self.created_by.clone(),
155156
column_orders,
156-
encryption_algorithm: self.object_writer.get_footer_encryption_algorithm(),
157-
footer_signing_key_metadata: None,
157+
encryption_algorithm,
158+
footer_signing_key_metadata,
158159
};
159160

160161
// Write file metadata
@@ -479,8 +480,10 @@ impl MetadataObjectWriter {
479480
get_file_magic()
480481
}
481482

482-
fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
483-
None
483+
fn get_plaintext_footer_crypto_metadata(
484+
&self,
485+
) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
486+
(None, None)
484487
}
485488
}
486489

@@ -635,11 +638,20 @@ impl MetadataObjectWriter {
635638
}
636639
}
637640

638-
fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
639-
if let Some(file_encryptor) = &self.file_encryptor {
640-
return Some(Self::encryption_algorithm_from_encryptor(file_encryptor));
641+
fn get_plaintext_footer_crypto_metadata(
642+
&self,
643+
) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
644+
// Only plaintext footers may contain encryption algorithm and footer key metadata.
645+
if let Some(file_encryptor) = self.file_encryptor.as_ref() {
646+
let encryption_properties = file_encryptor.properties();
647+
if !encryption_properties.encrypt_footer() {
648+
return (
649+
Some(Self::encryption_algorithm_from_encryptor(file_encryptor)),
650+
encryption_properties.footer_key_metadata().cloned(),
651+
);
652+
}
641653
}
642-
None
654+
(None, None)
643655
}
644656

645657
fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) -> EncryptionAlgorithm {

parquet/tests/encryption/encryption.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,90 @@ fn test_non_uniform_encryption_plaintext_footer_with_key_retriever() {
256256
verify_encryption_test_file_read(file, decryption_properties);
257257
}
258258

259+
#[test]
260+
fn test_uniform_encryption_plaintext_footer_with_key_retriever() {
261+
let test_data = arrow::util::test_util::parquet_test_data();
262+
263+
// Read example data with key retriever
264+
let path = format!("{test_data}/encrypt_columns_plaintext_footer.parquet.encrypted");
265+
let file = File::open(path).unwrap();
266+
267+
let key_retriever = Arc::new(
268+
TestKeyRetriever::new()
269+
.with_key("kf".to_owned(), b"0123456789012345".to_vec())
270+
.with_key("kc1".to_owned(), b"1234567890123450".to_vec())
271+
.with_key("kc2".to_owned(), b"1234567890123451".to_vec()),
272+
);
273+
274+
let decryption_properties = FileDecryptionProperties::with_key_retriever(key_retriever.clone())
275+
.build()
276+
.unwrap();
277+
278+
let options = ArrowReaderOptions::default()
279+
.with_file_decryption_properties(decryption_properties.clone());
280+
let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
281+
282+
// Write data into temporary file with plaintext footer and footer key metadata
283+
let temp_file = tempfile::tempfile().unwrap();
284+
let encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".to_vec())
285+
.with_footer_key_metadata("kf".into())
286+
.with_column_key_and_metadata("double_field", b"1234567890123450".to_vec(), b"kc1".into())
287+
.with_column_key_and_metadata("float_field", b"1234567890123451".to_vec(), b"kc2".into())
288+
.with_plaintext_footer(true)
289+
.build()
290+
.unwrap();
291+
292+
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
293+
let batch_reader = builder.build().unwrap();
294+
let batches = batch_reader
295+
.collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
296+
.unwrap();
297+
298+
let props = WriterProperties::builder()
299+
.with_file_encryption_properties(encryption_properties)
300+
.build();
301+
302+
let mut writer = ArrowWriter::try_new(
303+
temp_file.try_clone().unwrap(),
304+
metadata.schema().clone(),
305+
Some(props),
306+
)
307+
.unwrap();
308+
for batch in batches {
309+
writer.write(&batch).unwrap();
310+
}
311+
312+
writer.close().unwrap();
313+
314+
// Read temporary file with plaintext metadata using key retriever
315+
let decryption_properties = FileDecryptionProperties::with_key_retriever(key_retriever)
316+
.build()
317+
.unwrap();
318+
319+
let options = ArrowReaderOptions::default()
320+
.with_file_decryption_properties(decryption_properties.clone());
321+
let _ = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
322+
323+
// Read temporary file with plaintext metadata using key retriever with invalid key
324+
let key_retriever = Arc::new(
325+
TestKeyRetriever::new()
326+
.with_key("kf".to_owned(), b"0133756789012345".to_vec())
327+
.with_key("kc1".to_owned(), b"1234567890123450".to_vec())
328+
.with_key("kc2".to_owned(), b"1234567890123451".to_vec()),
329+
);
330+
let decryption_properties = FileDecryptionProperties::with_key_retriever(key_retriever)
331+
.build()
332+
.unwrap();
333+
let options = ArrowReaderOptions::default()
334+
.with_file_decryption_properties(decryption_properties.clone());
335+
let result = ArrowReaderMetadata::load(&temp_file, options.clone());
336+
assert!(result.is_err());
337+
assert!(result
338+
.unwrap_err()
339+
.to_string()
340+
.starts_with("Parquet error: Footer signature verification failed. Computed: ["));
341+
}
342+
259343
#[test]
260344
fn test_non_uniform_encryption_with_key_retriever() {
261345
let test_data = arrow::util::test_util::parquet_test_data();

0 commit comments

Comments
 (0)