Skip to content

Commit 0ba444f

Browse files
authored
fix: set key_metadata to Null by default (#800)
* fix: set key_metadata to Null by default
* fix: return Option<&[u8]> instead of &Option<Vec<u8>> for key_metadata
* test: use `None` instead of `Some` for key_metadata fields
* refactor: use as_deref instead of explicit ref/deref using map
1 parent d1decdb commit 0ba444f

File tree

6 files changed

+27 -24 lines changed

crates/iceberg/src/expr/visitors/expression_evaluator.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ mod tests {
338338
nan_value_counts: HashMap::new(),
339339
lower_bounds: HashMap::new(),
340340
upper_bounds: HashMap::new(),
341-
key_metadata: vec![],
341+
key_metadata: None,
342342
split_offsets: vec![],
343343
equality_ids: vec![],
344344
sort_order_id: None,
@@ -361,7 +361,7 @@ mod tests {
361361
nan_value_counts: HashMap::new(),
362362
lower_bounds: HashMap::new(),
363363
upper_bounds: HashMap::new(),
364-
key_metadata: vec![],
364+
key_metadata: None,
365365
split_offsets: vec![],
366366
equality_ids: vec![],
367367
sort_order_id: None,

crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,7 +1991,7 @@ mod test {
19911991
nan_value_counts: Default::default(),
19921992
lower_bounds: Default::default(),
19931993
upper_bounds: Default::default(),
1994-
key_metadata: vec![],
1994+
key_metadata: None,
19951995
split_offsets: vec![],
19961996
equality_ids: vec![],
19971997
sort_order_id: None,
@@ -2012,7 +2012,7 @@ mod test {
20122012
nan_value_counts: Default::default(),
20132013
lower_bounds: Default::default(),
20142014
upper_bounds: Default::default(),
2015-
key_metadata: vec![],
2015+
key_metadata: None,
20162016
split_offsets: vec![],
20172017
equality_ids: vec![],
20182018
sort_order_id: None,
@@ -2069,7 +2069,7 @@ mod test {
20692069
]),
20702070

20712071
column_sizes: Default::default(),
2072-
key_metadata: vec![],
2072+
key_metadata: None,
20732073
split_offsets: vec![],
20742074
equality_ids: vec![],
20752075
sort_order_id: None,
@@ -2095,7 +2095,7 @@ mod test {
20952095
upper_bounds: HashMap::from([(3, Datum::string("dC"))]),
20962096

20972097
column_sizes: Default::default(),
2098-
key_metadata: vec![],
2098+
key_metadata: None,
20992099
split_offsets: vec![],
21002100
equality_ids: vec![],
21012101
sort_order_id: None,
@@ -2122,7 +2122,7 @@ mod test {
21222122
upper_bounds: HashMap::from([(3, Datum::string("3str3"))]),
21232123

21242124
column_sizes: Default::default(),
2125-
key_metadata: vec![],
2125+
key_metadata: None,
21262126
split_offsets: vec![],
21272127
equality_ids: vec![],
21282128
sort_order_id: None,
@@ -2149,7 +2149,7 @@ mod test {
21492149
upper_bounds: HashMap::from([(3, Datum::string("イロハニホヘト"))]),
21502150

21512151
column_sizes: Default::default(),
2152-
key_metadata: vec![],
2152+
key_metadata: None,
21532153
split_offsets: vec![],
21542154
equality_ids: vec![],
21552155
sort_order_id: None,

crates/iceberg/src/io/object_cache.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ mod tests {
278278
.file_size_in_bytes(100)
279279
.record_count(1)
280280
.partition(Struct::from_iter([Some(Literal::long(100))]))
281+
.key_metadata(None)
281282
.build()
282283
.unwrap(),
283284
)

crates/iceberg/src/scan.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,6 +1073,7 @@ mod tests {
10731073
.file_size_in_bytes(100)
10741074
.record_count(1)
10751075
.partition(Struct::from_iter([Some(Literal::long(100))]))
1076+
.key_metadata(None)
10761077
.build()
10771078
.unwrap(),
10781079
)

crates/iceberg/src/spec/manifest.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,7 @@ pub struct DataFile {
10741074
///
10751075
/// Implementation-specific key metadata for encryption
10761076
#[builder(default)]
1077-
pub(crate) key_metadata: Vec<u8>,
1077+
pub(crate) key_metadata: Option<Vec<u8>>,
10781078
/// field id: 132
10791079
/// element field id: 133
10801080
///
@@ -1164,8 +1164,8 @@ impl DataFile {
11641164
&self.upper_bounds
11651165
}
11661166
/// Get the Implementation-specific key metadata for the data file.
1167-
pub fn key_metadata(&self) -> &[u8] {
1168-
&self.key_metadata
1167+
pub fn key_metadata(&self) -> Option<&[u8]> {
1168+
self.key_metadata.as_deref()
11691169
}
11701170
/// Get the split offsets of the data file.
11711171
/// For example, all row group offsets in a Parquet file.
@@ -1378,12 +1378,13 @@ mod _serde {
13781378
nan_value_counts: Some(to_i64_entry(value.nan_value_counts)?),
13791379
lower_bounds: Some(to_bytes_entry(value.lower_bounds)?),
13801380
upper_bounds: Some(to_bytes_entry(value.upper_bounds)?),
1381-
key_metadata: Some(serde_bytes::ByteBuf::from(value.key_metadata)),
1381+
key_metadata: value.key_metadata.map(serde_bytes::ByteBuf::from),
13821382
split_offsets: Some(value.split_offsets),
13831383
equality_ids: Some(value.equality_ids),
13841384
sort_order_id: value.sort_order_id,
13851385
})
13861386
}
1387+
13871388
pub fn try_into(
13881389
self,
13891390
partition_type: &StructType,
@@ -1441,7 +1442,7 @@ mod _serde {
14411442
.map(|v| parse_bytes_entry(v, schema))
14421443
.transpose()?
14431444
.unwrap_or_default(),
1444-
key_metadata: self.key_metadata.map(|v| v.to_vec()).unwrap_or_default(),
1445+
key_metadata: self.key_metadata.map(|v| v.to_vec()),
14451446
split_offsets: self.split_offsets.unwrap_or_default(),
14461447
equality_ids: self.equality_ids.unwrap_or_default(),
14471448
sort_order_id: self.sort_order_id,
@@ -1657,7 +1658,7 @@ mod tests {
16571658
nan_value_counts: HashMap::new(),
16581659
lower_bounds: HashMap::new(),
16591660
upper_bounds: HashMap::new(),
1660-
key_metadata: Vec::new(),
1661+
key_metadata: None,
16611662
split_offsets: vec![4],
16621663
equality_ids: Vec::new(),
16631664
sort_order_id: None,
@@ -1813,7 +1814,7 @@ mod tests {
18131814
nan_value_counts: HashMap::new(),
18141815
lower_bounds: HashMap::new(),
18151816
upper_bounds: HashMap::new(),
1816-
key_metadata: vec![],
1817+
key_metadata: None,
18171818
split_offsets: vec![4],
18181819
equality_ids: vec![],
18191820
sort_order_id: None,
@@ -1880,7 +1881,7 @@ mod tests {
18801881
nan_value_counts: HashMap::new(),
18811882
lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]),
18821883
upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]),
1883-
key_metadata: vec![],
1884+
key_metadata: None,
18841885
split_offsets: vec![4],
18851886
equality_ids: vec![],
18861887
sort_order_id: Some(0),
@@ -1960,7 +1961,7 @@ mod tests {
19601961
(2, Datum::string("a")),
19611962
(3, Datum::string("x"))
19621963
]),
1963-
key_metadata: vec![],
1964+
key_metadata: None,
19641965
split_offsets: vec![4],
19651966
equality_ids: vec![],
19661967
sort_order_id: Some(0),
@@ -2035,7 +2036,7 @@ mod tests {
20352036
(2, Datum::int(2)),
20362037
(3, Datum::string("x"))
20372038
]),
2038-
key_metadata: vec![],
2039+
key_metadata: None,
20392040
split_offsets: vec![4],
20402041
equality_ids: vec![],
20412042
sort_order_id: None,
@@ -2105,7 +2106,7 @@ mod tests {
21052106
(1, Datum::long(1)),
21062107
(2, Datum::int(2)),
21072108
]),
2108-
key_metadata: vec![],
2109+
key_metadata: None,
21092110
split_offsets: vec![4],
21102111
equality_ids: vec![],
21112112
sort_order_id: None,
@@ -2183,7 +2184,7 @@ mod tests {
21832184
nan_value_counts: HashMap::new(),
21842185
lower_bounds: HashMap::new(),
21852186
upper_bounds: HashMap::new(),
2186-
key_metadata: Vec::new(),
2187+
key_metadata: None,
21872188
split_offsets: vec![4],
21882189
equality_ids: Vec::new(),
21892190
sort_order_id: None,
@@ -2214,7 +2215,7 @@ mod tests {
22142215
nan_value_counts: HashMap::new(),
22152216
lower_bounds: HashMap::new(),
22162217
upper_bounds: HashMap::new(),
2217-
key_metadata: Vec::new(),
2218+
key_metadata: None,
22182219
split_offsets: vec![4],
22192220
equality_ids: Vec::new(),
22202221
sort_order_id: None,
@@ -2246,7 +2247,7 @@ mod tests {
22462247
nan_value_counts: HashMap::new(),
22472248
lower_bounds: HashMap::new(),
22482249
upper_bounds: HashMap::new(),
2249-
key_metadata: Vec::new(),
2250+
key_metadata: None,
22502251
split_offsets: vec![4],
22512252
equality_ids: Vec::new(),
22522253
sort_order_id: None,
@@ -2278,7 +2279,7 @@ mod tests {
22782279
nan_value_counts: HashMap::new(),
22792280
lower_bounds: HashMap::new(),
22802281
upper_bounds: HashMap::new(),
2281-
key_metadata: Vec::new(),
2282+
key_metadata: None,
22822283
split_offsets: vec![4],
22832284
equality_ids: Vec::new(),
22842285
sort_order_id: None,

crates/iceberg/src/writer/file_writer/parquet_writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ impl ParquetWriter {
381381
// # TODO(#417)
382382
// - nan_value_counts
383383
// - distinct_counts
384-
.key_metadata(metadata.footer_signing_key_metadata.unwrap_or_default())
384+
.key_metadata(metadata.footer_signing_key_metadata)
385385
.split_offsets(
386386
metadata
387387
.row_groups

0 commit comments

Comments
 (0)