
Commit 67f70d7

Merge branch 'main' into avro-writer-struct-list-types
2 parents: 6d96235 + 0c7cb2a

File tree: 4 files changed, +51 -6 lines

  .github/workflows/integration.yml
  arrow-avro/src/schema.rs
  arrow-avro/src/writer/encoder.rs
  parquet/src/column/writer/mod.rs

.github/workflows/integration.yml

Lines changed: 6 additions & 0 deletions
@@ -63,6 +63,7 @@ jobs:
       ARROW_INTEGRATION_CPP: ON
       ARROW_INTEGRATION_CSHARP: ON
       ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust"
+      ARCHERY_INTEGRATION_WITH_DOTNET: "1"
       ARCHERY_INTEGRATION_WITH_GO: "1"
       ARCHERY_INTEGRATION_WITH_JAVA: "1"
       ARCHERY_INTEGRATION_WITH_JS: "1"
@@ -98,6 +99,11 @@ jobs:
         with:
           path: rust
           fetch-depth: 0
+      - name: Checkout Arrow .NET
+        uses: actions/checkout@v5
+        with:
+          repository: apache/arrow-dotnet
+          path: dotnet
       - name: Checkout Arrow Go
         uses: actions/checkout@v5
         with:

arrow-avro/src/schema.rs

Lines changed: 0 additions & 1 deletion
@@ -2135,7 +2135,6 @@ mod tests {
 
     #[test]
     fn default_order_is_consistent() {
-        // Ensure TryFrom delegates to from_arrow_with_options(None)
         let arrow_schema = ArrowSchema::new(vec![ArrowField::new("s", DataType::Utf8, true)]);
         let a = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
         let b = AvroSchema::from_arrow_with_options(&arrow_schema, None);

arrow-avro/src/writer/encoder.rs

Lines changed: 1 addition & 1 deletion
@@ -182,7 +182,7 @@ impl<'a> FieldEncoder<'a> {
             },
             other => {
                 return Err(ArrowError::NotYetImplemented(format!(
-                    "Avro writer: {other:?} not yet supported".into(),
+                    "Avro writer: {other:?} not yet supported",
                 )));
             }
         };
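
For context: `format!` already produces an owned `String`, which is exactly what `ArrowError::NotYetImplemented` stores, so the extra `.into()` on the template was redundant. A minimal sketch of the corrected construction, assuming only the `arrow_schema` crate; the `unsupported` helper is illustrative and not part of the arrow-avro writer:

use arrow_schema::{ArrowError, DataType, TimeUnit};

// Hypothetical helper mirroring the fixed line above: `format!` returns a
// String, so no conversion is needed before wrapping it in the error variant.
fn unsupported(other: &DataType) -> ArrowError {
    ArrowError::NotYetImplemented(format!("Avro writer: {other:?} not yet supported"))
}

fn main() {
    let err = unsupported(&DataType::Duration(TimeUnit::Second));
    println!("{err}");
}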

parquet/src/column/writer/mod.rs

Lines changed: 44 additions & 4 deletions
@@ -1104,12 +1104,23 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
             rep_levels_byte_len + def_levels_byte_len + values_data.buf.len();
 
         // Data Page v2 compresses values only.
-        match self.compressor {
+        let is_compressed = match self.compressor {
             Some(ref mut cmpr) => {
+                let buffer_len = buffer.len();
                 cmpr.compress(&values_data.buf, &mut buffer)?;
+                if uncompressed_size <= buffer.len() - buffer_len {
+                    buffer.truncate(buffer_len);
+                    buffer.extend_from_slice(&values_data.buf);
+                    false
+                } else {
+                    true
+                }
             }
-            None => buffer.extend_from_slice(&values_data.buf),
-        }
+            None => {
+                buffer.extend_from_slice(&values_data.buf);
+                false
+            }
+        };
 
         let data_page = Page::DataPageV2 {
             buf: buffer.into(),
@@ -1119,7 +1130,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
             num_rows: self.page_metrics.num_buffered_rows,
             def_levels_byte_len: def_levels_byte_len as u32,
             rep_levels_byte_len: rep_levels_byte_len as u32,
-            is_compressed: self.compressor.is_some(),
+            is_compressed,
             statistics: page_statistics,
         };
 
@@ -4236,4 +4247,33 @@ mod tests {
             .unwrap();
         ColumnDescriptor::new(Arc::new(tpe), max_def_level, max_rep_level, path)
     }
+
+    #[test]
+    fn test_page_v2_snappy_compression_fallback() {
+        // Test that PageV2 sets is_compressed to false when Snappy compression increases data size
+        let page_writer = TestPageWriter {};
+
+        // Create WriterProperties with PageV2 and Snappy compression
+        let props = WriterProperties::builder()
+            .set_writer_version(WriterVersion::PARQUET_2_0)
+            // Disable dictionary to ensure data is written directly
+            .set_dictionary_enabled(false)
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let mut column_writer =
+            get_test_column_writer::<ByteArrayType>(Box::new(page_writer), 0, 0, Arc::new(props));
+
+        // Create small, simple data that Snappy compression will likely increase in size
+        // due to compression overhead for very small data
+        let values = vec![ByteArray::from("a")];
+
+        column_writer.write_batch(&values, None, None).unwrap();
+
+        let result = column_writer.close().unwrap();
+        assert_eq!(
+            result.metadata.uncompressed_size(),
+            result.metadata.compressed_size()
+        );
+    }
 }
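
The substantive change: Data Page v2 compresses only the values section, and when the codec output is not smaller than the input (as Snappy tends to do for tiny payloads) the writer now discards the compressed bytes, appends the raw values instead, and reports `is_compressed: false`; that is why the new test expects `uncompressed_size()` to equal `compressed_size()`. Below is a minimal, self-contained sketch of that fallback pattern. It uses a plain closure in place of the crate's codec and compares only the value bytes for simplicity, so `append_values` and the closure-based "codec" are illustrative, not the parquet crate's API.

// A minimal sketch of the "keep compressed output only if it is smaller"
// fallback; `append_values` and the closure-based codec are illustrative,
// not the parquet crate's actual types.
fn append_values(
    buffer: &mut Vec<u8>,
    values: &[u8],
    compressor: Option<&dyn Fn(&[u8]) -> Vec<u8>>,
) -> bool {
    match compressor {
        Some(compress) => {
            let start = buffer.len();
            buffer.extend_from_slice(&compress(values));
            // If compression did not shrink the values, roll back to the raw
            // bytes and report the page as uncompressed.
            if values.len() <= buffer.len() - start {
                buffer.truncate(start);
                buffer.extend_from_slice(values);
                false
            } else {
                true
            }
        }
        None => {
            buffer.extend_from_slice(values);
            false
        }
    }
}

fn main() {
    // A "codec" that always adds framing overhead, so a one-byte value grows.
    let inflate = |v: &[u8]| {
        let mut out = Vec::with_capacity(v.len() + 8);
        out.extend_from_slice(b"HEADER__");
        out.extend_from_slice(v);
        out
    };
    let mut page = Vec::new();
    let is_compressed = append_values(&mut page, b"a", Some(&inflate));
    assert!(!is_compressed);
    assert_eq!(page, b"a".to_vec()); // raw byte kept, codec output discarded
    println!("is_compressed = {is_compressed}, page = {page:?}");
}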
