diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index f1282e8cdab3..b1985e5f35fc 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -27,7 +27,7 @@ repository = { workspace = true } authors = { workspace = true } keywords = ["arrow", "parquet", "variant"] readme = "README.md" -edition = { workspace = true } +edition = "2024" rust-version = { workspace = true } [dependencies] diff --git a/parquet-variant/benches/variant_builder.rs b/parquet-variant/benches/variant_builder.rs index 5d00cc054e55..420fa583ee1a 100644 --- a/parquet-variant/benches/variant_builder.rs +++ b/parquet-variant/benches/variant_builder.rs @@ -21,9 +21,9 @@ use criterion::*; use parquet_variant::{Variant, VariantBuilder}; use rand::{ - distr::{uniform::SampleUniform, Alphanumeric}, - rngs::StdRng, Rng, SeedableRng, + distr::{Alphanumeric, uniform::SampleUniform}, + rngs::StdRng, }; use std::{hint, ops::Range}; diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index 17455fc4bfe5..d44e9b10a313 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -16,7 +16,7 @@ // under the License. use crate::decoder::{VariantBasicType, VariantPrimitiveType}; use crate::{ - ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList, + ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantList, VariantMetadata, VariantObject, }; use arrow_schema::ArrowError; @@ -3403,10 +3403,12 @@ mod tests { // This should fail because "unknown_field" is not in the metadata let result = obj.try_insert("unknown_field", "value"); assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("Field name 'unknown_field' not found")); + assert!( + result + .unwrap_err() + .to_string() + .contains("Field name 'unknown_field' not found") + ); } } diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs index 26b4e204fa69..8cf3cec1129e 100644 --- a/parquet-variant/src/decoder.rs +++ b/parquet-variant/src/decoder.rs @@ -14,10 +14,10 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +use crate::ShortString; use crate::utils::{ array_from_slice, overflow_error, slice_from_slice_at_offset, string_from_slice, }; -use crate::ShortString; use arrow_schema::ArrowError; use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, Utc}; @@ -143,7 +143,7 @@ impl OffsetSizeBytes { _ => { return Err(ArrowError::InvalidArgumentError( "offset_size_minus_one must be 0–3".to_string(), - )) + )); } }; Ok(result) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 849947675b13..5663ab155cb8 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8}; +pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16}; pub use self::list::VariantList; -pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES}; +pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata}; pub use self::object::VariantObject; // Publically export types used in the API @@ -25,7 +25,7 @@ pub use half::f16; pub use uuid::Uuid; use crate::decoder::{ - self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType, + self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type, }; use crate::path::{VariantPath, VariantPathElement}; use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice}; diff --git a/parquet-variant/src/variant/decimal.rs b/parquet-variant/src/variant/decimal.rs index 4793d88569bf..b0b7d36ed161 100644 --- a/parquet-variant/src/variant/decimal.rs +++ b/parquet-variant/src/variant/decimal.rs @@ -285,20 +285,24 @@ mod tests { decimal4_too_large.is_err(), "Decimal4 precision overflow should fail" ); - assert!(decimal4_too_large - .unwrap_err() - .to_string() - .contains("wider than max precision")); + assert!( + decimal4_too_large + .unwrap_err() + .to_string() + .contains("wider than max precision") + ); let decimal4_too_small = VariantDecimal4::try_new(-1_000_000_000_i32, 2); assert!( decimal4_too_small.is_err(), "Decimal4 precision underflow should fail" ); - assert!(decimal4_too_small - .unwrap_err() - .to_string() - .contains("wider than max precision")); + assert!( + decimal4_too_small + .unwrap_err() + .to_string() + .contains("wider than max precision") + ); // Test valid edge cases for Decimal4 let decimal4_max_valid = VariantDecimal4::try_new(999_999_999_i32, 2); @@ -319,20 +323,24 @@ mod tests { decimal8_too_large.is_err(), "Decimal8 precision overflow should fail" ); - assert!(decimal8_too_large - .unwrap_err() - .to_string() - .contains("wider than max precision")); + assert!( + decimal8_too_large + .unwrap_err() + .to_string() + .contains("wider than max precision") + ); let decimal8_too_small = VariantDecimal8::try_new(-1_000_000_000_000_000_000_i64, 2); assert!( decimal8_too_small.is_err(), "Decimal8 precision underflow should fail" ); - assert!(decimal8_too_small - .unwrap_err() - .to_string() - .contains("wider than max precision")); + assert!( + decimal8_too_small + .unwrap_err() + .to_string() + .contains("wider than max precision") + ); // Test valid edge cases for Decimal8 let decimal8_max_valid = VariantDecimal8::try_new(999_999_999_999_999_999_i64, 2); @@ -354,10 +362,12 @@ mod tests { decimal16_too_large.is_err(), "Decimal16 precision overflow should fail" ); - assert!(decimal16_too_large - .unwrap_err() - .to_string() - .contains("wider than max precision")); + assert!( + decimal16_too_large + .unwrap_err() + .to_string() + .contains("wider than max precision") + ); let decimal16_too_small = VariantDecimal16::try_new(-100000000000000000000000000000000000000_i128, 2); @@ -365,10 +375,12 @@ mod tests { decimal16_too_small.is_err(), "Decimal16 precision underflow should fail" ); - assert!(decimal16_too_small - .unwrap_err() - .to_string() - .contains("wider than max precision")); + assert!( + decimal16_too_small + .unwrap_err() + .to_string() + .contains("wider than max precision") + ); // Test valid edge cases for Decimal16 let decimal16_max_valid = @@ -394,10 +406,12 @@ mod tests { decimal4_invalid_scale.is_err(), "Decimal4 with scale > 9 should fail" ); - assert!(decimal4_invalid_scale - .unwrap_err() - .to_string() - .contains("larger than max precision")); + assert!( + decimal4_invalid_scale + .unwrap_err() + .to_string() + .contains("larger than max precision") + ); let decimal4_invalid_scale_large = VariantDecimal4::try_new(123_i32, 20); assert!( @@ -418,10 +432,12 @@ mod tests { decimal8_invalid_scale.is_err(), "Decimal8 with scale > 18 should fail" ); - assert!(decimal8_invalid_scale - .unwrap_err() - .to_string() - .contains("larger than max precision")); + assert!( + decimal8_invalid_scale + .unwrap_err() + .to_string() + .contains("larger than max precision") + ); let decimal8_invalid_scale_large = VariantDecimal8::try_new(123_i64, 25); assert!( @@ -442,10 +458,12 @@ mod tests { decimal16_invalid_scale.is_err(), "Decimal16 with scale > 38 should fail" ); - assert!(decimal16_invalid_scale - .unwrap_err() - .to_string() - .contains("larger than max precision")); + assert!( + decimal16_invalid_scale + .unwrap_err() + .to_string() + .contains("larger than max precision") + ); let decimal16_invalid_scale_large = VariantDecimal16::try_new(123_i128, 50); assert!( diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs index d14d3a7796cf..a150295717ef 100644 --- a/parquet-variant/src/variant/list.rs +++ b/parquet-variant/src/variant/list.rs @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes}; +use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets}; use crate::utils::{ first_byte_from_slice, overflow_error, slice_from_slice, slice_from_slice_at_offset, }; diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index 604ee0e890e6..9f9688acd090 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes}; +use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets}; use crate::utils::{ first_byte_from_slice, overflow_error, slice_from_slice, string_from_slice, try_binary_search_range_by, @@ -285,14 +285,13 @@ impl<'m> VariantMetadata<'m> { let mut current_offset = offsets.next().unwrap_or(0); let mut prev_value: Option<&str> = None; for next_offset in offsets { - let current_value = - value_buffer - .get(current_offset..next_offset) - .ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( + let current_value = value_buffer.get(current_offset..next_offset).ok_or_else( + || { + ArrowError::InvalidArgumentError(format!( "range {current_offset}..{next_offset} is invalid or out of bounds" )) - })?; + }, + )?; if let Some(prev_val) = prev_value { if current_value <= prev_val { diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs index 713be977b9eb..4e6faa7ab2f2 100644 --- a/parquet-variant/src/variant/object.rs +++ b/parquet-variant/src/variant/object.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes}; +use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets}; use crate::utils::{ first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by, }; diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs index 00c326c06406..d931d3929376 100644 --- a/parquet-variant/tests/variant_interop.rs +++ b/parquet-variant/tests/variant_interop.rs @@ -23,7 +23,7 @@ use std::{env, fs}; use chrono::{DateTime, NaiveDate, NaiveTime}; use parquet_variant::{ - ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, + ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, VariantDecimal16, }; use rand::rngs::StdRng; @@ -109,14 +109,29 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> { // Cases are commented out // Enabling is tracked in https://github.com/apache/arrow-rs/issues/7630 vec![ - ("primitive_binary", Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe])), + ( + "primitive_binary", + Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe]), + ), ("primitive_boolean_false", Variant::BooleanFalse), ("primitive_boolean_true", Variant::BooleanTrue), - ("primitive_date", Variant::Date(NaiveDate::from_ymd_opt(2025, 4 , 16).unwrap())), - ("primitive_decimal4", Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap())), + ( + "primitive_date", + Variant::Date(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap()), + ), + ( + "primitive_decimal4", + Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap()), + ), // ("primitive_decimal8", Variant::Decimal8{integer: 1234567890, scale: 2}), - ("primitive_decimal8", Variant::Decimal8(VariantDecimal8::try_new(1234567890,2).unwrap())), - ("primitive_decimal16", Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 2).unwrap())), + ( + "primitive_decimal8", + Variant::Decimal8(VariantDecimal8::try_new(1234567890, 2).unwrap()), + ), + ( + "primitive_decimal16", + Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 2).unwrap()), + ), ("primitive_float", Variant::Float(1234567890.1234)), ("primitive_double", Variant::Double(1234567890.1234)), ("primitive_int8", Variant::Int8(42)), @@ -124,14 +139,64 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> { ("primitive_int32", Variant::Int32(123456)), ("primitive_int64", Variant::Int64(1234567890123456789)), ("primitive_null", Variant::Null), - ("primitive_string", Variant::String("This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")), - ("primitive_timestamp", Variant::TimestampMicros(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(16, 34, 56, 780).unwrap().and_utc())), - ("primitive_timestampntz", Variant::TimestampNtzMicros(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap())), - ("primitive_timestamp_nanos", Variant::TimestampNanos(NaiveDate::from_ymd_opt(2024, 11, 7).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc())), - ("primitive_timestampntz_nanos", Variant::TimestampNtzNanos(NaiveDate::from_ymd_opt(2024, 11, 7).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap())), - ("primitive_uuid", Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap())), - ("short_string", Variant::ShortString(ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with utf8)").unwrap())), - ("primitive_time", Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 54, 123456).unwrap())), + ( + "primitive_string", + Variant::String( + "This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!", + ), + ), + ( + "primitive_timestamp", + Variant::TimestampMicros( + NaiveDate::from_ymd_opt(2025, 4, 16) + .unwrap() + .and_hms_milli_opt(16, 34, 56, 780) + .unwrap() + .and_utc(), + ), + ), + ( + "primitive_timestampntz", + Variant::TimestampNtzMicros( + NaiveDate::from_ymd_opt(2025, 4, 16) + .unwrap() + .and_hms_milli_opt(12, 34, 56, 780) + .unwrap(), + ), + ), + ( + "primitive_timestamp_nanos", + Variant::TimestampNanos( + NaiveDate::from_ymd_opt(2024, 11, 7) + .unwrap() + .and_hms_nano_opt(12, 33, 54, 123456789) + .unwrap() + .and_utc(), + ), + ), + ( + "primitive_timestampntz_nanos", + Variant::TimestampNtzNanos( + NaiveDate::from_ymd_opt(2024, 11, 7) + .unwrap() + .and_hms_nano_opt(12, 33, 54, 123456789) + .unwrap(), + ), + ), + ( + "primitive_uuid", + Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap()), + ), + ( + "short_string", + Variant::ShortString( + ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with utf8)").unwrap(), + ), + ), + ( + "primitive_time", + Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 54, 123456).unwrap()), + ), ] } #[test]