From c75115bd4fb200f1588d4825b957e9fe7a0e8051 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 22 Sep 2025 15:13:04 +0800 Subject: [PATCH 1/9] feat: implement literal expressions with binary serialization support --- src/iceberg/CMakeLists.txt | 3 +- src/iceberg/expression/literal.cc | 5 +- src/iceberg/expression/literal.h | 67 ++++- src/iceberg/test/literal_test.cc | 114 ++++++++ src/iceberg/test/manifest_list_reader_test.cc | 70 +++-- src/iceberg/test/manifest_reader_test.cc | 57 ++-- src/iceberg/type.cc | 42 +++ src/iceberg/type.h | 9 + src/iceberg/util/conversions.cc | 248 ++++++++++++++++++ src/iceberg/util/conversions.h | 51 ++++ 10 files changed, 601 insertions(+), 65 deletions(-) create mode 100644 src/iceberg/util/conversions.cc create mode 100644 src/iceberg/util/conversions.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index c9e665d4..bd2111f0 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -55,10 +55,11 @@ set(ICEBERG_SOURCES manifest_reader_internal.cc manifest_writer.cc arrow_c_data_guard_internal.cc + util/conversions.cc util/decimal.cc + util/gzip_internal.cc util/murmurhash3_internal.cc util/timepoint.cc - util/gzip_internal.cc util/uuid.cc) set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index e3abb6a6..71e2756f 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -23,6 +23,7 @@ #include #include "iceberg/exception.h" +#include "iceberg/util/conversions.h" namespace iceberg { @@ -151,11 +152,11 @@ Literal Literal::Binary(std::vector value) { Result Literal::Deserialize(std::span data, std::shared_ptr type) { - return NotImplemented("Deserialization of Literal is not implemented yet"); + return Conversions::FromBytes(type, data); } Result> Literal::Serialize() const { - return NotImplemented("Serialization of Literal is not implemented yet"); + return Conversions::ToBytes(*this); } // Getters diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index 1c16b8ed..a5a97626 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -144,11 +144,76 @@ class ICEBERG_EXPORT Literal : public util::Formattable { private: Literal(Value value, std::shared_ptr type); + friend class Conversions; friend class LiteralCaster; - private: Value value_; std::shared_ptr type_; }; +template +struct LiteralTraits { + using ValueType = void; +}; + +template <> +struct LiteralTraits { + using ValueType = bool; +}; + +template <> +struct LiteralTraits { + using ValueType = int32_t; +}; + +template <> +struct LiteralTraits { + using ValueType = int32_t; +}; + +template <> +struct LiteralTraits { + using ValueType = int64_t; +}; + +template <> +struct LiteralTraits { + using ValueType = int64_t; +}; + +template <> +struct LiteralTraits { + using ValueType = int64_t; +}; + +template <> +struct LiteralTraits { + using ValueType = int64_t; +}; + +template <> +struct LiteralTraits { + using ValueType = float; +}; + +template <> +struct LiteralTraits { + using ValueType = double; +}; + +template <> +struct LiteralTraits { + using ValueType = std::string; +}; + +template <> +struct LiteralTraits { + using ValueType = std::vector; +}; + +template <> +struct LiteralTraits { + using ValueType = std::vector; +}; + } // namespace iceberg diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index e9ddd47a..e8f09106 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) { EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less); } +// Type promotion tests +TEST(LiteralSerializationTest, TypePromotion) { + // 4-byte int data can be deserialized as long + std::vector int_data = {32, 0, 0, 0}; + auto long_result = Literal::Deserialize(int_data, int64()); + ASSERT_TRUE(long_result.has_value()); + EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong); + EXPECT_EQ(long_result->ToString(), "32"); + + auto long_bytes = long_result->Serialize(); + ASSERT_TRUE(long_bytes.has_value()); + EXPECT_EQ(long_bytes->size(), 8); + + // 4-byte float data can be deserialized as double + std::vector float_data = {0, 0, 128, 63}; + auto double_result = Literal::Deserialize(float_data, float64()); + ASSERT_TRUE(double_result.has_value()); + EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); + EXPECT_EQ(double_result->ToString(), "1.000000"); + + auto double_bytes = double_result->Serialize(); + ASSERT_TRUE(double_bytes.has_value()); + EXPECT_EQ(double_bytes->size(), 8); +} + +struct LiteralRoundTripParam { + std::string test_name; + std::vector input_bytes; + Literal expected_literal; + std::shared_ptr type; +}; + +class LiteralSerializationParamTest + : public ::testing::TestWithParam {}; + +TEST_P(LiteralSerializationParamTest, RoundTrip) { + const auto& param = GetParam(); + + // Deserialize from bytes + Result literal_result = Literal::Deserialize(param.input_bytes, param.type); + ASSERT_TRUE(literal_result.has_value()) + << "Deserialization failed: " << literal_result.error().message; + + // Check type and value + EXPECT_EQ(literal_result->type()->type_id(), param.expected_literal.type()->type_id()); + EXPECT_EQ(literal_result->ToString(), param.expected_literal.ToString()); + + // Serialize back to bytes + Result> bytes_result = literal_result->Serialize(); + ASSERT_TRUE(bytes_result.has_value()) + << "Serialization failed: " << bytes_result.error().message; + EXPECT_EQ(*bytes_result, param.input_bytes); + + // Deserialize again to verify idempotency + Result final_literal = Literal::Deserialize(*bytes_result, param.type); + ASSERT_TRUE(final_literal.has_value()) + << "Final deserialization failed: " << final_literal.error().message; + EXPECT_EQ(final_literal->type()->type_id(), param.expected_literal.type()->type_id()); + EXPECT_EQ(final_literal->ToString(), param.expected_literal.ToString()); +} + +INSTANTIATE_TEST_SUITE_P( + BinarySerializationTests, LiteralSerializationParamTest, + ::testing::Values( + // Basic types + LiteralRoundTripParam{"BooleanTrue", {1}, Literal::Boolean(true), boolean()}, + LiteralRoundTripParam{"BooleanFalse", {0}, Literal::Boolean(false), boolean()}, + LiteralRoundTripParam{"Int", {32, 0, 0, 0}, Literal::Int(32), int32()}, + LiteralRoundTripParam{ + "Long", {32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64()}, + LiteralRoundTripParam{"Float", {0, 0, 128, 63}, Literal::Float(1.0f), float32()}, + LiteralRoundTripParam{ + "Double", {0, 0, 0, 0, 0, 0, 240, 63}, Literal::Double(1.0), float64()}, + LiteralRoundTripParam{"String", + {105, 99, 101, 98, 101, 114, 103}, + Literal::String("iceberg"), + string()}, + LiteralRoundTripParam{"BinaryData", + {0x01, 0x02, 0x03, 0xFF}, + Literal::Binary({0x01, 0x02, 0x03, 0xFF}), + binary()}, + // Edge cases that fit the round-trip pattern + LiteralRoundTripParam{ + "NegativeInt", {224, 255, 255, 255}, Literal::Int(-32), int32()}, + LiteralRoundTripParam{"NegativeLong", + {224, 255, 255, 255, 255, 255, 255, 255}, + Literal::Long(-32), + int64()}, + // IEEE 754 representation for NaN and Infinity (in little-endian) + LiteralRoundTripParam{"FloatInfinity", + {0, 0, 128, 127}, + Literal::Float(std::numeric_limits::infinity()), + float32()}, + LiteralRoundTripParam{"FloatNaN", + {0, 0, 192, 127}, + Literal::Float(std::numeric_limits::quiet_NaN()), + float32()} + // TODO(Li Feiyang): Add tests for Date, Time, Timestamp, TimestampTz + ), + + [](const testing::TestParamInfo& info) { + return info.param.test_name; + }); + +TEST(LiteralSerializationEdgeCaseTest, EmptyStringSerialization) { + auto empty_string = Literal::String(""); + auto empty_bytes = empty_string.Serialize(); + ASSERT_TRUE(empty_bytes.has_value()); + EXPECT_TRUE(empty_bytes->empty()); + + auto deserialize_result = Literal::Deserialize(*empty_bytes, string()); + EXPECT_THAT(deserialize_result, IsError(ErrorKind::kInvalidArgument)); +} + } // namespace iceberg diff --git a/src/iceberg/test/manifest_list_reader_test.cc b/src/iceberg/test/manifest_list_reader_test.cc index a3c08c35..9fd6e4c1 100644 --- a/src/iceberg/test/manifest_list_reader_test.cc +++ b/src/iceberg/test/manifest_list_reader_test.cc @@ -23,6 +23,7 @@ #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/avro/avro_register.h" +#include "iceberg/expression/literal.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader.h" #include "temp_file_test_base.h" @@ -76,43 +77,38 @@ class ManifestListReaderV1Test : public ManifestListReaderTestBase { std::vector file_size = {6185, 6113}; std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; - std::vector> lower_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; - - std::vector> upper_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; - - return {{.manifest_path = paths[0], - .manifest_length = file_size[0], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[0], - .added_files_count = 4, - .existing_files_count = 0, - .deleted_files_count = 0, - .added_rows_count = 6, - .existing_rows_count = 0, - .deleted_rows_count = 0, - .partitions = {{.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[0], - .upper_bound = upper_bounds[0]}}}, - - {.manifest_path = paths[1], - .manifest_length = file_size[1], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[1], - .added_files_count = 0, - .existing_files_count = 0, - .deleted_files_count = 2, - .added_rows_count = 0, - .existing_rows_count = 0, - .deleted_rows_count = 6, - .partitions = {{.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[1], - .upper_bound = upper_bounds[1]}}}}; + return { + {.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 4, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 6, + .existing_rows_count = 0, + .deleted_rows_count = 0, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = Literal::String("2022-02-22").Serialize().value(), + .upper_bound = + Literal::String("2022-2-23").Serialize().value()}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 2, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 6, + .partitions = { + {.contains_null = false, + .contains_nan = false, + .lower_bound = Literal::String("2022-2-22").Serialize().value(), + .upper_bound = Literal::String("2022-2-23").Serialize().value()}}}}; } std::vector PrepareComplexTypeTestData() { diff --git a/src/iceberg/test/manifest_reader_test.cc b/src/iceberg/test/manifest_reader_test.cc index db703c17..7381b298 100644 --- a/src/iceberg/test/manifest_reader_test.cc +++ b/src/iceberg/test/manifest_reader_test.cc @@ -94,24 +94,33 @@ class ManifestReaderV1Test : public ManifestReaderTestBase { "order_ts_hour=2021-01-26-00/" "00000-2-d5ae78b7-4449-45ec-adb7-c0e9c0bdb714-0-00004.parquet"}; std::vector partitions = {447696, 473976, 465192, 447672}; + + // TODO(Li Feiyang): The Decimal type and its serialization logic are not yet fully + // implemented to support variable-length encoding as required by the Iceberg + // specification. Using Literal::Binary as a temporary substitute to represent the raw + // bytes for the decimal values. std::vector>> bounds = { - {{1, {0xd2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {'.', 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {3, {0x12, 0xe2}}, - {4, {0xc0, 'y', 0xe7, 0x98, 0xd6, 0xb9, 0x05, 0x00}}}, - {{1, {0xd2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {'.', 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {3, {0x12, 0xe3}}, - {4, {0xc0, 0x19, '#', '=', 0xe2, 0x0f, 0x06, 0x00}}}, - {{1, {'{', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {0xc8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {3, {0x0e, '"'}}, - {4, {0xc0, 0xd9, '7', 0x93, 0x1f, 0xf3, 0x05, 0x00}}}, - {{1, {'{', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {0xc8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {3, {0x0e, '!'}}, - {4, {0xc0, 0x19, 0x10, '{', 0xc2, 0xb9, 0x05, 0x00}}}, + {{1, Literal::Long(1234).Serialize().value()}, + {2, Literal::Long(5678).Serialize().value()}, + {3, Literal::Binary({0x12, 0xe2}).Serialize().value()}, + + {4, Literal::Timestamp(1611706223000000LL).Serialize().value()}}, + {{1, Literal::Long(1234).Serialize().value()}, + {2, Literal::Long(5678).Serialize().value()}, + {3, Literal::Binary({0x12, 0xe3}).Serialize().value()}, + + {4, Literal::Timestamp(1706314223000000LL).Serialize().value()}}, + {{1, Literal::Long(123).Serialize().value()}, + {2, Literal::Long(456).Serialize().value()}, + {3, Literal::Binary({0x0e, 0x22}).Serialize().value()}, + + {4, Literal::Timestamp(1674691823000000LL).Serialize().value()}}, + {{1, Literal::Long(123).Serialize().value()}, + {2, Literal::Long(456).Serialize().value()}, + {3, Literal::Binary({0x0e, 0x21}).Serialize().value()}, + {4, Literal::Timestamp(1611619823000000LL).Serialize().value()}}, }; + for (int i = 0; i < 4; ++i) { ManifestEntry entry; entry.status = ManifestStatus::kAdded; @@ -159,16 +168,16 @@ class ManifestReaderV2Test : public ManifestReaderTestBase { std::vector record_counts = {4}; std::vector>> lower_bounds = { - {{1, {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 'f', 'o', 'u', 'r'}}, - {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '1'}}, - {4, {0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xdc, 0x5e, 0x40}}}}; + {{1, Literal::Long(1).Serialize().value()}, + {2, Literal::String("record_four").Serialize().value()}, + {3, Literal::String("data_content_1").Serialize().value()}, + {4, Literal::Double(123.45).Serialize().value()}}}; std::vector>> upper_bounds = { - {{1, {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 't', 'w', 'o'}}, - {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}}, - {4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}}; + {{1, Literal::Long(4).Serialize().value()}, + {2, Literal::String("record_two").Serialize().value()}, + {3, Literal::String("data_content_4").Serialize().value()}, + {4, Literal::Double(456.78).Serialize().value()}}}; DataFile data_file{.file_path = test_dir_prefix + paths[0], .file_format = FileFormatType::kParquet, diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index 7b0f0945..ddb32858 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include "iceberg/exception.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep @@ -386,4 +387,45 @@ std::shared_ptr struct_(std::vector fields) { return std::make_shared(std::move(fields)); } +std::string_view ToString(TypeId id) { + switch (id) { + case TypeId::kStruct: + return "struct"; + case TypeId::kList: + return "list"; + case TypeId::kMap: + return "map"; + case TypeId::kBoolean: + return "boolean"; + case TypeId::kInt: + return "int"; + case TypeId::kLong: + return "long"; + case TypeId::kFloat: + return "float"; + case TypeId::kDouble: + return "double"; + case TypeId::kDecimal: + return "decimal"; + case TypeId::kDate: + return "date"; + case TypeId::kTime: + return "time"; + case TypeId::kTimestamp: + return "timestamp"; + case TypeId::kTimestampTz: + return "timestamptz"; + case TypeId::kString: + return "string"; + case TypeId::kUuid: + return "uuid"; + case TypeId::kFixed: + return "fixed"; + case TypeId::kBinary: + return "binary"; + } + + std::unreachable(); +} + } // namespace iceberg diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 01c911dd..25652683 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -531,4 +531,13 @@ ICEBERG_EXPORT std::shared_ptr map(SchemaField key, SchemaField value); /// @} +/// \brief Get the lowercase string representation of a TypeId. +/// +/// This returns the same lowercase string as used by Type::ToString() methods. +/// For example: TypeId::kBoolean -> "boolean", TypeId::kInt -> "int", etc. +/// +/// \param id The TypeId to convert to string +/// \return A string_view containing the lowercase type name +ICEBERG_EXPORT std::string_view ToString(TypeId id); + } // namespace iceberg diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc new file mode 100644 index 00000000..925070ee --- /dev/null +++ b/src/iceberg/util/conversions.cc @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/conversions.h" + +#include +#include +#include +#include + +#include "iceberg/util/endian.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +/// \brief Write a value in little-endian format and return as vector. +template +std::vector WriteLittleEndian(T value) { + value = ToLittleEndian(value); + const auto* bytes = reinterpret_cast(&value); + std::vector result; + result.insert(result.end(), bytes, bytes + sizeof(T)); + return result; +} + +/// \brief Read a value in little-endian format from the data. +template +Result ReadLittleEndian(std::span data) { + if (data.size() < sizeof(T)) [[unlikely]] { + return InvalidArgument("Insufficient data to read {} bytes, got {}", sizeof(T), + data.size()); + } + + T value; + std::memcpy(&value, data.data(), sizeof(T)); + return FromLittleEndian(value); +} + +template +Result> ToBytesImpl(const Literal::Value& value) { + using CppType = typename LiteralTraits::ValueType; + return WriteLittleEndian(std::get(value)); +} + +#define DISPATCH_LITERAL_TO_BYTES(type_id) \ + case type_id: \ + return ToBytesImpl(value); + +Result> Conversions::ToBytes(const PrimitiveType& type, + const Literal::Value& value) { + const auto type_id = type.type_id(); + + switch (type_id) { + DISPATCH_LITERAL_TO_BYTES(TypeId::kInt) + DISPATCH_LITERAL_TO_BYTES(TypeId::kDate) + DISPATCH_LITERAL_TO_BYTES(TypeId::kLong) + DISPATCH_LITERAL_TO_BYTES(TypeId::kTime) + DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp) + DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz) + DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat) + DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble) + case TypeId::kBoolean: { + return std::vector{std::get(value) ? static_cast(0x01) + : static_cast(0x00)}; + } + + case TypeId::kString: { + const auto& str = std::get(value); + return std::vector(str.begin(), str.end()); + } + + case TypeId::kBinary: { + return std::get>(value); + } + + case TypeId::kFixed: { + if (std::holds_alternative>(value)) { + const auto& fixed_bytes = std::get>(value); + return std::vector(fixed_bytes.begin(), fixed_bytes.end()); + } else if (std::holds_alternative>(value)) { + return std::get>(value); + } else { + std::string actual_type = std::visit( + [](auto&& arg) -> std::string { return typeid(arg).name(); }, value); + return InvalidArgument("Invalid value type for Fixed literal, got type: {}", + actual_type); + } + } + // TODO(Li Feiyang): Add support for UUID and Decimal + + default: + return NotSupported("Serialization for type {} is not supported", type.ToString()); + } +} + +#undef DISPATCH_LITERAL_TO_BYTES + +Result> Conversions::ToBytes(const Literal& literal) { + // Cannot serialize special values + if (literal.IsAboveMax()) { + return NotSupported("Cannot serialize AboveMax"); + } + if (literal.IsBelowMin()) { + return NotSupported("Cannot serialize BelowMin"); + } + if (literal.IsNull()) { + return NotSupported("Cannot serialize null"); + } + + return ToBytes(*literal.type(), literal.value()); +} + +Result Conversions::FromBytes(const PrimitiveType& type, + std::span data) { + if (data.empty()) { + return InvalidArgument("Data cannot be empty"); + } + + const auto type_id = type.type_id(); + + switch (type_id) { + case TypeId::kBoolean: { + if (data.size() != 1) { + return InvalidArgument("Boolean requires 1 byte, got {}", data.size()); + } + ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); + return Literal::Value{static_cast(value != 0x00)}; + } + + case TypeId::kInt: { + if (data.size() != sizeof(int32_t)) { + return InvalidArgument("Int requires {} bytes, got {}", sizeof(int32_t), + data.size()); + } + ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); + return Literal::Value{value}; + } + + case TypeId::kDate: { + if (data.size() != sizeof(int32_t)) { + return InvalidArgument("Date requires {} bytes, got {}", sizeof(int32_t), + data.size()); + } + ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); + return Literal::Value{value}; + } + + case TypeId::kLong: + case TypeId::kTime: + case TypeId::kTimestamp: + case TypeId::kTimestampTz: { + int64_t value; + if (data.size() == 8) { + ICEBERG_ASSIGN_OR_RAISE(auto long_value, ReadLittleEndian(data)); + value = long_value; + } else if (data.size() == 4) { + // Type was promoted from int to long + ICEBERG_ASSIGN_OR_RAISE(auto int_value, ReadLittleEndian(data)); + value = static_cast(int_value); + } else { + return InvalidArgument("{} requires 4 or 8 bytes, got {}", ToString(type_id), + data.size()); + } + + return Literal::Value{value}; + } + + case TypeId::kFloat: { + if (data.size() != sizeof(float)) { + return InvalidArgument("Float requires {} bytes, got {}", sizeof(float), + data.size()); + } + ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); + return Literal::Value{value}; + } + + case TypeId::kDouble: { + if (data.size() == 8) { + ICEBERG_ASSIGN_OR_RAISE(auto double_value, ReadLittleEndian(data)); + return Literal::Value{double_value}; + } else if (data.size() == 4) { + // Type was promoted from float to double + ICEBERG_ASSIGN_OR_RAISE(auto float_value, ReadLittleEndian(data)); + return Literal::Value{static_cast(float_value)}; + } else { + return InvalidArgument("Double requires 4 or 8 bytes, got {}", data.size()); + } + } + + case TypeId::kString: { + return Literal::Value{ + std::string(reinterpret_cast(data.data()), data.size())}; + } + + case TypeId::kBinary: { + return Literal::Value{std::vector(data.begin(), data.end())}; + } + + case TypeId::kFixed: { + if (data.size() == 16) { + std::array fixed_bytes; + std::ranges::copy(data, fixed_bytes.begin()); + return Literal::Value{fixed_bytes}; + } else { + return Literal::Value{std::vector(data.begin(), data.end())}; + } + } + // TODO(Li Feiyang): Add support for UUID and Decimal + + default: + return NotSupported("Deserialization for type {} is not supported", + type.ToString()); + } +} + +Result Conversions::FromBytes(std::shared_ptr type, + std::span data) { + if (!type) { + return InvalidArgument("Type cannot be null"); + } + + ICEBERG_ASSIGN_OR_RAISE(auto value, FromBytes(*type, data)); + + // If we got a null value (monostate), create a null Literal + if (std::holds_alternative(value)) { + return Literal::Null(std::move(type)); + } + + return Literal(std::move(value), std::move(type)); +} + +} // namespace iceberg diff --git a/src/iceberg/util/conversions.h b/src/iceberg/util/conversions.h new file mode 100644 index 00000000..e0eed44a --- /dev/null +++ b/src/iceberg/util/conversions.h @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/expression/literal.h" +#include "iceberg/result.h" +#include "iceberg/type_fwd.h" + +/// \file iceberg/util/conversions.h +/// \brief Conversion utilities for primitive types + +namespace iceberg { + +/// \brief Conversion utilities for primitive types +class ICEBERG_EXPORT Conversions { + public: + /// \brief Convert a literal value to bytes + static Result> ToBytes(const PrimitiveType& type, + const Literal::Value& value); + + static Result> ToBytes(const Literal& literal); + + /// \brief Convert bytes to a literal value + static Result FromBytes(const PrimitiveType& type, + std::span data); + + static Result FromBytes(std::shared_ptr type, + std::span data); +}; + +} // namespace iceberg From fc297ec27a7dea475b647fb18e83e018e3a9d6ee Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 22 Sep 2025 16:21:02 +0800 Subject: [PATCH 2/9] fix ci --- src/iceberg/test/literal_test.cc | 72 ++++++++++++------------ src/iceberg/util/conversions.cc | 97 ++++++++++++++------------------ src/iceberg/util/conversions.h | 18 +++++- 3 files changed, 92 insertions(+), 95 deletions(-) diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index e8f09106..062414f8 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -81,7 +81,7 @@ TEST(LiteralTest, IntCastTo) { auto long_result = int_literal.CastTo(iceberg::int64()); ASSERT_THAT(long_result, IsOk()); EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong); - EXPECT_EQ(long_result->ToString(), "42"); + EXPECT_EQ(std::get(long_result->value()), 42L); // Cast to Float auto float_result = int_literal.CastTo(iceberg::float32()); @@ -136,8 +136,8 @@ TEST(LiteralTest, LongCastTo) { EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); } +// Test overflow cases TEST(LiteralTest, LongCastToIntOverflow) { - // Test overflow cases auto max_long = Literal::Long(static_cast(std::numeric_limits::max()) + 1); auto min_long = @@ -383,31 +383,6 @@ TEST(LiteralTest, DoubleZeroComparison) { EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less); } -// Type promotion tests -TEST(LiteralSerializationTest, TypePromotion) { - // 4-byte int data can be deserialized as long - std::vector int_data = {32, 0, 0, 0}; - auto long_result = Literal::Deserialize(int_data, int64()); - ASSERT_TRUE(long_result.has_value()); - EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong); - EXPECT_EQ(long_result->ToString(), "32"); - - auto long_bytes = long_result->Serialize(); - ASSERT_TRUE(long_bytes.has_value()); - EXPECT_EQ(long_bytes->size(), 8); - - // 4-byte float data can be deserialized as double - std::vector float_data = {0, 0, 128, 63}; - auto double_result = Literal::Deserialize(float_data, float64()); - ASSERT_TRUE(double_result.has_value()); - EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); - EXPECT_EQ(double_result->ToString(), "1.000000"); - - auto double_bytes = double_result->Serialize(); - ASSERT_TRUE(double_bytes.has_value()); - EXPECT_EQ(double_bytes->size(), 8); -} - struct LiteralRoundTripParam { std::string test_name; std::vector input_bytes; @@ -415,10 +390,10 @@ struct LiteralRoundTripParam { std::shared_ptr type; }; -class LiteralSerializationParamTest - : public ::testing::TestWithParam {}; +class LiteralSerializationParam : public ::testing::TestWithParam { +}; -TEST_P(LiteralSerializationParamTest, RoundTrip) { +TEST_P(LiteralSerializationParam, RoundTrip) { const auto& param = GetParam(); // Deserialize from bytes @@ -427,8 +402,7 @@ TEST_P(LiteralSerializationParamTest, RoundTrip) { << "Deserialization failed: " << literal_result.error().message; // Check type and value - EXPECT_EQ(literal_result->type()->type_id(), param.expected_literal.type()->type_id()); - EXPECT_EQ(literal_result->ToString(), param.expected_literal.ToString()); + EXPECT_EQ(*literal_result, param.expected_literal); // Serialize back to bytes Result> bytes_result = literal_result->Serialize(); @@ -440,12 +414,11 @@ TEST_P(LiteralSerializationParamTest, RoundTrip) { Result final_literal = Literal::Deserialize(*bytes_result, param.type); ASSERT_TRUE(final_literal.has_value()) << "Final deserialization failed: " << final_literal.error().message; - EXPECT_EQ(final_literal->type()->type_id(), param.expected_literal.type()->type_id()); - EXPECT_EQ(final_literal->ToString(), param.expected_literal.ToString()); + EXPECT_EQ(*final_literal, param.expected_literal); } INSTANTIATE_TEST_SUITE_P( - BinarySerializationTests, LiteralSerializationParamTest, + BinarySerialization, LiteralSerializationParam, ::testing::Values( // Basic types LiteralRoundTripParam{"BooleanTrue", {1}, Literal::Boolean(true), boolean()}, @@ -483,11 +456,11 @@ INSTANTIATE_TEST_SUITE_P( // TODO(Li Feiyang): Add tests for Date, Time, Timestamp, TimestampTz ), - [](const testing::TestParamInfo& info) { + [](const testing::TestParamInfo& info) { return info.param.test_name; }); -TEST(LiteralSerializationEdgeCaseTest, EmptyStringSerialization) { +TEST(LiteralSerializationTest, EmptyString) { auto empty_string = Literal::String(""); auto empty_bytes = empty_string.Serialize(); ASSERT_TRUE(empty_bytes.has_value()); @@ -497,4 +470,29 @@ TEST(LiteralSerializationEdgeCaseTest, EmptyStringSerialization) { EXPECT_THAT(deserialize_result, IsError(ErrorKind::kInvalidArgument)); } +// Type promotion tests +TEST(LiteralSerializationTest, TypePromotion) { + // 4-byte int data can be deserialized as long + std::vector int_data = {32, 0, 0, 0}; + auto long_result = Literal::Deserialize(int_data, int64()); + ASSERT_TRUE(long_result.has_value()); + EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong); + EXPECT_EQ(std::get(long_result->value()), 32L); + + auto long_bytes = long_result->Serialize(); + ASSERT_TRUE(long_bytes.has_value()); + EXPECT_EQ(long_bytes->size(), 8); + + // 4-byte float data can be deserialized as double + std::vector float_data = {0, 0, 128, 63}; + auto double_result = Literal::Deserialize(float_data, float64()); + ASSERT_TRUE(double_result.has_value()); + EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); + EXPECT_EQ(std::get(double_result->value()), 1.0); + + auto double_bytes = double_result->Serialize(); + ASSERT_TRUE(double_bytes.has_value()); + EXPECT_EQ(double_bytes->size(), 8); +} + } // namespace iceberg diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc index 925070ee..e33658a1 100644 --- a/src/iceberg/util/conversions.cc +++ b/src/iceberg/util/conversions.cc @@ -42,7 +42,7 @@ std::vector WriteLittleEndian(T value) { /// \brief Read a value in little-endian format from the data. template Result ReadLittleEndian(std::span data) { - if (data.size() < sizeof(T)) [[unlikely]] { + if (data.size() != sizeof(T)) [[unlikely]] { return InvalidArgument("Insufficient data to read {} bytes, got {}", sizeof(T), data.size()); } @@ -58,6 +58,35 @@ Result> ToBytesImpl(const Literal::Value& value) { return WriteLittleEndian(std::get(value)); } +template <> +Result> ToBytesImpl(const Literal::Value& value) { + return std::vector{std::get(value) ? static_cast(0x01) + : static_cast(0x00)}; +} + +template <> +Result> ToBytesImpl(const Literal::Value& value) { + const auto& str = std::get(value); + return std::vector(str.begin(), str.end()); +} + +template <> +Result> ToBytesImpl(const Literal::Value& value) { + return std::get>(value); +} + +template <> +Result> ToBytesImpl(const Literal::Value& value) { + if (std::holds_alternative>(value)) { + return std::get>(value); + } else { + std::string actual_type = + std::visit([](auto&& arg) -> std::string { return typeid(arg).name(); }, value); + return InvalidArgument("Invalid value type for Fixed literal, got type: {}", + actual_type); + } +} + #define DISPATCH_LITERAL_TO_BYTES(type_id) \ case type_id: \ return ToBytesImpl(value); @@ -75,33 +104,10 @@ Result> Conversions::ToBytes(const PrimitiveType& type, DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz) DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat) DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble) - case TypeId::kBoolean: { - return std::vector{std::get(value) ? static_cast(0x01) - : static_cast(0x00)}; - } - - case TypeId::kString: { - const auto& str = std::get(value); - return std::vector(str.begin(), str.end()); - } - - case TypeId::kBinary: { - return std::get>(value); - } - - case TypeId::kFixed: { - if (std::holds_alternative>(value)) { - const auto& fixed_bytes = std::get>(value); - return std::vector(fixed_bytes.begin(), fixed_bytes.end()); - } else if (std::holds_alternative>(value)) { - return std::get>(value); - } else { - std::string actual_type = std::visit( - [](auto&& arg) -> std::string { return typeid(arg).name(); }, value); - return InvalidArgument("Invalid value type for Fixed literal, got type: {}", - actual_type); - } - } + DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean) + DISPATCH_LITERAL_TO_BYTES(TypeId::kString) + DISPATCH_LITERAL_TO_BYTES(TypeId::kBinary) + DISPATCH_LITERAL_TO_BYTES(TypeId::kFixed) // TODO(Li Feiyang): Add support for UUID and Decimal default: @@ -129,34 +135,23 @@ Result> Conversions::ToBytes(const Literal& literal) { Result Conversions::FromBytes(const PrimitiveType& type, std::span data) { if (data.empty()) { - return InvalidArgument("Data cannot be empty"); + return InvalidArgument("Cannot deserialize empty value"); } const auto type_id = type.type_id(); switch (type_id) { case TypeId::kBoolean: { - if (data.size() != 1) { - return InvalidArgument("Boolean requires 1 byte, got {}", data.size()); - } ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{static_cast(value != 0x00)}; } case TypeId::kInt: { - if (data.size() != sizeof(int32_t)) { - return InvalidArgument("Int requires {} bytes, got {}", sizeof(int32_t), - data.size()); - } ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{value}; } case TypeId::kDate: { - if (data.size() != sizeof(int32_t)) { - return InvalidArgument("Date requires {} bytes, got {}", sizeof(int32_t), - data.size()); - } ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{value}; } @@ -166,40 +161,30 @@ Result Conversions::FromBytes(const PrimitiveType& type, case TypeId::kTimestamp: case TypeId::kTimestampTz: { int64_t value; - if (data.size() == 8) { - ICEBERG_ASSIGN_OR_RAISE(auto long_value, ReadLittleEndian(data)); - value = long_value; - } else if (data.size() == 4) { + if (data.size() < 8) { // Type was promoted from int to long ICEBERG_ASSIGN_OR_RAISE(auto int_value, ReadLittleEndian(data)); value = static_cast(int_value); } else { - return InvalidArgument("{} requires 4 or 8 bytes, got {}", ToString(type_id), - data.size()); + ICEBERG_ASSIGN_OR_RAISE(auto long_value, ReadLittleEndian(data)); + value = long_value; } - return Literal::Value{value}; } case TypeId::kFloat: { - if (data.size() != sizeof(float)) { - return InvalidArgument("Float requires {} bytes, got {}", sizeof(float), - data.size()); - } ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{value}; } case TypeId::kDouble: { - if (data.size() == 8) { - ICEBERG_ASSIGN_OR_RAISE(auto double_value, ReadLittleEndian(data)); - return Literal::Value{double_value}; - } else if (data.size() == 4) { + if (data.size() < 8) { // Type was promoted from float to double ICEBERG_ASSIGN_OR_RAISE(auto float_value, ReadLittleEndian(data)); return Literal::Value{static_cast(float_value)}; } else { - return InvalidArgument("Double requires 4 or 8 bytes, got {}", data.size()); + ICEBERG_ASSIGN_OR_RAISE(auto double_value, ReadLittleEndian(data)); + return Literal::Value{double_value}; } } diff --git a/src/iceberg/util/conversions.h b/src/iceberg/util/conversions.h index e0eed44a..fe383bc5 100644 --- a/src/iceberg/util/conversions.h +++ b/src/iceberg/util/conversions.h @@ -34,16 +34,30 @@ namespace iceberg { /// \brief Conversion utilities for primitive types class ICEBERG_EXPORT Conversions { public: - /// \brief Convert a literal value to bytes + /// \brief Serializes a raw literal value into a byte vector according to its type. + /// \param type The primitive type of the value. + /// \param value The std::variant holding the raw literal value to serialize. + /// \return A Result containing the serialized value. static Result> ToBytes(const PrimitiveType& type, const Literal::Value& value); + /// \brief Serializes a complete Literal object into a byte vector. + /// \param literal The Literal object to serialize. + /// \return A Result containing the serialized value. static Result> ToBytes(const Literal& literal); - /// \brief Convert bytes to a literal value + /// \brief Deserializes a span of bytes into a raw literal value based on the given + /// type. + /// \param type The target primitive type to interpret the bytes as. + /// \param data A std::span of bytes representing the serialized value. + /// \return A Result containing the deserialized value. static Result FromBytes(const PrimitiveType& type, std::span data); + /// \brief Deserializes a span of bytes into a complete Literal object. + /// \param type A shared pointer to the target primitive type. + /// \param data A std::span of bytes representing the serialized value. + /// \return A Result containing the deserialized value. static Result FromBytes(std::shared_ptr type, std::span data); }; From db2e12939e73c7bc158f364a2abdffd3a052823c Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 22 Sep 2025 17:03:58 +0800 Subject: [PATCH 3/9] add tests for times --- src/iceberg/expression/literal.cc | 15 ++++- src/iceberg/expression/literal.h | 1 + src/iceberg/test/literal_test.cc | 108 +++++++++++++++++++++++++----- 3 files changed, 106 insertions(+), 18 deletions(-) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 71e2756f..fd6609a2 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -150,6 +150,10 @@ Literal Literal::Binary(std::vector value) { return {Value{std::move(value)}, binary()}; } +Literal Literal::Fixed(std::vector value, int32_t length) { + return {Value{std::move(value)}, fixed(length)}; +} + Result Literal::Deserialize(std::span data, std::shared_ptr type) { return Conversions::FromBytes(type, data); @@ -217,6 +221,7 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { } case TypeId::kLong: + case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: { auto this_val = std::get(value_); @@ -295,9 +300,17 @@ std::string Literal::ToString() const { } return result; } + case TypeId::kFixed: { + const auto& fixed_data = std::get>(value_); + std::string result; + result.reserve(fixed_data.size() * 2); // 2 chars per byte + for (const auto& byte : fixed_data) { + std::format_to(std::back_inserter(result), "{:02X}", byte); + } + return result; + } case TypeId::kDecimal: case TypeId::kUuid: - case TypeId::kFixed: case TypeId::kDate: case TypeId::kTime: case TypeId::kTimestamp: diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index a5a97626..ba189271 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -72,6 +72,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable { static Literal Double(double value); static Literal String(std::string value); static Literal Binary(std::vector value); + static Literal Fixed(std::vector value, int32_t length); /// \brief Create a literal representing a null value. static Literal Null(std::shared_ptr type) { diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 062414f8..58f744a1 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -423,38 +423,112 @@ INSTANTIATE_TEST_SUITE_P( // Basic types LiteralRoundTripParam{"BooleanTrue", {1}, Literal::Boolean(true), boolean()}, LiteralRoundTripParam{"BooleanFalse", {0}, Literal::Boolean(false), boolean()}, + LiteralRoundTripParam{"Int", {32, 0, 0, 0}, Literal::Int(32), int32()}, + LiteralRoundTripParam{ + "IntMaxValue", {255, 255, 255, 127}, Literal::Int(2147483647), int32()}, + LiteralRoundTripParam{ + "IntMinValue", {0, 0, 0, 128}, Literal::Int(-2147483648), int32()}, + LiteralRoundTripParam{ + "NegativeInt", {224, 255, 255, 255}, Literal::Int(-32), int32()}, + LiteralRoundTripParam{ "Long", {32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64()}, + LiteralRoundTripParam{"LongMaxValue", + {255, 255, 255, 255, 255, 255, 255, 127}, + Literal::Long(std::numeric_limits::max()), + int64()}, + LiteralRoundTripParam{"LongMinValue", + {0, 0, 0, 0, 0, 0, 0, 128}, + Literal::Long(std::numeric_limits::min()), + int64()}, + LiteralRoundTripParam{"NegativeLong", + {224, 255, 255, 255, 255, 255, 255, 255}, + Literal::Long(-32), + int64()}, + LiteralRoundTripParam{"Float", {0, 0, 128, 63}, Literal::Float(1.0f), float32()}, + LiteralRoundTripParam{"FloatNegativeInfinity", + {0, 0, 128, 255}, + Literal::Float(-std::numeric_limits::infinity()), + float32()}, + LiteralRoundTripParam{"FloatMaxValue", + {255, 255, 127, 127}, + Literal::Float(std::numeric_limits::max()), + float32()}, + LiteralRoundTripParam{"FloatMinValue", + {255, 255, 127, 255}, + Literal::Float(std::numeric_limits::lowest()), + float32()}, + LiteralRoundTripParam{ "Double", {0, 0, 0, 0, 0, 0, 240, 63}, Literal::Double(1.0), float64()}, + LiteralRoundTripParam{"DoubleNegativeInfinity", + {0, 0, 0, 0, 0, 0, 240, 255}, + Literal::Double(-std::numeric_limits::infinity()), + float64()}, + LiteralRoundTripParam{"DoubleMaxValue", + {255, 255, 255, 255, 255, 255, 239, 127}, + Literal::Double(std::numeric_limits::max()), + float64()}, + LiteralRoundTripParam{"DoubleMinValue", + {255, 255, 255, 255, 255, 255, 239, 255}, + Literal::Double(std::numeric_limits::lowest()), + float64()}, + LiteralRoundTripParam{"String", {105, 99, 101, 98, 101, 114, 103}, Literal::String("iceberg"), string()}, + LiteralRoundTripParam{ + "StringLong", + {65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65}, + Literal::String("AAAAAAAAAAAAAAAA"), + string()}, + LiteralRoundTripParam{"BinaryData", {0x01, 0x02, 0x03, 0xFF}, Literal::Binary({0x01, 0x02, 0x03, 0xFF}), binary()}, - // Edge cases that fit the round-trip pattern + LiteralRoundTripParam{"BinarySingleByte", {42}, Literal::Binary({42}), binary()}, + + // Temporal types + LiteralRoundTripParam{"DateEpoch", {0, 0, 0, 0}, Literal::Date(0), date()}, + LiteralRoundTripParam{"DateNextDay", {1, 0, 0, 0}, Literal::Date(1), date()}, + LiteralRoundTripParam{"DateY2K", {205, 42, 0, 0}, Literal::Date(10957), date()}, LiteralRoundTripParam{ - "NegativeInt", {224, 255, 255, 255}, Literal::Int(-32), int32()}, - LiteralRoundTripParam{"NegativeLong", - {224, 255, 255, 255, 255, 255, 255, 255}, - Literal::Long(-32), - int64()}, - // IEEE 754 representation for NaN and Infinity (in little-endian) - LiteralRoundTripParam{"FloatInfinity", - {0, 0, 128, 127}, - Literal::Float(std::numeric_limits::infinity()), - float32()}, - LiteralRoundTripParam{"FloatNaN", - {0, 0, 192, 127}, - Literal::Float(std::numeric_limits::quiet_NaN()), - float32()} - // TODO(Li Feiyang): Add tests for Date, Time, Timestamp, TimestampTz - ), + "DateNegative", {255, 255, 255, 255}, Literal::Date(-1), date()}, + + LiteralRoundTripParam{ + "TimeMidnight", {0, 0, 0, 0, 0, 0, 0, 0}, Literal::Time(0), time()}, + LiteralRoundTripParam{"TimeNoon", + {128, 9, 230, 124, 10, 0, 0, 0}, + Literal::Time(45045123456), + time()}, + LiteralRoundTripParam{ + "TimeOneSecond", {64, 66, 15, 0, 0, 0, 0, 0}, Literal::Time(1000000), time()}, + + LiteralRoundTripParam{"TimestampEpoch", + {0, 0, 0, 0, 0, 0, 0, 0}, + Literal::Timestamp(0), + timestamp()}, + LiteralRoundTripParam{"TimestampOneSecond", + {64, 66, 15, 0, 0, 0, 0, 0}, + Literal::Timestamp(1000000), + timestamp()}, + LiteralRoundTripParam{"TimestampNoon2024", + {128, 9, 230, 124, 10, 0, 0, 0}, + Literal::Timestamp(45045123456), + timestamp()}, + + LiteralRoundTripParam{"TimestampTzEpoch", + {0, 0, 0, 0, 0, 0, 0, 0}, + Literal::TimestampTz(0), + timestamp_tz()}, + LiteralRoundTripParam{"TimestampTzOneHour", + {0, 164, 147, 214, 0, 0, 0, 0}, + Literal::TimestampTz(3600000000), + timestamp_tz()}), [](const testing::TestParamInfo& info) { return info.param.test_name; From e187322c3deee08ee6a3bf9239954944d4923eb7 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 22 Sep 2025 17:22:59 +0800 Subject: [PATCH 4/9] modify fixed --- src/iceberg/util/conversions.cc | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc index e33658a1..b95eae15 100644 --- a/src/iceberg/util/conversions.cc +++ b/src/iceberg/util/conversions.cc @@ -198,13 +198,12 @@ Result Conversions::FromBytes(const PrimitiveType& type, } case TypeId::kFixed: { - if (data.size() == 16) { - std::array fixed_bytes; - std::ranges::copy(data, fixed_bytes.begin()); - return Literal::Value{fixed_bytes}; - } else { - return Literal::Value{std::vector(data.begin(), data.end())}; + const auto& fixed_type = static_cast(type); + if (data.size() != fixed_type.length()) { + return InvalidArgument("Invalid data size for Fixed literal, got size: {}", + data.size()); } + return Literal::Value{std::vector(data.begin(), data.end())}; } // TODO(Li Feiyang): Add support for UUID and Decimal @@ -221,12 +220,6 @@ Result Conversions::FromBytes(std::shared_ptr type, } ICEBERG_ASSIGN_OR_RAISE(auto value, FromBytes(*type, data)); - - // If we got a null value (monostate), create a null Literal - if (std::holds_alternative(value)) { - return Literal::Null(std::move(type)); - } - return Literal(std::move(value), std::move(type)); } From bbc59b05eee1500056ed07f7d40c4dbca8b2d43d Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 22 Sep 2025 17:32:14 +0800 Subject: [PATCH 5/9] support fixed --- src/iceberg/expression/literal.cc | 14 ++++++++++++++ src/iceberg/test/literal_test.cc | 21 +++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index fd6609a2..822ffdb0 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -255,6 +255,20 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { return this_val <=> other_val; } + case TypeId::kFixed: { + // Fixed types can only be compared if they have the same length + auto& this_fixed_type = static_cast(*type_); + auto& other_fixed_type = static_cast(*other.type_); + + if (this_fixed_type.length() != other_fixed_type.length()) { + return std::partial_ordering::unordered; + } + + auto& this_val = std::get>(value_); + auto& other_val = std::get>(other.value_); + return this_val <=> other_val; + } + default: // For unsupported types, return unordered return std::partial_ordering::unordered; diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 58f744a1..659055e9 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -492,6 +492,27 @@ INSTANTIATE_TEST_SUITE_P( binary()}, LiteralRoundTripParam{"BinarySingleByte", {42}, Literal::Binary({42}), binary()}, + // Fixed type + LiteralRoundTripParam{"FixedLength4", + {0x01, 0x02, 0x03, 0x04}, + Literal::Fixed({0x01, 0x02, 0x03, 0x04}, 4), + fixed(4)}, + LiteralRoundTripParam{ + "FixedLength8", + {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}, + Literal::Fixed({0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}, 8), + fixed(8)}, + LiteralRoundTripParam{ + "FixedLength16", + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F}, + Literal::Fixed({0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}, + 16), + fixed(16)}, + LiteralRoundTripParam{ + "FixedSingleByte", {0xFF}, Literal::Fixed({0xFF}, 1), fixed(1)}, + // Temporal types LiteralRoundTripParam{"DateEpoch", {0, 0, 0, 0}, Literal::Date(0), date()}, LiteralRoundTripParam{"DateNextDay", {1, 0, 0, 0}, Literal::Date(1), date()}, From bcdfa60147a2e4caf9b110179d4e1bf82027309f Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Tue, 23 Sep 2025 10:24:19 +0800 Subject: [PATCH 6/9] fix --- src/iceberg/expression/literal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 822ffdb0..526f479e 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -156,7 +156,7 @@ Literal Literal::Fixed(std::vector value, int32_t length) { Result Literal::Deserialize(std::span data, std::shared_ptr type) { - return Conversions::FromBytes(type, data); + return Conversions::FromBytes(std::move(type), data); } Result> Literal::Serialize() const { From 8d3f274f8c0e467b89264fbc700806482977352c Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Thu, 25 Sep 2025 19:23:06 +0800 Subject: [PATCH 7/9] fix review --- src/iceberg/expression/literal.cc | 14 ++++--------- src/iceberg/expression/literal.h | 2 +- src/iceberg/test/literal_test.cc | 34 ++++++++++++++++--------------- src/iceberg/util/conversions.cc | 29 +++----------------------- 4 files changed, 26 insertions(+), 53 deletions(-) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 526f479e..adfe5355 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -24,6 +24,7 @@ #include "iceberg/exception.h" #include "iceberg/util/conversions.h" +#include "iceberg/util/macros.h" namespace iceberg { @@ -150,7 +151,8 @@ Literal Literal::Binary(std::vector value) { return {Value{std::move(value)}, binary()}; } -Literal Literal::Fixed(std::vector value, int32_t length) { +Literal Literal::Fixed(std::vector value) { + auto length = static_cast(value.size()); return {Value{std::move(value)}, fixed(length)}; } @@ -194,7 +196,7 @@ bool Literal::operator==(const Literal& other) const { return (*this <=> other) // Three-way comparison operator std::partial_ordering Literal::operator<=>(const Literal& other) const { // If types are different, comparison is unordered - if (type_->type_id() != other.type_->type_id()) { + if (*type_ != *other.type_) { return std::partial_ordering::unordered; } @@ -256,14 +258,6 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { } case TypeId::kFixed: { - // Fixed types can only be compared if they have the same length - auto& this_fixed_type = static_cast(*type_); - auto& other_fixed_type = static_cast(*other.type_); - - if (this_fixed_type.length() != other_fixed_type.length()) { - return std::partial_ordering::unordered; - } - auto& this_val = std::get>(value_); auto& other_val = std::get>(other.value_); return this_val <=> other_val; diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index ba189271..c11d48f5 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -72,7 +72,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable { static Literal Double(double value); static Literal String(std::string value); static Literal Binary(std::vector value); - static Literal Fixed(std::vector value, int32_t length); + static Literal Fixed(std::vector value); /// \brief Create a literal representing a null value. static Literal Null(std::shared_ptr type) { diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 659055e9..6fc40977 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -136,7 +136,6 @@ TEST(LiteralTest, LongCastTo) { EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); } -// Test overflow cases TEST(LiteralTest, LongCastToIntOverflow) { auto max_long = Literal::Long(static_cast(std::numeric_limits::max()) + 1); @@ -495,23 +494,22 @@ INSTANTIATE_TEST_SUITE_P( // Fixed type LiteralRoundTripParam{"FixedLength4", {0x01, 0x02, 0x03, 0x04}, - Literal::Fixed({0x01, 0x02, 0x03, 0x04}, 4), + Literal::Fixed({0x01, 0x02, 0x03, 0x04}), fixed(4)}, LiteralRoundTripParam{ "FixedLength8", {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}, - Literal::Fixed({0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}, 8), + Literal::Fixed({0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}), fixed(8)}, LiteralRoundTripParam{ "FixedLength16", {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}, Literal::Fixed({0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, - 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}, - 16), + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}), fixed(16)}, LiteralRoundTripParam{ - "FixedSingleByte", {0xFF}, Literal::Fixed({0xFF}, 1), fixed(1)}, + "FixedSingleByte", {0xFF}, Literal::Fixed({0xFF}), fixed(1)}, // Temporal types LiteralRoundTripParam{"DateEpoch", {0, 0, 0, 0}, Literal::Date(0), date()}, @@ -562,7 +560,19 @@ TEST(LiteralSerializationTest, EmptyString) { EXPECT_TRUE(empty_bytes->empty()); auto deserialize_result = Literal::Deserialize(*empty_bytes, string()); - EXPECT_THAT(deserialize_result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(deserialize_result, IsOk()); + EXPECT_TRUE(std::get(deserialize_result->value()).empty()); +} + +TEST(LiteralSerializationTest, EmptyBinary) { + auto empty_binary = Literal::Binary({}); + auto empty_bytes = empty_binary.Serialize(); + ASSERT_TRUE(empty_bytes.has_value()); + EXPECT_TRUE(empty_bytes->empty()); + + auto deserialize_result = Literal::Deserialize(*empty_bytes, binary()); + ASSERT_THAT(deserialize_result, IsOk()); + EXPECT_TRUE(std::get>(deserialize_result->value()).empty()); } // Type promotion tests @@ -574,20 +584,12 @@ TEST(LiteralSerializationTest, TypePromotion) { EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong); EXPECT_EQ(std::get(long_result->value()), 32L); - auto long_bytes = long_result->Serialize(); - ASSERT_TRUE(long_bytes.has_value()); - EXPECT_EQ(long_bytes->size(), 8); - // 4-byte float data can be deserialized as double std::vector float_data = {0, 0, 128, 63}; auto double_result = Literal::Deserialize(float_data, float64()); ASSERT_TRUE(double_result.has_value()); EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); - EXPECT_EQ(std::get(double_result->value()), 1.0); - - auto double_bytes = double_result->Serialize(); - ASSERT_TRUE(double_bytes.has_value()); - EXPECT_EQ(double_bytes->size(), 8); + EXPECT_DOUBLE_EQ(std::get(double_result->value()), 1.0); } } // namespace iceberg diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc index b95eae15..da739eab 100644 --- a/src/iceberg/util/conversions.cc +++ b/src/iceberg/util/conversions.cc @@ -77,14 +77,7 @@ Result> ToBytesImpl(const Literal::Value& template <> Result> ToBytesImpl(const Literal::Value& value) { - if (std::holds_alternative>(value)) { - return std::get>(value); - } else { - std::string actual_type = - std::visit([](auto&& arg) -> std::string { return typeid(arg).name(); }, value); - return InvalidArgument("Invalid value type for Fixed literal, got type: {}", - actual_type); - } + return std::get>(value); } #define DISPATCH_LITERAL_TO_BYTES(type_id) \ @@ -134,28 +127,20 @@ Result> Conversions::ToBytes(const Literal& literal) { Result Conversions::FromBytes(const PrimitiveType& type, std::span data) { - if (data.empty()) { - return InvalidArgument("Cannot deserialize empty value"); - } - const auto type_id = type.type_id(); - switch (type_id) { case TypeId::kBoolean: { ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{static_cast(value != 0x00)}; } - case TypeId::kInt: { ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{value}; } - case TypeId::kDate: { ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{value}; } - case TypeId::kLong: case TypeId::kTime: case TypeId::kTimestamp: @@ -171,12 +156,10 @@ Result Conversions::FromBytes(const PrimitiveType& type, } return Literal::Value{value}; } - case TypeId::kFloat: { ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian(data)); return Literal::Value{value}; } - case TypeId::kDouble: { if (data.size() < 8) { // Type was promoted from float to double @@ -187,16 +170,11 @@ Result Conversions::FromBytes(const PrimitiveType& type, return Literal::Value{double_value}; } } - - case TypeId::kString: { + case TypeId::kString: return Literal::Value{ std::string(reinterpret_cast(data.data()), data.size())}; - } - - case TypeId::kBinary: { + case TypeId::kBinary: return Literal::Value{std::vector(data.begin(), data.end())}; - } - case TypeId::kFixed: { const auto& fixed_type = static_cast(type); if (data.size() != fixed_type.length()) { @@ -206,7 +184,6 @@ Result Conversions::FromBytes(const PrimitiveType& type, return Literal::Value{std::vector(data.begin(), data.end())}; } // TODO(Li Feiyang): Add support for UUID and Decimal - default: return NotSupported("Deserialization for type {} is not supported", type.ToString()); From e09c6037323b9f0eaf8c5c3d5526618453c8ab43 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Thu, 25 Sep 2025 19:24:17 +0800 Subject: [PATCH 8/9] remove header --- src/iceberg/util/conversions.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc index da739eab..c5dbcf35 100644 --- a/src/iceberg/util/conversions.cc +++ b/src/iceberg/util/conversions.cc @@ -19,7 +19,6 @@ #include "iceberg/util/conversions.h" -#include #include #include #include From bce09c60433ecf6f8bab5d6f273d52088d2e605c Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Sat, 27 Sep 2025 19:58:56 +0800 Subject: [PATCH 9/9] fix review --- src/iceberg/test/literal_test.cc | 264 +++++++++++++++---------------- 1 file changed, 129 insertions(+), 135 deletions(-) diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 6fc40977..bd7544bf 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -382,178 +382,172 @@ TEST(LiteralTest, DoubleZeroComparison) { EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less); } -struct LiteralRoundTripParam { +struct LiteralParam { std::string test_name; - std::vector input_bytes; - Literal expected_literal; + std::vector serialized; + Literal value; std::shared_ptr type; }; -class LiteralSerializationParam : public ::testing::TestWithParam { -}; +class LiteralSerDeParam : public ::testing::TestWithParam {}; -TEST_P(LiteralSerializationParam, RoundTrip) { +TEST_P(LiteralSerDeParam, RoundTrip) { const auto& param = GetParam(); // Deserialize from bytes - Result literal_result = Literal::Deserialize(param.input_bytes, param.type); + Result literal_result = Literal::Deserialize(param.serialized, param.type); ASSERT_TRUE(literal_result.has_value()) << "Deserialization failed: " << literal_result.error().message; // Check type and value - EXPECT_EQ(*literal_result, param.expected_literal); + EXPECT_EQ(*literal_result, param.value); // Serialize back to bytes Result> bytes_result = literal_result->Serialize(); ASSERT_TRUE(bytes_result.has_value()) << "Serialization failed: " << bytes_result.error().message; - EXPECT_EQ(*bytes_result, param.input_bytes); + EXPECT_EQ(*bytes_result, param.serialized); // Deserialize again to verify idempotency Result final_literal = Literal::Deserialize(*bytes_result, param.type); ASSERT_TRUE(final_literal.has_value()) << "Final deserialization failed: " << final_literal.error().message; - EXPECT_EQ(*final_literal, param.expected_literal); + EXPECT_EQ(*final_literal, param.value); } INSTANTIATE_TEST_SUITE_P( - BinarySerialization, LiteralSerializationParam, + BinarySerialization, LiteralSerDeParam, ::testing::Values( // Basic types - LiteralRoundTripParam{"BooleanTrue", {1}, Literal::Boolean(true), boolean()}, - LiteralRoundTripParam{"BooleanFalse", {0}, Literal::Boolean(false), boolean()}, + LiteralParam{"BooleanTrue", {1}, Literal::Boolean(true), boolean()}, + LiteralParam{"BooleanFalse", {0}, Literal::Boolean(false), boolean()}, - LiteralRoundTripParam{"Int", {32, 0, 0, 0}, Literal::Int(32), int32()}, - LiteralRoundTripParam{ + LiteralParam{"Int", {32, 0, 0, 0}, Literal::Int(32), int32()}, + LiteralParam{ "IntMaxValue", {255, 255, 255, 127}, Literal::Int(2147483647), int32()}, - LiteralRoundTripParam{ - "IntMinValue", {0, 0, 0, 128}, Literal::Int(-2147483648), int32()}, - LiteralRoundTripParam{ - "NegativeInt", {224, 255, 255, 255}, Literal::Int(-32), int32()}, - - LiteralRoundTripParam{ - "Long", {32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64()}, - LiteralRoundTripParam{"LongMaxValue", - {255, 255, 255, 255, 255, 255, 255, 127}, - Literal::Long(std::numeric_limits::max()), - int64()}, - LiteralRoundTripParam{"LongMinValue", - {0, 0, 0, 0, 0, 0, 0, 128}, - Literal::Long(std::numeric_limits::min()), - int64()}, - LiteralRoundTripParam{"NegativeLong", - {224, 255, 255, 255, 255, 255, 255, 255}, - Literal::Long(-32), - int64()}, - - LiteralRoundTripParam{"Float", {0, 0, 128, 63}, Literal::Float(1.0f), float32()}, - LiteralRoundTripParam{"FloatNegativeInfinity", - {0, 0, 128, 255}, - Literal::Float(-std::numeric_limits::infinity()), - float32()}, - LiteralRoundTripParam{"FloatMaxValue", - {255, 255, 127, 127}, - Literal::Float(std::numeric_limits::max()), - float32()}, - LiteralRoundTripParam{"FloatMinValue", - {255, 255, 127, 255}, - Literal::Float(std::numeric_limits::lowest()), - float32()}, - - LiteralRoundTripParam{ + LiteralParam{"IntMinValue", {0, 0, 0, 128}, Literal::Int(-2147483648), int32()}, + LiteralParam{"NegativeInt", {224, 255, 255, 255}, Literal::Int(-32), int32()}, + + LiteralParam{"Long", {32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64()}, + LiteralParam{"LongMaxValue", + {255, 255, 255, 255, 255, 255, 255, 127}, + Literal::Long(std::numeric_limits::max()), + int64()}, + LiteralParam{"LongMinValue", + {0, 0, 0, 0, 0, 0, 0, 128}, + Literal::Long(std::numeric_limits::min()), + int64()}, + LiteralParam{"NegativeLong", + {224, 255, 255, 255, 255, 255, 255, 255}, + Literal::Long(-32), + int64()}, + + LiteralParam{"Float", {0, 0, 128, 63}, Literal::Float(1.0f), float32()}, + LiteralParam{"FloatNegativeInfinity", + {0, 0, 128, 255}, + Literal::Float(-std::numeric_limits::infinity()), + float32()}, + LiteralParam{"FloatMaxValue", + {255, 255, 127, 127}, + Literal::Float(std::numeric_limits::max()), + float32()}, + LiteralParam{"FloatMinValue", + {255, 255, 127, 255}, + Literal::Float(std::numeric_limits::lowest()), + float32()}, + + LiteralParam{ "Double", {0, 0, 0, 0, 0, 0, 240, 63}, Literal::Double(1.0), float64()}, - LiteralRoundTripParam{"DoubleNegativeInfinity", - {0, 0, 0, 0, 0, 0, 240, 255}, - Literal::Double(-std::numeric_limits::infinity()), - float64()}, - LiteralRoundTripParam{"DoubleMaxValue", - {255, 255, 255, 255, 255, 255, 239, 127}, - Literal::Double(std::numeric_limits::max()), - float64()}, - LiteralRoundTripParam{"DoubleMinValue", - {255, 255, 255, 255, 255, 255, 239, 255}, - Literal::Double(std::numeric_limits::lowest()), - float64()}, - - LiteralRoundTripParam{"String", - {105, 99, 101, 98, 101, 114, 103}, - Literal::String("iceberg"), - string()}, - LiteralRoundTripParam{ - "StringLong", - {65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65}, - Literal::String("AAAAAAAAAAAAAAAA"), - string()}, - - LiteralRoundTripParam{"BinaryData", - {0x01, 0x02, 0x03, 0xFF}, - Literal::Binary({0x01, 0x02, 0x03, 0xFF}), - binary()}, - LiteralRoundTripParam{"BinarySingleByte", {42}, Literal::Binary({42}), binary()}, + LiteralParam{"DoubleNegativeInfinity", + {0, 0, 0, 0, 0, 0, 240, 255}, + Literal::Double(-std::numeric_limits::infinity()), + float64()}, + LiteralParam{"DoubleMaxValue", + {255, 255, 255, 255, 255, 255, 239, 127}, + Literal::Double(std::numeric_limits::max()), + float64()}, + LiteralParam{"DoubleMinValue", + {255, 255, 255, 255, 255, 255, 239, 255}, + Literal::Double(std::numeric_limits::lowest()), + float64()}, + + LiteralParam{"String", + {105, 99, 101, 98, 101, 114, 103}, + Literal::String("iceberg"), + string()}, + LiteralParam{"StringLong", + {65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65}, + Literal::String("AAAAAAAAAAAAAAAA"), + string()}, + + LiteralParam{"BinaryData", + {0x01, 0x02, 0x03, 0xFF}, + Literal::Binary({0x01, 0x02, 0x03, 0xFF}), + binary()}, + LiteralParam{"BinarySingleByte", {42}, Literal::Binary({42}), binary()}, // Fixed type - LiteralRoundTripParam{"FixedLength4", - {0x01, 0x02, 0x03, 0x04}, - Literal::Fixed({0x01, 0x02, 0x03, 0x04}), - fixed(4)}, - LiteralRoundTripParam{ - "FixedLength8", - {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}, - Literal::Fixed({0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}), - fixed(8)}, - LiteralRoundTripParam{ - "FixedLength16", - {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, - 0x0D, 0x0E, 0x0F}, - Literal::Fixed({0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, - 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}), - fixed(16)}, - LiteralRoundTripParam{ - "FixedSingleByte", {0xFF}, Literal::Fixed({0xFF}), fixed(1)}, + LiteralParam{"FixedLength4", + {0x01, 0x02, 0x03, 0x04}, + Literal::Fixed({0x01, 0x02, 0x03, 0x04}), + fixed(4)}, + LiteralParam{"FixedLength8", + {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}, + Literal::Fixed({0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11}), + fixed(8)}, + LiteralParam{"FixedLength16", + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}, + Literal::Fixed({0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}), + fixed(16)}, + LiteralParam{"FixedSingleByte", {0xFF}, Literal::Fixed({0xFF}), fixed(1)}, // Temporal types - LiteralRoundTripParam{"DateEpoch", {0, 0, 0, 0}, Literal::Date(0), date()}, - LiteralRoundTripParam{"DateNextDay", {1, 0, 0, 0}, Literal::Date(1), date()}, - LiteralRoundTripParam{"DateY2K", {205, 42, 0, 0}, Literal::Date(10957), date()}, - LiteralRoundTripParam{ - "DateNegative", {255, 255, 255, 255}, Literal::Date(-1), date()}, - - LiteralRoundTripParam{ - "TimeMidnight", {0, 0, 0, 0, 0, 0, 0, 0}, Literal::Time(0), time()}, - LiteralRoundTripParam{"TimeNoon", - {128, 9, 230, 124, 10, 0, 0, 0}, - Literal::Time(45045123456), - time()}, - LiteralRoundTripParam{ + LiteralParam{"DateEpoch", {0, 0, 0, 0}, Literal::Date(0), date()}, + LiteralParam{"DateNextDay", {1, 0, 0, 0}, Literal::Date(1), date()}, + LiteralParam{"DateY2K", {205, 42, 0, 0}, Literal::Date(10957), date()}, + LiteralParam{"DateNegative", {255, 255, 255, 255}, Literal::Date(-1), date()}, + + LiteralParam{"TimeMidnight", {0, 0, 0, 0, 0, 0, 0, 0}, Literal::Time(0), time()}, + LiteralParam{"TimeNoon", + {128, 9, 230, 124, 10, 0, 0, 0}, + Literal::Time(45045123456), + time()}, + LiteralParam{ "TimeOneSecond", {64, 66, 15, 0, 0, 0, 0, 0}, Literal::Time(1000000), time()}, - LiteralRoundTripParam{"TimestampEpoch", - {0, 0, 0, 0, 0, 0, 0, 0}, - Literal::Timestamp(0), - timestamp()}, - LiteralRoundTripParam{"TimestampOneSecond", - {64, 66, 15, 0, 0, 0, 0, 0}, - Literal::Timestamp(1000000), - timestamp()}, - LiteralRoundTripParam{"TimestampNoon2024", - {128, 9, 230, 124, 10, 0, 0, 0}, - Literal::Timestamp(45045123456), - timestamp()}, - - LiteralRoundTripParam{"TimestampTzEpoch", - {0, 0, 0, 0, 0, 0, 0, 0}, - Literal::TimestampTz(0), - timestamp_tz()}, - LiteralRoundTripParam{"TimestampTzOneHour", - {0, 164, 147, 214, 0, 0, 0, 0}, - Literal::TimestampTz(3600000000), - timestamp_tz()}), - - [](const testing::TestParamInfo& info) { + LiteralParam{"TimestampEpoch", + {0, 0, 0, 0, 0, 0, 0, 0}, + Literal::Timestamp(0), + timestamp()}, + LiteralParam{"TimestampOneSecond", + {64, 66, 15, 0, 0, 0, 0, 0}, + Literal::Timestamp(1000000), + timestamp()}, + LiteralParam{"TimestampNoon2024", + {128, 9, 230, 124, 10, 0, 0, 0}, + Literal::Timestamp(45045123456), + timestamp()}, + + LiteralParam{"TimestampTzEpoch", + {0, 0, 0, 0, 0, 0, 0, 0}, + Literal::TimestampTz(0), + timestamp_tz()}, + LiteralParam{"TimestampTzOneHour", + {0, 164, 147, 214, 0, 0, 0, 0}, + Literal::TimestampTz(3600000000), + timestamp_tz()}, + + // Empty values + LiteralParam{"EmptyString", {}, Literal::String(""), string()}, + LiteralParam{"EmptyBinary", {}, Literal::Binary({}), binary()}), + + [](const testing::TestParamInfo& info) { return info.param.test_name; }); -TEST(LiteralSerializationTest, EmptyString) { +TEST(LiteralSerDeTest, EmptyString) { auto empty_string = Literal::String(""); auto empty_bytes = empty_string.Serialize(); ASSERT_TRUE(empty_bytes.has_value()); @@ -564,7 +558,7 @@ TEST(LiteralSerializationTest, EmptyString) { EXPECT_TRUE(std::get(deserialize_result->value()).empty()); } -TEST(LiteralSerializationTest, EmptyBinary) { +TEST(LiteralSerDeTest, EmptyBinary) { auto empty_binary = Literal::Binary({}); auto empty_bytes = empty_binary.Serialize(); ASSERT_TRUE(empty_bytes.has_value()); @@ -576,7 +570,7 @@ TEST(LiteralSerializationTest, EmptyBinary) { } // Type promotion tests -TEST(LiteralSerializationTest, TypePromotion) { +TEST(LiteralSerDeTest, TypePromotion) { // 4-byte int data can be deserialized as long std::vector int_data = {32, 0, 0, 0}; auto long_result = Literal::Deserialize(int_data, int64());