From 3d6733e1106afcf4155610ecfad3fc7f5104c36a Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Fri, 12 Sep 2025 14:09:16 +0800 Subject: [PATCH 1/7] feat: Implement Type Casting and toString for Literals --- src/iceberg/expression/literal.cc | 236 +++++++++++++--- src/iceberg/test/date_time_util_test.cc | 345 +++++++++++++++++++++++ src/iceberg/test/literal_test.cc | 359 ++++++++++++++++++++++-- src/iceberg/util/date_time_util.cc | 208 ++++++++++++++ src/iceberg/util/date_time_util.h | 67 +++++ 5 files changed, 1156 insertions(+), 59 deletions(-) create mode 100644 src/iceberg/test/date_time_util_test.cc create mode 100644 src/iceberg/util/date_time_util.cc create mode 100644 src/iceberg/util/date_time_util.h diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index adfe5355..783fa097 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -19,12 +19,15 @@ #include "iceberg/expression/literal.h" -#include #include +#include + +#include #include "iceberg/exception.h" #include "iceberg/util/conversions.h" #include "iceberg/util/macros.h" +#include "iceberg/util/date_time_util.h" namespace iceberg { @@ -54,6 +57,30 @@ class LiteralCaster { /// Cast from Float type to target type. static Result CastFromFloat(const Literal& literal, const std::shared_ptr& target_type); + + /// Cast from Double type to target type. + static Result CastFromDouble( + const Literal& literal, const std::shared_ptr& target_type); + + /// Cast from String type to target type. + static Result CastFromString( + const Literal& literal, const std::shared_ptr& target_type); + + /// Cast from Timestamp type to target type. + static Result CastFromTimestamp( + const Literal& literal, const std::shared_ptr& target_type); + + /// Cast from TimestampTz type to target type. + static Result CastFromTimestampTz( + const Literal& literal, const std::shared_ptr& target_type); + + /// Cast from Binary type to target type. 
+ static Result CastFromBinary( + const Literal& literal, const std::shared_ptr& target_type); + + /// Cast from Fixed type to target type. + static Result CastFromFixed(const Literal& literal, + const std::shared_ptr& target_type); }; Literal LiteralCaster::BelowMinLiteral(std::shared_ptr type) { @@ -76,6 +103,9 @@ Result LiteralCaster::CastFromInt( return Literal::Float(static_cast(int_val)); case TypeId::kDouble: return Literal::Double(static_cast(int_val)); + case TypeId::kDate: + return Literal::Date(int_val); + // TODO(Li Feiyang): Implement cast from Int to decimal default: return NotSupported("Cast from Int to {} is not implemented", target_type->ToString()); @@ -85,15 +115,14 @@ Result LiteralCaster::CastFromInt( Result LiteralCaster::CastFromLong( const Literal& literal, const std::shared_ptr& target_type) { auto long_val = std::get(literal.value_); - auto target_type_id = target_type->type_id(); - switch (target_type_id) { + switch (target_type->type_id()) { case TypeId::kInt: { // Check for overflow - if (long_val >= std::numeric_limits::max()) { + if (long_val > std::numeric_limits::max()) { return AboveMaxLiteral(target_type); } - if (long_val <= std::numeric_limits::min()) { + if (long_val < std::numeric_limits::min()) { return BelowMinLiteral(target_type); } return Literal::Int(static_cast(long_val)); @@ -102,6 +131,22 @@ Result LiteralCaster::CastFromLong( return Literal::Float(static_cast(long_val)); case TypeId::kDouble: return Literal::Double(static_cast(long_val)); + case TypeId::kDate: { + if (long_val > std::numeric_limits::max()) { + return AboveMaxLiteral(target_type); + } + if (long_val < std::numeric_limits::min()) { + return BelowMinLiteral(target_type); + } + return Literal::Date(static_cast(long_val)); + } + case TypeId::kTime: + return Literal::Time(long_val); + case TypeId::kTimestamp: + return Literal::Timestamp(long_val); + case TypeId::kTimestampTz: + return Literal::TimestampTz(long_val); + // TODO(Li Feiyang): Implement cast 
from Long to decimal, TimestampNs and default: return NotSupported("Cast from Long to {} is not supported", target_type->ToString()); @@ -111,17 +156,141 @@ Result LiteralCaster::CastFromLong( Result LiteralCaster::CastFromFloat( const Literal& literal, const std::shared_ptr& target_type) { auto float_val = std::get(literal.value_); - auto target_type_id = target_type->type_id(); - switch (target_type_id) { + switch (target_type->type_id()) { case TypeId::kDouble: return Literal::Double(static_cast(float_val)); + // TODO(Li Feiyang): Implement cast from Float to decimal default: return NotSupported("Cast from Float to {} is not supported", target_type->ToString()); } } +Result LiteralCaster::CastFromDouble( + const Literal& literal, const std::shared_ptr& target_type) { + auto double_val = std::get(literal.value_); + + switch (target_type->type_id()) { + case TypeId::kFloat: { + if (double_val > static_cast(std::numeric_limits::max())) { + return AboveMaxLiteral(target_type); + } + if (double_val < static_cast(std::numeric_limits::lowest())) { + return BelowMinLiteral(target_type); + } + return Literal::Float(static_cast(double_val)); + } + default: + return NotSupported("Cast from Double to {} is not supported", + target_type->ToString()); + } +} + +Result LiteralCaster::CastFromString( + const Literal& literal, const std::shared_ptr& target_type) { + const auto& str_val = std::get(literal.value_); + + switch (target_type->type_id()) { + case TypeId::kDate: { + auto days_result = ParseDateString(str_val); + if (!days_result.has_value()) { + return std::unexpected(days_result.error()); + } + return Literal::Date(days_result.value()); + } + + case TypeId::kTime: { + auto micros_result = ParseTimeString(str_val); + if (!micros_result.has_value()) { + return std::unexpected(micros_result.error()); + } + return Literal::Time(micros_result.value()); + } + + case TypeId::kTimestamp: { + auto micros_result = ParseTimestampString(str_val); + if (!micros_result.has_value()) 
{ + return std::unexpected(micros_result.error()); + } + return Literal::Timestamp(micros_result.value()); + } + + case TypeId::kTimestampTz: { + auto micros_result = ParseTimestampTzString(str_val); + if (!micros_result.has_value()) { + return std::unexpected(micros_result.error()); + } + return Literal::TimestampTz(micros_result.value()); + } + // TODO(Li Feiyang): Implement cast from String to uuid and decimal + + default: + return NotSupported("Cast from String to {} is not supported", + target_type->ToString()); + } +} + +Result LiteralCaster::CastFromTimestamp( + const Literal& literal, const std::shared_ptr& target_type) { + auto timestamp_val = std::get(literal.value_); + + switch (target_type->type_id()) { + case TypeId::kDate: + return Literal::Date(MicrosToDays(timestamp_val)); + case TypeId::kTimestampTz: + return Literal::TimestampTz(timestamp_val); + default: + return NotSupported("Cast from Timestamp to {} is not supported", + target_type->ToString()); + } +} + +Result LiteralCaster::CastFromTimestampTz( + const Literal& literal, const std::shared_ptr& target_type) { + auto micros = std::get(literal.value_); + + switch (target_type->type_id()) { + case TypeId::kDate: + return Literal::Date(MicrosToDays(micros)); + case TypeId::kTimestamp: + return Literal::Timestamp(micros); + default: + return NotSupported("Cast from TimestampTz to {} is not supported", + target_type->ToString()); + } +} + +Result LiteralCaster::CastFromBinary( + const Literal& literal, const std::shared_ptr& target_type) { + auto binary_val = std::get>(literal.value_); + switch (target_type->type_id()) { + case TypeId::kFixed: { + auto target_fixed_type = std::static_pointer_cast(target_type); + if (binary_val.size() == target_fixed_type->length()) { + return Literal::Fixed(std::move(binary_val)); + } + return InvalidArgument("Failed to cast Binary with length {} to Fixed({})", + binary_val.size(), target_fixed_type->length()); + } + default: + return NotSupported("Cast from Binary 
to {} is not supported", + target_type->ToString()); + } +} + +Result LiteralCaster::CastFromFixed( + const Literal& literal, const std::shared_ptr& target_type) { + switch (target_type->type_id()) { + case TypeId::kBinary: + return Literal::Binary( + std::get>(literal.value_)); // 直接拷贝+move + default: + return NotSupported("Cast from Fixed to {} is not supported", + target_type->ToString()); + } +} + // Constructor Literal::Literal(Value value, std::shared_ptr type) : value_(std::move(value)), type_(std::move(type)) {} @@ -152,8 +321,8 @@ Literal Literal::Binary(std::vector value) { } Literal Literal::Fixed(std::vector value) { - auto length = static_cast(value.size()); - return {Value{std::move(value)}, fixed(length)}; + const auto size = value.size(); + return {Value{std::move(value)}, fixed(size)}; } Result Literal::Deserialize(std::span data, @@ -251,12 +420,7 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { return this_val <=> other_val; } - case TypeId::kBinary: { - auto& this_val = std::get>(value_); - auto& other_val = std::get>(other.value_); - return this_val <=> other_val; - } - + case TypeId::kBinary: case TypeId::kFixed: { auto& this_val = std::get>(value_); auto& other_val = std::get>(other.value_); @@ -299,31 +463,29 @@ std::string Literal::ToString() const { case TypeId::kString: { return std::get(value_); } - case TypeId::kBinary: { + case TypeId::kBinary: + case TypeId::kFixed: { const auto& binary_data = std::get>(value_); - std::string result; - result.reserve(binary_data.size() * 2); // 2 chars per byte + std::string result = "X'"; + result.reserve(2 + binary_data.size() * 2 + + 1); // 2 chars per byte and 2 + 1 for prefix and suffix for (const auto& byte : binary_data) { std::format_to(std::back_inserter(result), "{:02X}", byte); } + result.push_back('\''); return result; } - case TypeId::kFixed: { - const auto& fixed_data = std::get>(value_); - std::string result; - result.reserve(fixed_data.size() * 2); // 2 chars 
per byte - for (const auto& byte : fixed_data) { - std::format_to(std::back_inserter(result), "{:02X}", byte); - } - return result; - } - case TypeId::kDecimal: - case TypeId::kUuid: - case TypeId::kDate: case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: { - throw IcebergError("Not implemented: ToString for " + type_->ToString()); + return std::to_string(std::get(value_)); + } + case TypeId::kDate: { + return std::to_string(std::get(value_)); + } + case TypeId::kDecimal: + case TypeId::kUuid: { + throw NotImplemented("kDecimal and kUuid are not implemented yet"); } default: { throw IcebergError("Unknown type: " + type_->ToString()); @@ -365,10 +527,18 @@ Result LiteralCaster::CastTo(const Literal& literal, case TypeId::kFloat: return CastFromFloat(literal, target_type); case TypeId::kDouble: - case TypeId::kBoolean: + return CastFromDouble(literal, target_type); case TypeId::kString: + return CastFromString(literal, target_type); case TypeId::kBinary: - break; + return CastFromBinary(literal, target_type); + case TypeId::kFixed: + return CastFromFixed(literal, target_type); + case TypeId::kTimestamp: + return CastFromTimestamp(literal, target_type); + case TypeId::kTimestampTz: + return CastFromTimestampTz(literal, target_type); + case TypeId::kBoolean: default: break; } diff --git a/src/iceberg/test/date_time_util_test.cc b/src/iceberg/test/date_time_util_test.cc new file mode 100644 index 00000000..a879ccec --- /dev/null +++ b/src/iceberg/test/date_time_util_test.cc @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/date_time_util.h" + +#include + +#include + +namespace iceberg { + +// Constants for better readability +constexpr int64_t kMicrosPerSecond = 1000000LL; +constexpr int64_t kSecondsPerMinute = 60LL; +constexpr int64_t kSecondsPerHour = 3600LL; +constexpr int64_t kMicrosPerDay = 86400000000LL; + +// Helper function for creating tm with designated initializers +std::tm make_tm(int year, int mon, int mday, int hour = 0, int min = 0, int sec = 0) { + return std::tm{.tm_sec = sec, + .tm_min = min, + .tm_hour = hour, + .tm_mday = mday, + .tm_mon = mon, + .tm_year = year - 1900, + .tm_wday = 0, + .tm_yday = 0, + .tm_isdst = -1}; +} + +// MicrosToDays Tests +TEST(DateTimeUtilTest, MicrosToDaysUnixEpoch) { + // Unix epoch (1970-01-01 00:00:00 UTC) should be day 0 + EXPECT_EQ(MicrosToDays(0), 0); +} + +TEST(DateTimeUtilTest, MicrosToDaysPositiveValues) { + // Test with cleaner constant usage + EXPECT_EQ(MicrosToDays(kMicrosPerDay), 1); + EXPECT_EQ(MicrosToDays(2 * kMicrosPerDay), 2); + EXPECT_EQ(MicrosToDays(365 * kMicrosPerDay), 365); + + // Test partial day - should floor down + EXPECT_EQ(MicrosToDays(kMicrosPerDay - 1), 0); + EXPECT_EQ(MicrosToDays(kMicrosPerDay + 12 * kSecondsPerHour * kMicrosPerSecond), 1); +} + +TEST(DateTimeUtilTest, MicrosToDaysNegativeValues) { + EXPECT_EQ(MicrosToDays(-kMicrosPerDay), -1); + EXPECT_EQ(MicrosToDays(-2 * kMicrosPerDay), -2); + + // Test partial negative day - should floor down (more negative) + EXPECT_EQ(MicrosToDays(-1), -1); + EXPECT_EQ(MicrosToDays(-kMicrosPerDay + 1), -1); +} + +// 
TimegmCustom Tests +TEST(DateTimeUtilTest, TimegmCustomUnixEpoch) { + auto tm = make_tm(1970, 0, 1); // Much cleaner! + EXPECT_EQ(TimegmCustom(&tm), 0); +} + +TEST(DateTimeUtilTest, TimegmCustomValidDates) { + // 2000-01-01 00:00:00 UTC = 946684800 seconds since epoch + auto tm = make_tm(2000, 0, 1); + EXPECT_EQ(TimegmCustom(&tm), 946684800); + + // 2020-12-31 23:59:59 UTC = 1609459199 seconds since epoch + tm = make_tm(2020, 11, 31, 23, 59, 59); + EXPECT_EQ(TimegmCustom(&tm), 1609459199); +} + +TEST(DateTimeUtilTest, TimegmCustomLeapYear) { + // 2000-02-29 00:00:00 UTC (leap year) + auto tm = make_tm(2000, 1, 29); // Much more readable! + + // Should not crash and return valid result + time_t result = TimegmCustom(&tm); + EXPECT_GT(result, 0); +} + +// ParseDateString Tests +TEST(DateTimeUtilTest, ParseDateStringValidFormats) { + // Unix epoch + auto result = ParseDateString("1970-01-01"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 0); + + // Common dates + result = ParseDateString("2000-01-01"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 10957); // Days since epoch + + result = ParseDateString("2020-12-31"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 18627); // Days since epoch +} + +TEST(DateTimeUtilTest, ParseDateStringLeapYear) { + // Leap year date + auto result = ParseDateString("2000-02-29"); + ASSERT_TRUE(result.has_value()); + EXPECT_GT(result.value(), 0); + + // Non-leap year - February 29th should fail in validation if implemented + result = ParseDateString("1999-02-29"); + // Note: Current implementation might not validate this properly + // This is a known limitation that could be improved +} + +TEST(DateTimeUtilTest, ParseDateStringInvalidFormats) { + // Wrong format + EXPECT_FALSE(ParseDateString("01-01-2000").has_value()); + EXPECT_FALSE(ParseDateString("2000/01/01").has_value()); + + // Invalid dates + EXPECT_FALSE(ParseDateString("2000-13-01").has_value()); // Invalid month + 
EXPECT_FALSE(ParseDateString("2000-01-32").has_value()); // Invalid day + EXPECT_FALSE(ParseDateString("2000-00-01").has_value()); // Invalid month + EXPECT_FALSE(ParseDateString("2000-01-00").has_value()); // Invalid day + + // Empty and malformed strings + EXPECT_FALSE(ParseDateString("").has_value()); + EXPECT_FALSE(ParseDateString("not-a-date").has_value()); + EXPECT_FALSE(ParseDateString("2000-01").has_value()); + EXPECT_FALSE(ParseDateString("2000-01-01-extra").has_value()); +} + +// ParseTimeString Tests +TEST(DateTimeUtilTest, ParseTimeStringValidFormats) { + // Basic time without fractional seconds + auto result = ParseTimeString("00:00:00"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 0); + + result = ParseTimeString("12:30:45"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), (12 * 3600 + 30 * 60 + 45) * 1000000LL); + + result = ParseTimeString("23:59:59"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), (23 * 3600 + 59 * 60 + 59) * 1000000LL); +} + +TEST(DateTimeUtilTest, ParseTimeStringWithFractionalSeconds) { + constexpr int64_t base_time_micros = + (12 * kSecondsPerHour + 30 * kSecondsPerMinute + 45) * kMicrosPerSecond; + + // Single digit fractional + auto result = ParseTimeString("12:30:45.1"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), base_time_micros + 100000LL); + + // Three digit fractional + result = ParseTimeString("12:30:45.123"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), base_time_micros + 123000LL); + + // Six digit fractional (microseconds) + result = ParseTimeString("12:30:45.123456"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), base_time_micros + 123456LL); + + // More than 6 digits should be truncated + result = ParseTimeString("12:30:45.1234567890"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), base_time_micros + 123456LL); +} + +TEST(DateTimeUtilTest, ParseTimeStringInvalidFormats) { + // Invalid time values 
+ EXPECT_FALSE(ParseTimeString("24:00:00").has_value()); // Invalid hour + EXPECT_FALSE(ParseTimeString("12:60:00").has_value()); // Invalid minute + EXPECT_FALSE(ParseTimeString("-1:30:45").has_value()); // Negative hour + + // Wrong format + EXPECT_FALSE(ParseTimeString("12-30-45").has_value()); + EXPECT_FALSE(ParseTimeString("12:30:45:67").has_value()); + + // Empty and malformed + EXPECT_FALSE(ParseTimeString("").has_value()); + EXPECT_FALSE(ParseTimeString("not-a-time").has_value()); + EXPECT_FALSE(ParseTimeString("12:30:45 extra").has_value()); +} + +// ParseTimestampString Tests +TEST(DateTimeUtilTest, ParseTimestampStringValidFormats) { + // Unix epoch + auto result = ParseTimestampString("1970-01-01T00:00:00"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 0); + + // Standard timestamp + result = ParseTimestampString("2000-01-01T12:30:45"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), + 946684800LL * 1000000LL + (12 * 3600 + 30 * 60 + 45) * 1000000LL); + + // With fractional seconds + result = ParseTimestampString("2000-01-01T12:30:45.123456"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), + 946684800LL * 1000000LL + (12 * 3600 + 30 * 60 + 45) * 1000000LL + 123456LL); +} + +TEST(DateTimeUtilTest, ParseTimestampStringInvalidFormats) { + // Wrong separator + EXPECT_FALSE(ParseTimestampString("2000-01-01 12:30:45").has_value()); + + // Invalid date part + EXPECT_FALSE(ParseTimestampString("2000-13-01T12:30:45").has_value()); + + // Invalid time part + EXPECT_FALSE(ParseTimestampString("2000-01-01T25:30:45").has_value()); + + // Incomplete + EXPECT_FALSE(ParseTimestampString("2000-01-01T").has_value()); + + // Extra characters + EXPECT_FALSE(ParseTimestampString("2000-01-01T12:30:45 extra").has_value()); +} + +// ParseTimestampTzString Tests +TEST(DateTimeUtilTest, ParseTimestampTzStringValidFormats) { + // Without Z suffix (should still work) + auto result = ParseTimestampTzString("2000-01-01T12:30:45"); + 
ASSERT_TRUE(result.has_value()); + EXPECT_GT(result.value(), 0); + + // With Z suffix + result = ParseTimestampTzString("2000-01-01T12:30:45Z"); + ASSERT_TRUE(result.has_value()); + EXPECT_GT(result.value(), 0); + + // With fractional seconds and Z + result = ParseTimestampTzString("2000-01-01T12:30:45.123456Z"); + ASSERT_TRUE(result.has_value()); + EXPECT_GT(result.value(), 0); +} + +TEST(DateTimeUtilTest, ParseTimestampTzStringInvalidFormats) { + // Invalid timezone formats (only Z is supported) + EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45+08:00").has_value()); + EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45-05:00").has_value()); + EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45GMT").has_value()); + + // Multiple Z characters + EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45ZZ").has_value()); + + // Extra characters after Z + EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45Z extra").has_value()); +} + +// ParseFractionalSeconds Tests +TEST(DateTimeUtilTest, ParseFractionalSecondsValidInputs) { + // Empty string should return 0 + auto result = ParseFractionalSeconds(""); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 0); + + // Single digit (100000 microseconds = 0.1 seconds) + result = ParseFractionalSeconds("1"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 100000); + + // Two digits + result = ParseFractionalSeconds("12"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 120000); + + // Three digits (milliseconds) + result = ParseFractionalSeconds("123"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 123000); + + // Six digits (microseconds) + result = ParseFractionalSeconds("123456"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 123456); + + // Leading zeros + result = ParseFractionalSeconds("000123"); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), 123); +} + +TEST(DateTimeUtilTest, 
ParseFractionalSecondsInvalidInputs) { + // More than 6 digits + EXPECT_FALSE(ParseFractionalSeconds("1234567").has_value()); + + // Non-numeric characters + EXPECT_FALSE(ParseFractionalSeconds("12a").has_value()); + EXPECT_FALSE(ParseFractionalSeconds("abc").has_value()); + EXPECT_FALSE(ParseFractionalSeconds("12.3").has_value()); + EXPECT_FALSE(ParseFractionalSeconds("12-3").has_value()); + EXPECT_FALSE(ParseFractionalSeconds("-123").has_value()); + EXPECT_FALSE(ParseFractionalSeconds(" 123").has_value()); +} + +// Edge Cases and Integration Tests +TEST(DateTimeUtilTest, EdgeCasesBoundaryValues) { + // Test year boundaries + auto date_result = ParseDateString("1970-01-01"); + ASSERT_TRUE(date_result.has_value()); + EXPECT_EQ(date_result.value(), 0); + + // Test time boundaries + auto time_result = ParseTimeString("00:00:00.000000"); + ASSERT_TRUE(time_result.has_value()); + EXPECT_EQ(time_result.value(), 0); + + time_result = ParseTimeString("23:59:59.999999"); + ASSERT_TRUE(time_result.has_value()); + EXPECT_EQ(time_result.value(), 86399999999LL); // Almost 1 day in microseconds +} + +TEST(DateTimeUtilTest, ConsistencyBetweenFunctions) { + // Ensure MicrosToDays and date parsing are consistent + auto date_days = ParseDateString("2000-01-01"); + ASSERT_TRUE(date_days.has_value()); + + auto timestamp_micros = ParseTimestampString("2000-01-01T00:00:00"); + ASSERT_TRUE(timestamp_micros.has_value()); + + auto derived_days = MicrosToDays(timestamp_micros.value()); + EXPECT_EQ(date_days.value(), derived_days); +} + +} // namespace iceberg diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index bd7544bf..76130fac 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -30,6 +30,22 @@ namespace iceberg { +namespace { + +// Helper function to assert that a CastTo operation succeeds and checks +// the resulting type and value. 
+template +void AssertCastSucceeds(const Result& result, TypeId expected_type_id, + const T& expected_value) { + ASSERT_THAT(result, IsOk()); + EXPECT_EQ(result->type()->type_id(), expected_type_id); + ASSERT_NO_THROW(EXPECT_EQ(std::get(result->value()), expected_value)) + << "Value type mismatch in std::get. Expected type for TypeId " + << static_cast(expected_type_id); +} + +} // namespace + // Boolean type tests TEST(LiteralTest, BooleanBasics) { auto true_literal = Literal::Boolean(true); @@ -78,20 +94,17 @@ TEST(LiteralTest, IntCastTo) { auto int_literal = Literal::Int(42); // Cast to Long - auto long_result = int_literal.CastTo(iceberg::int64()); - ASSERT_THAT(long_result, IsOk()); - EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong); - EXPECT_EQ(std::get(long_result->value()), 42L); + AssertCastSucceeds(int_literal.CastTo(int64()), TypeId::kLong, + static_cast(42)); // Cast to Float - auto float_result = int_literal.CastTo(iceberg::float32()); - ASSERT_THAT(float_result, IsOk()); - EXPECT_EQ(float_result->type()->type_id(), TypeId::kFloat); + AssertCastSucceeds(int_literal.CastTo(float32()), TypeId::kFloat, 42.0f); // Cast to Double - auto double_result = int_literal.CastTo(iceberg::float64()); - ASSERT_THAT(double_result, IsOk()); - EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); + AssertCastSucceeds(int_literal.CastTo(float64()), TypeId::kDouble, 42.0); + + // Cast to Date + AssertCastSucceeds(int_literal.CastTo(date()), TypeId::kDate, 42); } // Long type tests @@ -120,23 +133,32 @@ TEST(LiteralTest, LongCastTo) { auto long_literal = Literal::Long(42L); // Cast to Int (within range) - auto int_result = long_literal.CastTo(iceberg::int32()); - ASSERT_THAT(int_result, IsOk()); - EXPECT_EQ(int_result->type()->type_id(), TypeId::kInt); - EXPECT_EQ(int_result->ToString(), "42"); + AssertCastSucceeds(long_literal.CastTo(int32()), TypeId::kInt, 42); // Cast to Float - auto float_result = long_literal.CastTo(iceberg::float32()); - 
ASSERT_THAT(float_result, IsOk()); - EXPECT_EQ(float_result->type()->type_id(), TypeId::kFloat); + AssertCastSucceeds(long_literal.CastTo(float32()), TypeId::kFloat, 42.0f); // Cast to Double - auto double_result = long_literal.CastTo(iceberg::float64()); - ASSERT_THAT(double_result, IsOk()); - EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); + AssertCastSucceeds(long_literal.CastTo(float64()), TypeId::kDouble, 42.0); + + // Cast to Date + AssertCastSucceeds(long_literal.CastTo(date()), TypeId::kDate, 42); + + // Cast to Time + AssertCastSucceeds(long_literal.CastTo(time()), TypeId::kTime, + static_cast(42L)); + + // Cast to Timestamp + AssertCastSucceeds(long_literal.CastTo(timestamp()), TypeId::kTimestamp, + static_cast(42L)); + + // Cast to TimestampTz + AssertCastSucceeds(long_literal.CastTo(timestamp_tz()), TypeId::kTimestampTz, + static_cast(42L)); } -TEST(LiteralTest, LongCastToIntOverflow) { +TEST(LiteralTest, LongCastToOverflow) { + // Test overflow cases auto max_long = Literal::Long(static_cast(std::numeric_limits::max()) + 1); auto min_long = @@ -149,6 +171,14 @@ TEST(LiteralTest, LongCastToIntOverflow) { auto min_result = min_long.CastTo(iceberg::int32()); ASSERT_THAT(min_result, IsOk()); EXPECT_TRUE(min_result->IsBelowMin()); + + max_result = max_long.CastTo(iceberg::date()); + ASSERT_THAT(max_result, IsOk()); + EXPECT_TRUE(max_result->IsAboveMax()); + + min_result = min_long.CastTo(iceberg::date()); + ASSERT_THAT(min_result, IsOk()); + EXPECT_TRUE(min_result->IsBelowMin()); } // Float type tests @@ -177,9 +207,8 @@ TEST(LiteralTest, FloatCastTo) { auto float_literal = Literal::Float(3.14f); // Cast to Double - auto double_result = float_literal.CastTo(iceberg::float64()); - ASSERT_THAT(double_result, IsOk()); - EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); + AssertCastSucceeds(float_literal.CastTo(iceberg::float64()), TypeId::kDouble, + static_cast(3.14f)); } // Double type tests @@ -204,6 +233,29 @@ TEST(LiteralTest, 
DoubleComparison) { EXPECT_EQ(double2 <=> double1, std::partial_ordering::greater); } +TEST(LiteralTest, DoubleCastTo) { + auto double_literal = Literal::Double(3.14); + + // Cast to Float + AssertCastSucceeds(double_literal.CastTo(iceberg::float32()), TypeId::kFloat, 3.14f); +} + +TEST(LiteralTest, DoubleCastToOverflow) { + // Test overflow cases for Double to Float + auto max_double = + Literal::Double(static_cast(std::numeric_limits::max()) * 2); + auto min_double = + Literal::Double(-static_cast(std::numeric_limits::max()) * 2); + + auto max_result = max_double.CastTo(iceberg::float32()); + ASSERT_THAT(max_result, IsOk()); + EXPECT_TRUE(max_result->IsAboveMax()); + + auto min_result = min_double.CastTo(iceberg::float32()); + ASSERT_THAT(min_result, IsOk()); + EXPECT_TRUE(min_result->IsBelowMin()); +} + // String type tests TEST(LiteralTest, StringBasics) { auto string_literal = Literal::String("hello world"); @@ -226,6 +278,73 @@ TEST(LiteralTest, StringComparison) { EXPECT_EQ(string2 <=> string1, std::partial_ordering::greater); } +TEST(LiteralTest, StringCastToDate) { + AssertCastSucceeds(Literal::String("2023-05-15").CastTo(iceberg::date()), TypeId::kDate, + 19492); + AssertCastSucceeds(Literal::String("1970-01-01").CastTo(iceberg::date()), TypeId::kDate, + 0); + AssertCastSucceeds(Literal::String("1969-12-31").CastTo(iceberg::date()), TypeId::kDate, + -1); + + // Invalid Formats + EXPECT_THAT(Literal::String("2023/05/15").CastTo(iceberg::date()), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(Literal::String("2023-05-15 extra").CastTo(iceberg::date()), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(Literal::String("2023-05").CastTo(iceberg::date()), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(LiteralTest, StringCastToTime) { + AssertCastSucceeds(Literal::String("12:00:00").CastTo(iceberg::time()), TypeId::kTime, + static_cast(43200000000)); + AssertCastSucceeds(Literal::String("12:34:56.123456").CastTo(iceberg::time()), + 
TypeId::kTime, static_cast(45296123456)); + AssertCastSucceeds(Literal::String("01:02:03.123").CastTo(iceberg::time()), + TypeId::kTime, static_cast(3723123000)); + AssertCastSucceeds(Literal::String("23:59:59.987654321").CastTo(iceberg::time()), + TypeId::kTime, static_cast(86399987654)); + + // Invalid Formats + EXPECT_THAT(Literal::String("12-00-00").CastTo(iceberg::time()), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(Literal::String("12:00:00 extra").CastTo(iceberg::time()), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(Literal::String("25:00:00").CastTo(iceberg::time()), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(LiteralTest, StringCastToTimestamp) { + AssertCastSucceeds(Literal::String("2023-05-15T12:00:00").CastTo(iceberg::timestamp()), + TypeId::kTimestamp, static_cast(1684152000000000)); + AssertCastSucceeds( + Literal::String("2023-05-15T12:34:56.123456").CastTo(iceberg::timestamp()), + TypeId::kTimestamp, static_cast(1684154096123456)); + + // Invalid Formats + EXPECT_THAT(Literal::String("2023-05-15 12:00:00").CastTo(iceberg::timestamp()), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(Literal::String("2023-05-15T12:00:00Z").CastTo(iceberg::timestamp()), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(LiteralTest, StringCastToTimestampTz) { + AssertCastSucceeds( + Literal::String("2023-05-15T12:34:56.123456Z").CastTo(iceberg::timestamp_tz()), + TypeId::kTimestampTz, static_cast(1684154096123456)); + AssertCastSucceeds( + Literal::String("2023-05-15T12:00:00").CastTo(iceberg::timestamp_tz()), + TypeId::kTimestampTz, static_cast(1684152000000000)); + + // Invalid & Unsupported Formats + EXPECT_THAT( + Literal::String("2023-05-15T12:00:00+08:00").CastTo(iceberg::timestamp_tz()), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT( + Literal::String("2023-05-15T12:00:00Z oops").CastTo(iceberg::timestamp_tz()), + IsError(ErrorKind::kInvalidArgument)); +} + // Binary type tests TEST(LiteralTest, BinaryBasics) { 
std::vector data = {0x01, 0x02, 0x03, 0xFF}; @@ -235,8 +354,8 @@ TEST(LiteralTest, BinaryBasics) { EXPECT_EQ(binary_literal.type()->type_id(), TypeId::kBinary); EXPECT_EQ(empty_binary.type()->type_id(), TypeId::kBinary); - EXPECT_EQ(binary_literal.ToString(), "010203FF"); - EXPECT_EQ(empty_binary.ToString(), ""); + EXPECT_EQ(binary_literal.ToString(), "X'010203FF'"); + EXPECT_EQ(empty_binary.ToString(), "X''"); } TEST(LiteralTest, BinaryComparison) { @@ -253,6 +372,194 @@ TEST(LiteralTest, BinaryComparison) { EXPECT_EQ(binary2 <=> binary1, std::partial_ordering::greater); } +// Fixed type tests +TEST(LiteralTest, FixedBasics) { + std::vector data = {0x01, 0x02, 0x03, 0xFF}; + auto fixed_literal = Literal::Fixed(data); + auto empty_fixed = Literal::Fixed({}); + + EXPECT_EQ(fixed_literal.type()->type_id(), TypeId::kFixed); + EXPECT_EQ(empty_fixed.type()->type_id(), TypeId::kFixed); + + EXPECT_EQ(fixed_literal.ToString(), "X'010203FF'"); + EXPECT_EQ(empty_fixed.ToString(), "X''"); +} + +TEST(LiteralTest, FixedComparison) { + std::vector data1 = {0x01, 0x02}; + std::vector data2 = {0x01, 0x03}; + std::vector data3 = {0x01, 0x02}; + + auto fixed1 = Literal::Fixed(data1); + auto fixed2 = Literal::Fixed(data2); + auto fixed3 = Literal::Fixed(data3); + + EXPECT_EQ(fixed1 <=> fixed3, std::partial_ordering::equivalent); + EXPECT_EQ(fixed1 <=> fixed2, std::partial_ordering::less); + EXPECT_EQ(fixed2 <=> fixed1, std::partial_ordering::greater); +} + +// Date type tests +TEST(LiteralTest, DateBasics) { + auto date_literal = Literal::Date(19489); // May 15, 2023 + auto negative_date = Literal::Date(-1); // December 31, 1969 + + EXPECT_EQ(date_literal.type()->type_id(), TypeId::kDate); + EXPECT_EQ(negative_date.type()->type_id(), TypeId::kDate); + + EXPECT_EQ(date_literal.ToString(), "19489"); + EXPECT_EQ(negative_date.ToString(), "-1"); +} + +TEST(LiteralTest, DateComparison) { + auto date1 = Literal::Date(100); + auto date2 = Literal::Date(200); + auto date3 = 
Literal::Date(100); + + EXPECT_EQ(date1 <=> date3, std::partial_ordering::equivalent); + EXPECT_EQ(date1 <=> date2, std::partial_ordering::less); + EXPECT_EQ(date2 <=> date1, std::partial_ordering::greater); +} + +// Time type tests +TEST(LiteralTest, TimeBasics) { + auto time_literal = Literal::Time(43200000000LL); // 12:00:00 in microseconds + auto midnight = Literal::Time(0LL); + + EXPECT_EQ(time_literal.type()->type_id(), TypeId::kTime); + EXPECT_EQ(midnight.type()->type_id(), TypeId::kTime); + + EXPECT_EQ(time_literal.ToString(), "43200000000"); + EXPECT_EQ(midnight.ToString(), "0"); +} + +TEST(LiteralTest, TimeComparison) { + auto time1 = Literal::Time(43200000000LL); // 12:00:00 + auto time2 = Literal::Time(86400000000LL); // 24:00:00 (invalid but for testing) + auto time3 = Literal::Time(43200000000LL); + + EXPECT_EQ(time1 <=> time3, std::partial_ordering::equivalent); + EXPECT_EQ(time1 <=> time2, std::partial_ordering::less); + EXPECT_EQ(time2 <=> time1, std::partial_ordering::greater); +} + +// Timestamp type tests +TEST(LiteralTest, TimestampBasics) { + auto timestamp_literal = + Literal::Timestamp(1684137600000000LL); // May 15, 2023 08:00:00 UTC + auto epoch = Literal::Timestamp(0LL); + + EXPECT_EQ(timestamp_literal.type()->type_id(), TypeId::kTimestamp); + EXPECT_EQ(epoch.type()->type_id(), TypeId::kTimestamp); + + EXPECT_EQ(timestamp_literal.ToString(), "1684137600000000"); + EXPECT_EQ(epoch.ToString(), "0"); +} + +TEST(LiteralTest, TimestampComparison) { + auto timestamp1 = Literal::Timestamp(1000000LL); + auto timestamp2 = Literal::Timestamp(2000000LL); + auto timestamp3 = Literal::Timestamp(1000000LL); + + EXPECT_EQ(timestamp1 <=> timestamp3, std::partial_ordering::equivalent); + EXPECT_EQ(timestamp1 <=> timestamp2, std::partial_ordering::less); + EXPECT_EQ(timestamp2 <=> timestamp1, std::partial_ordering::greater); +} + +TEST(LiteralTest, TimestampCastTo) { + const int64_t micros = 1684137600000000; // May 15, 2023 08:00:00 UTC + auto
timestamp_literal = Literal::Timestamp(micros); + + // Cast to Date (1684137600000000 / 86400000000 = 19492.33...) -> floors to 19492 + AssertCastSucceeds(timestamp_literal.CastTo(iceberg::date()), TypeId::kDate, 19492); + + // Cast to TimestampTz + AssertCastSucceeds(timestamp_literal.CastTo(iceberg::timestamp_tz()), + TypeId::kTimestampTz, micros); +} + +// TimestampTz type tests +TEST(LiteralTest, TimestampTzBasics) { + auto timestamptz_literal = + Literal::TimestampTz(1684137600000000LL); // May 15, 2023 08:00:00 UTC + auto epoch = Literal::TimestampTz(0LL); + + EXPECT_EQ(timestamptz_literal.type()->type_id(), TypeId::kTimestampTz); + EXPECT_EQ(epoch.type()->type_id(), TypeId::kTimestampTz); + + EXPECT_EQ(timestamptz_literal.ToString(), "1684137600000000"); + EXPECT_EQ(epoch.ToString(), "0"); +} + +TEST(LiteralTest, TimestampTzComparison) { + auto timestamptz1 = Literal::TimestampTz(1000000LL); + auto timestamptz2 = Literal::TimestampTz(2000000LL); + auto timestamptz3 = Literal::TimestampTz(1000000LL); + + EXPECT_EQ(timestamptz1 <=> timestamptz3, std::partial_ordering::equivalent); + EXPECT_EQ(timestamptz1 <=> timestamptz2, std::partial_ordering::less); + EXPECT_EQ(timestamptz2 <=> timestamptz1, std::partial_ordering::greater); +} + +TEST(LiteralTest, TimestampTzCastTo) { + const int64_t micros = 1684137600000000; // May 15, 2023 08:00:00 UTC + auto timestamptz_literal = Literal::TimestampTz(micros); + + // Cast to Date + AssertCastSucceeds(timestamptz_literal.CastTo(iceberg::date()), TypeId::kDate, 19492); + + // Cast to Timestamp + AssertCastSucceeds(timestamptz_literal.CastTo(iceberg::timestamp()), TypeId::kTimestamp, + micros); +} + +TEST(LiteralTest, BinaryCastTo) { + std::vector data4 = {0x01, 0x02, 0x03, 0x04}; + auto binary_literal = Literal::Binary(data4); + + // Cast to Fixed with matching length + AssertCastSucceeds(binary_literal.CastTo(iceberg::fixed(4)), TypeId::kFixed, data4); + + // Cast to Fixed with different length should fail +
EXPECT_THAT(binary_literal.CastTo(iceberg::fixed(5)), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(LiteralTest, FixedCastTo) { + std::vector data4 = {0x01, 0x02, 0x03, 0x04}; + auto fixed_literal = Literal::Fixed(data4); + + // Cast to Binary + AssertCastSucceeds(fixed_literal.CastTo(iceberg::binary()), TypeId::kBinary, data4); + + // Cast to Fixed with same length + AssertCastSucceeds(fixed_literal.CastTo(iceberg::fixed(4)), TypeId::kFixed, data4); + + // Cast to Fixed with different length should fail + EXPECT_THAT(fixed_literal.CastTo(iceberg::fixed(5)), IsError(ErrorKind::kNotSupported)); +} + +// Microseconds to Days conversion tests +TEST(LiteralTest, MicrosToDaysConversion) { + constexpr int64_t kMicrosPerDay = 86400000000LL; + + // Test full day conversion + AssertCastSucceeds(Literal::Timestamp(kMicrosPerDay).CastTo(iceberg::date()), + TypeId::kDate, 1); + + // Test partial day conversion (should floor to 0) + AssertCastSucceeds(Literal::Timestamp(kMicrosPerDay / 2).CastTo(iceberg::date()), + TypeId::kDate, 0); + + // Test negative timestamp (should floor to -1) + AssertCastSucceeds(Literal::Timestamp(-kMicrosPerDay / 2).CastTo(iceberg::date()), + TypeId::kDate, -1); + + // Test exactly -1 day + AssertCastSucceeds(Literal::Timestamp(-kMicrosPerDay).CastTo(iceberg::date()), + TypeId::kDate, -1); +} + // Cross-type comparison tests TEST(LiteralTest, CrossTypeComparison) { auto int_literal = Literal::Int(42); diff --git a/src/iceberg/util/date_time_util.cc b/src/iceberg/util/date_time_util.cc new file mode 100644 index 00000000..fdbbe5fc --- /dev/null +++ b/src/iceberg/util/date_time_util.cc @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/date_time_util.h" + +#include +#include +#include +#include + +#include "iceberg/exception.h" + +namespace iceberg { + +namespace { + +// Helper function to parse fractional seconds from input stream +Result ParseAndAddFractionalSeconds(std::istringstream& in) { + if (in.peek() != '.') { + return 0LL; + } + + in.ignore(); + std::string fractional_str; + char c; + while (in.get(c) && std::isdigit(c)) { + fractional_str += c; + } + if (in) { + in.unget(); + } + + if (fractional_str.length() > 6) { + fractional_str.resize(6); + } + + return ParseFractionalSeconds(fractional_str); +} + +} // namespace + +int32_t MicrosToDays(int64_t micros_since_epoch) { + std::chrono::microseconds micros(micros_since_epoch); + auto days_duration = std::chrono::floor(micros); + return static_cast(days_duration.count()); +} + +time_t TimegmCustom(std::tm* tm) { +#if defined(_WIN32) + return _mkgmtime(tm); +#else + return timegm(tm); +#endif +} + +Result ParseFractionalSeconds(const std::string& fractional_str) { + if (fractional_str.empty()) { + return 0LL; + } + + if (fractional_str.length() > 6) { + return InvalidArgument("Fractional seconds cannot exceed 6 digits"); + } + + // Validate that all characters are digits + for (char c : fractional_str) { + if (!std::isdigit(c)) { + return InvalidArgument("Fractional seconds must contain only digits"); + } + } + + try { + 
std::string padded_fractional = fractional_str; + padded_fractional.append(6 - fractional_str.length(), '0'); + return std::stoll(padded_fractional); + } catch (const std::exception&) { + return InvalidArgument("Failed to parse fractional seconds '{}'", fractional_str); + } +} + +Result ParseDateString(const std::string& date_str) { + std::istringstream in{date_str}; + std::tm tm = {}; + + // Parse "YYYY-MM-DD" into days since 1970-01-01 epoch. + in >> std::get_time(&tm, "%Y-%m-%d"); + + if (in.fail() || tm.tm_mday == 0 || !in.eof()) { + return InvalidArgument("Failed to parse '{}' as a valid Date (expected YYYY-MM-DD)", + date_str); + } + + auto time_point = std::chrono::system_clock::from_time_t(TimegmCustom(&tm)); + auto days_since_epoch = std::chrono::floor(time_point); + return static_cast(days_since_epoch.time_since_epoch().count()); +} + +Result ParseTimeString(const std::string& time_str) { + std::istringstream in{time_str}; + std::tm tm = {}; + + // Parse "HH:MM:SS.ffffff" into microseconds since midnight. 
+ in >> std::get_time(&tm, "%H:%M:%S"); + + if (in.fail()) { + return InvalidArgument( + "Failed to parse '{}' as a valid Time (expected HH:MM:SS.ffffff)", time_str); + } + + int64_t total_micros = (tm.tm_hour * 3600LL + tm.tm_min * 60LL + tm.tm_sec) * 1000000LL; + + auto fractional_result = ParseAndAddFractionalSeconds(in); + if (!fractional_result.has_value()) { + return std::unexpected(fractional_result.error()); + } + total_micros += fractional_result.value(); + + if (in.peek() != EOF) { + return InvalidArgument("Unconsumed characters found after parsing Time '{}'", + time_str); + } + + return total_micros; +} + +Result ParseTimestampString(const std::string& timestamp_str) { + std::istringstream in{timestamp_str}; + std::tm tm = {}; + + // Parse "YYYY-MM-DDTHH:MM:SS.ffffff" + in >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S"); + + if (in.fail()) { + return InvalidArgument( + "Failed to parse '{}' as a valid Timestamp (expected YYYY-MM-DDTHH:MM:SS...)", + timestamp_str); + } + + auto seconds_since_epoch = TimegmCustom(&tm); + int64_t total_micros = seconds_since_epoch * 1000000LL; + + auto fractional_result = ParseAndAddFractionalSeconds(in); + if (!fractional_result.has_value()) { + return std::unexpected(fractional_result.error()); + } + total_micros += fractional_result.value(); + + if (in.peek() != EOF) { + return InvalidArgument("Unconsumed characters found after parsing Timestamp '{}'", + timestamp_str); + } + + return total_micros; +} + +Result ParseTimestampTzString(const std::string& timestamptz_str) { + std::istringstream in{timestamptz_str}; + std::tm tm = {}; + + // Parse "YYYY-MM-DDTHH:MM:SS.ffffff" and optional 'Z' + in >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S"); + + if (in.fail()) { + return InvalidArgument( + "Failed to parse '{}' as a valid Timestamp (expected YYYY-MM-DDTHH:MM:SS...)", + timestamptz_str); + } + + auto seconds_since_epoch = TimegmCustom(&tm); + int64_t total_micros = seconds_since_epoch * 1000000LL; + + auto fractional_result = 
ParseAndAddFractionalSeconds(in); + if (!fractional_result.has_value()) { + return std::unexpected(fractional_result.error()); + } + total_micros += fractional_result.value(); + + // NOTE: This implementation DOES NOT support timezone offsets like + // '+08:00' or '-07:00'. It only supports the UTC designator 'Z'. + if (in.peek() == 'Z') { + in.ignore(); // Consume 'Z' + } + + if (in.peek() != EOF) { + return InvalidArgument("Unconsumed characters found after parsing Timestamp '{}'", + timestamptz_str); + } + + return total_micros; +} + +} // namespace iceberg diff --git a/src/iceberg/util/date_time_util.h b/src/iceberg/util/date_time_util.h new file mode 100644 index 00000000..c725afbd --- /dev/null +++ b/src/iceberg/util/date_time_util.h @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief Convert microseconds since epoch to days since epoch +/// \param micros_since_epoch Microseconds since Unix epoch +/// \return Days since Unix epoch (1970-01-01) +ICEBERG_EXPORT int32_t MicrosToDays(int64_t micros_since_epoch); + +/// \brief Cross-platform implementation of timegm function +/// \param tm Time structure to convert +/// \return Time as seconds since Unix epoch +ICEBERG_EXPORT time_t TimegmCustom(std::tm* tm); + +/// \brief Parse a date string in YYYY-MM-DD format +/// \param date_str Date string to parse +/// \return Days since Unix epoch (1970-01-01) on success +ICEBERG_EXPORT Result ParseDateString(const std::string& date_str); + +/// \brief Parse a time string in HH:MM:SS.ffffff format +/// \param time_str Time string to parse +/// \return Microseconds since midnight on success +ICEBERG_EXPORT Result ParseTimeString(const std::string& time_str); + +/// \brief Parse a timestamp string in YYYY-MM-DDTHH:MM:SS.ffffff format +/// \param timestamp_str Timestamp string to parse +/// \return Microseconds since Unix epoch on success +ICEBERG_EXPORT Result ParseTimestampString(const std::string& timestamp_str); + +/// \brief Parse a timestamp with timezone string in YYYY-MM-DDTHH:MM:SS.ffffffZ format +/// \param timestamptz_str Timestamp with timezone string to parse +/// \return Microseconds since Unix epoch on success +/// +/// \note This implementation only supports UTC designator 'Z', not timezone offsets +ICEBERG_EXPORT Result ParseTimestampTzString(const std::string& timestamptz_str); + +/// \brief Parse fractional seconds from a string +/// \param fractional_str Fractional seconds string (up to 6 digits) +/// \return Microseconds value of the fractional part +ICEBERG_EXPORT Result ParseFractionalSeconds(const std::string& fractional_str); + +} // namespace iceberg From 
9240512fd55127ccd6837de2be199ffbefd4055e Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Fri, 12 Sep 2025 14:45:36 +0800 Subject: [PATCH 2/7] remove string cast to time types --- src/iceberg/expression/literal.cc | 47 +--- src/iceberg/test/date_time_util_test.cc | 345 ------------------------ src/iceberg/test/literal_test.cc | 112 -------- src/iceberg/util/date_time_util.cc | 208 -------------- src/iceberg/util/date_time_util.h | 67 ----- 5 files changed, 9 insertions(+), 770 deletions(-) delete mode 100644 src/iceberg/test/date_time_util_test.cc delete mode 100644 src/iceberg/util/date_time_util.cc delete mode 100644 src/iceberg/util/date_time_util.h diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 783fa097..a5e883ef 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -22,12 +22,8 @@ #include #include -#include - -#include "iceberg/exception.h" #include "iceberg/util/conversions.h" -#include "iceberg/util/macros.h" -#include "iceberg/util/date_time_util.h" +#include "iceberg/exception.h" namespace iceberg { @@ -192,37 +188,12 @@ Result LiteralCaster::CastFromString( const auto& str_val = std::get(literal.value_); switch (target_type->type_id()) { - case TypeId::kDate: { - auto days_result = ParseDateString(str_val); - if (!days_result.has_value()) { - return std::unexpected(days_result.error()); - } - return Literal::Date(days_result.value()); - } - - case TypeId::kTime: { - auto micros_result = ParseTimeString(str_val); - if (!micros_result.has_value()) { - return std::unexpected(micros_result.error()); - } - return Literal::Time(micros_result.value()); - } - - case TypeId::kTimestamp: { - auto micros_result = ParseTimestampString(str_val); - if (!micros_result.has_value()) { - return std::unexpected(micros_result.error()); - } - return Literal::Timestamp(micros_result.value()); - } - - case TypeId::kTimestampTz: { - auto micros_result = ParseTimestampTzString(str_val); - if 
(!micros_result.has_value()) { - return std::unexpected(micros_result.error()); - } - return Literal::TimestampTz(micros_result.value()); - } + case TypeId::kDate: + case TypeId::kTime: + case TypeId::kTimestamp: + case TypeId::kTimestampTz: + return NotImplemented("Cast from String to {} is not implemented yet", + target_type->ToString()); // TODO(Li Feiyang): Implement cast from String to uuid and decimal default: @@ -237,7 +208,7 @@ Result LiteralCaster::CastFromTimestamp( switch (target_type->type_id()) { case TypeId::kDate: - return Literal::Date(MicrosToDays(timestamp_val)); + return NotImplemented("Cast from Timestamp to Date is not implemented yet"); case TypeId::kTimestampTz: return Literal::TimestampTz(timestamp_val); default: @@ -252,7 +223,7 @@ Result LiteralCaster::CastFromTimestampTz( switch (target_type->type_id()) { case TypeId::kDate: - return Literal::Date(MicrosToDays(micros)); + return NotImplemented("Cast from TimestampTz to Date is not implemented yet"); case TypeId::kTimestamp: return Literal::Timestamp(micros); default: diff --git a/src/iceberg/test/date_time_util_test.cc b/src/iceberg/test/date_time_util_test.cc deleted file mode 100644 index a879ccec..00000000 --- a/src/iceberg/test/date_time_util_test.cc +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "iceberg/util/date_time_util.h" - -#include - -#include - -namespace iceberg { - -// Constants for better readability -constexpr int64_t kMicrosPerSecond = 1000000LL; -constexpr int64_t kSecondsPerMinute = 60LL; -constexpr int64_t kSecondsPerHour = 3600LL; -constexpr int64_t kMicrosPerDay = 86400000000LL; - -// Helper function for creating tm with designated initializers -std::tm make_tm(int year, int mon, int mday, int hour = 0, int min = 0, int sec = 0) { - return std::tm{.tm_sec = sec, - .tm_min = min, - .tm_hour = hour, - .tm_mday = mday, - .tm_mon = mon, - .tm_year = year - 1900, - .tm_wday = 0, - .tm_yday = 0, - .tm_isdst = -1}; -} - -// MicrosToDays Tests -TEST(DateTimeUtilTest, MicrosToDaysUnixEpoch) { - // Unix epoch (1970-01-01 00:00:00 UTC) should be day 0 - EXPECT_EQ(MicrosToDays(0), 0); -} - -TEST(DateTimeUtilTest, MicrosToDaysPositiveValues) { - // Test with cleaner constant usage - EXPECT_EQ(MicrosToDays(kMicrosPerDay), 1); - EXPECT_EQ(MicrosToDays(2 * kMicrosPerDay), 2); - EXPECT_EQ(MicrosToDays(365 * kMicrosPerDay), 365); - - // Test partial day - should floor down - EXPECT_EQ(MicrosToDays(kMicrosPerDay - 1), 0); - EXPECT_EQ(MicrosToDays(kMicrosPerDay + 12 * kSecondsPerHour * kMicrosPerSecond), 1); -} - -TEST(DateTimeUtilTest, MicrosToDaysNegativeValues) { - EXPECT_EQ(MicrosToDays(-kMicrosPerDay), -1); - EXPECT_EQ(MicrosToDays(-2 * kMicrosPerDay), -2); - - // Test partial negative day - should floor down (more negative) - EXPECT_EQ(MicrosToDays(-1), -1); - EXPECT_EQ(MicrosToDays(-kMicrosPerDay + 1), -1); -} - -// TimegmCustom Tests -TEST(DateTimeUtilTest, TimegmCustomUnixEpoch) { - auto tm = make_tm(1970, 0, 1); // Much cleaner! 
- EXPECT_EQ(TimegmCustom(&tm), 0); -} - -TEST(DateTimeUtilTest, TimegmCustomValidDates) { - // 2000-01-01 00:00:00 UTC = 946684800 seconds since epoch - auto tm = make_tm(2000, 0, 1); - EXPECT_EQ(TimegmCustom(&tm), 946684800); - - // 2020-12-31 23:59:59 UTC = 1609459199 seconds since epoch - tm = make_tm(2020, 11, 31, 23, 59, 59); - EXPECT_EQ(TimegmCustom(&tm), 1609459199); -} - -TEST(DateTimeUtilTest, TimegmCustomLeapYear) { - // 2000-02-29 00:00:00 UTC (leap year) - auto tm = make_tm(2000, 1, 29); // Much more readable! - - // Should not crash and return valid result - time_t result = TimegmCustom(&tm); - EXPECT_GT(result, 0); -} - -// ParseDateString Tests -TEST(DateTimeUtilTest, ParseDateStringValidFormats) { - // Unix epoch - auto result = ParseDateString("1970-01-01"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 0); - - // Common dates - result = ParseDateString("2000-01-01"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 10957); // Days since epoch - - result = ParseDateString("2020-12-31"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 18627); // Days since epoch -} - -TEST(DateTimeUtilTest, ParseDateStringLeapYear) { - // Leap year date - auto result = ParseDateString("2000-02-29"); - ASSERT_TRUE(result.has_value()); - EXPECT_GT(result.value(), 0); - - // Non-leap year - February 29th should fail in validation if implemented - result = ParseDateString("1999-02-29"); - // Note: Current implementation might not validate this properly - // This is a known limitation that could be improved -} - -TEST(DateTimeUtilTest, ParseDateStringInvalidFormats) { - // Wrong format - EXPECT_FALSE(ParseDateString("01-01-2000").has_value()); - EXPECT_FALSE(ParseDateString("2000/01/01").has_value()); - - // Invalid dates - EXPECT_FALSE(ParseDateString("2000-13-01").has_value()); // Invalid month - EXPECT_FALSE(ParseDateString("2000-01-32").has_value()); // Invalid day - 
EXPECT_FALSE(ParseDateString("2000-00-01").has_value()); // Invalid month - EXPECT_FALSE(ParseDateString("2000-01-00").has_value()); // Invalid day - - // Empty and malformed strings - EXPECT_FALSE(ParseDateString("").has_value()); - EXPECT_FALSE(ParseDateString("not-a-date").has_value()); - EXPECT_FALSE(ParseDateString("2000-01").has_value()); - EXPECT_FALSE(ParseDateString("2000-01-01-extra").has_value()); -} - -// ParseTimeString Tests -TEST(DateTimeUtilTest, ParseTimeStringValidFormats) { - // Basic time without fractional seconds - auto result = ParseTimeString("00:00:00"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 0); - - result = ParseTimeString("12:30:45"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), (12 * 3600 + 30 * 60 + 45) * 1000000LL); - - result = ParseTimeString("23:59:59"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), (23 * 3600 + 59 * 60 + 59) * 1000000LL); -} - -TEST(DateTimeUtilTest, ParseTimeStringWithFractionalSeconds) { - constexpr int64_t base_time_micros = - (12 * kSecondsPerHour + 30 * kSecondsPerMinute + 45) * kMicrosPerSecond; - - // Single digit fractional - auto result = ParseTimeString("12:30:45.1"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), base_time_micros + 100000LL); - - // Three digit fractional - result = ParseTimeString("12:30:45.123"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), base_time_micros + 123000LL); - - // Six digit fractional (microseconds) - result = ParseTimeString("12:30:45.123456"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), base_time_micros + 123456LL); - - // More than 6 digits should be truncated - result = ParseTimeString("12:30:45.1234567890"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), base_time_micros + 123456LL); -} - -TEST(DateTimeUtilTest, ParseTimeStringInvalidFormats) { - // Invalid time values - EXPECT_FALSE(ParseTimeString("24:00:00").has_value()); // Invalid hour 
- EXPECT_FALSE(ParseTimeString("12:60:00").has_value()); // Invalid minute - EXPECT_FALSE(ParseTimeString("-1:30:45").has_value()); // Negative hour - - // Wrong format - EXPECT_FALSE(ParseTimeString("12-30-45").has_value()); - EXPECT_FALSE(ParseTimeString("12:30:45:67").has_value()); - - // Empty and malformed - EXPECT_FALSE(ParseTimeString("").has_value()); - EXPECT_FALSE(ParseTimeString("not-a-time").has_value()); - EXPECT_FALSE(ParseTimeString("12:30:45 extra").has_value()); -} - -// ParseTimestampString Tests -TEST(DateTimeUtilTest, ParseTimestampStringValidFormats) { - // Unix epoch - auto result = ParseTimestampString("1970-01-01T00:00:00"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 0); - - // Standard timestamp - result = ParseTimestampString("2000-01-01T12:30:45"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), - 946684800LL * 1000000LL + (12 * 3600 + 30 * 60 + 45) * 1000000LL); - - // With fractional seconds - result = ParseTimestampString("2000-01-01T12:30:45.123456"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), - 946684800LL * 1000000LL + (12 * 3600 + 30 * 60 + 45) * 1000000LL + 123456LL); -} - -TEST(DateTimeUtilTest, ParseTimestampStringInvalidFormats) { - // Wrong separator - EXPECT_FALSE(ParseTimestampString("2000-01-01 12:30:45").has_value()); - - // Invalid date part - EXPECT_FALSE(ParseTimestampString("2000-13-01T12:30:45").has_value()); - - // Invalid time part - EXPECT_FALSE(ParseTimestampString("2000-01-01T25:30:45").has_value()); - - // Incomplete - EXPECT_FALSE(ParseTimestampString("2000-01-01T").has_value()); - - // Extra characters - EXPECT_FALSE(ParseTimestampString("2000-01-01T12:30:45 extra").has_value()); -} - -// ParseTimestampTzString Tests -TEST(DateTimeUtilTest, ParseTimestampTzStringValidFormats) { - // Without Z suffix (should still work) - auto result = ParseTimestampTzString("2000-01-01T12:30:45"); - ASSERT_TRUE(result.has_value()); - EXPECT_GT(result.value(), 0); - - // 
With Z suffix - result = ParseTimestampTzString("2000-01-01T12:30:45Z"); - ASSERT_TRUE(result.has_value()); - EXPECT_GT(result.value(), 0); - - // With fractional seconds and Z - result = ParseTimestampTzString("2000-01-01T12:30:45.123456Z"); - ASSERT_TRUE(result.has_value()); - EXPECT_GT(result.value(), 0); -} - -TEST(DateTimeUtilTest, ParseTimestampTzStringInvalidFormats) { - // Invalid timezone formats (only Z is supported) - EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45+08:00").has_value()); - EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45-05:00").has_value()); - EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45GMT").has_value()); - - // Multiple Z characters - EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45ZZ").has_value()); - - // Extra characters after Z - EXPECT_FALSE(ParseTimestampTzString("2000-01-01T12:30:45Z extra").has_value()); -} - -// ParseFractionalSeconds Tests -TEST(DateTimeUtilTest, ParseFractionalSecondsValidInputs) { - // Empty string should return 0 - auto result = ParseFractionalSeconds(""); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 0); - - // Single digit (100000 microseconds = 0.1 seconds) - result = ParseFractionalSeconds("1"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 100000); - - // Two digits - result = ParseFractionalSeconds("12"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 120000); - - // Three digits (milliseconds) - result = ParseFractionalSeconds("123"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 123000); - - // Six digits (microseconds) - result = ParseFractionalSeconds("123456"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 123456); - - // Leading zeros - result = ParseFractionalSeconds("000123"); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(result.value(), 123); -} - -TEST(DateTimeUtilTest, ParseFractionalSecondsInvalidInputs) { - // More than 6 digits - 
EXPECT_FALSE(ParseFractionalSeconds("1234567").has_value()); - - // Non-numeric characters - EXPECT_FALSE(ParseFractionalSeconds("12a").has_value()); - EXPECT_FALSE(ParseFractionalSeconds("abc").has_value()); - EXPECT_FALSE(ParseFractionalSeconds("12.3").has_value()); - EXPECT_FALSE(ParseFractionalSeconds("12-3").has_value()); - EXPECT_FALSE(ParseFractionalSeconds("-123").has_value()); - EXPECT_FALSE(ParseFractionalSeconds(" 123").has_value()); -} - -// Edge Cases and Integration Tests -TEST(DateTimeUtilTest, EdgeCasesBoundaryValues) { - // Test year boundaries - auto date_result = ParseDateString("1970-01-01"); - ASSERT_TRUE(date_result.has_value()); - EXPECT_EQ(date_result.value(), 0); - - // Test time boundaries - auto time_result = ParseTimeString("00:00:00.000000"); - ASSERT_TRUE(time_result.has_value()); - EXPECT_EQ(time_result.value(), 0); - - time_result = ParseTimeString("23:59:59.999999"); - ASSERT_TRUE(time_result.has_value()); - EXPECT_EQ(time_result.value(), 86399999999LL); // Almost 1 day in microseconds -} - -TEST(DateTimeUtilTest, ConsistencyBetweenFunctions) { - // Ensure MicrosToDays and date parsing are consistent - auto date_days = ParseDateString("2000-01-01"); - ASSERT_TRUE(date_days.has_value()); - - auto timestamp_micros = ParseTimestampString("2000-01-01T00:00:00"); - ASSERT_TRUE(timestamp_micros.has_value()); - - auto derived_days = MicrosToDays(timestamp_micros.value()); - EXPECT_EQ(date_days.value(), derived_days); -} - -} // namespace iceberg diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 76130fac..43f67277 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -278,73 +278,6 @@ TEST(LiteralTest, StringComparison) { EXPECT_EQ(string2 <=> string1, std::partial_ordering::greater); } -TEST(LiteralTest, StringCastToDate) { - AssertCastSucceeds(Literal::String("2023-05-15").CastTo(iceberg::date()), TypeId::kDate, - 19492); - 
AssertCastSucceeds(Literal::String("1970-01-01").CastTo(iceberg::date()), TypeId::kDate, - 0); - AssertCastSucceeds(Literal::String("1969-12-31").CastTo(iceberg::date()), TypeId::kDate, - -1); - - // Invalid Formats - EXPECT_THAT(Literal::String("2023/05/15").CastTo(iceberg::date()), - IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(Literal::String("2023-05-15 extra").CastTo(iceberg::date()), - IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(Literal::String("2023-05").CastTo(iceberg::date()), - IsError(ErrorKind::kInvalidArgument)); -} - -TEST(LiteralTest, StringCastToTime) { - AssertCastSucceeds(Literal::String("12:00:00").CastTo(iceberg::time()), TypeId::kTime, - static_cast(43200000000)); - AssertCastSucceeds(Literal::String("12:34:56.123456").CastTo(iceberg::time()), - TypeId::kTime, static_cast(45296123456)); - AssertCastSucceeds(Literal::String("01:02:03.123").CastTo(iceberg::time()), - TypeId::kTime, static_cast(3723123000)); - AssertCastSucceeds(Literal::String("23:59:59.987654321").CastTo(iceberg::time()), - TypeId::kTime, static_cast(86399987654)); - - // Invalid Formats - EXPECT_THAT(Literal::String("12-00-00").CastTo(iceberg::time()), - IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(Literal::String("12:00:00 extra").CastTo(iceberg::time()), - IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(Literal::String("25:00:00").CastTo(iceberg::time()), - IsError(ErrorKind::kInvalidArgument)); -} - -TEST(LiteralTest, StringCastToTimestamp) { - AssertCastSucceeds(Literal::String("2023-05-15T12:00:00").CastTo(iceberg::timestamp()), - TypeId::kTimestamp, static_cast(1684152000000000)); - AssertCastSucceeds( - Literal::String("2023-05-15T12:34:56.123456").CastTo(iceberg::timestamp()), - TypeId::kTimestamp, static_cast(1684154096123456)); - - // Invalid Formats - EXPECT_THAT(Literal::String("2023-05-15 12:00:00").CastTo(iceberg::timestamp()), - IsError(ErrorKind::kInvalidArgument)); - 
EXPECT_THAT(Literal::String("2023-05-15T12:00:00Z").CastTo(iceberg::timestamp()), - IsError(ErrorKind::kInvalidArgument)); -} - -TEST(LiteralTest, StringCastToTimestampTz) { - AssertCastSucceeds( - Literal::String("2023-05-15T12:34:56.123456Z").CastTo(iceberg::timestamp_tz()), - TypeId::kTimestampTz, static_cast(1684154096123456)); - AssertCastSucceeds( - Literal::String("2023-05-15T12:00:00").CastTo(iceberg::timestamp_tz()), - TypeId::kTimestampTz, static_cast(1684152000000000)); - - // Invalid & Unsupported Formats - EXPECT_THAT( - Literal::String("2023-05-15T12:00:00+08:00").CastTo(iceberg::timestamp_tz()), - IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT( - Literal::String("2023-05-15T12:00:00Z oops").CastTo(iceberg::timestamp_tz()), - IsError(ErrorKind::kInvalidArgument)); -} - // Binary type tests TEST(LiteralTest, BinaryBasics) { std::vector data = {0x01, 0x02, 0x03, 0xFF}; @@ -466,18 +399,6 @@ TEST(LiteralTest, TimestampComparison) { EXPECT_EQ(timestamp2 <=> timestamp1, std::partial_ordering::greater); } -TEST(LiteralTest, TimestampCastTo) { - const int64_t micros = 1684137600000000; // May 15, 2023 08:00:00 UTC - auto timestamp_literal = Literal::Timestamp(micros); - - // Cast to Date (1684137600000000 / 86400000000 = 19492.33...) 
-> floors to 19492 - AssertCastSucceeds(timestamp_literal.CastTo(iceberg::date()), TypeId::kDate, 19492); - - // Cast to TimestampTz - AssertCastSucceeds(timestamp_literal.CastTo(iceberg::timestamp_tz()), - TypeId::kTimestampTz, micros); -} - // TimestampTz type tests TEST(LiteralTest, TimestampTzBasics) { auto timestamptz_literal = @@ -501,18 +422,6 @@ TEST(LiteralTest, TimestampTzComparison) { EXPECT_EQ(timestamptz2 <=> timestamptz1, std::partial_ordering::greater); } -TEST(LiteralTest, TimestampTzCastTo) { - const int64_t micros = 1684137600000000; // May 15, 2023 08:00:00 UTC - auto timestamptz_literal = Literal::TimestampTz(micros); - - // Cast to Date - AssertCastSucceeds(timestamptz_literal.CastTo(iceberg::date()), TypeId::kDate, 19492); - - // Cast to Timestamp - AssertCastSucceeds(timestamptz_literal.CastTo(iceberg::timestamp()), TypeId::kTimestamp, - micros); -} - TEST(LiteralTest, BinaryCastTo) { std::vector data4 = {0x01, 0x02, 0x03, 0x04}; auto binary_literal = Literal::Binary(data4); @@ -539,27 +448,6 @@ TEST(LiteralTest, FixedCastTo) { EXPECT_THAT(fixed_literal.CastTo(iceberg::fixed(5)), IsError(ErrorKind::kNotSupported)); } -// Microseconds to Days conversion tests -TEST(LiteralTest, MicrosToDaysConversion) { - constexpr int64_t kMicrosPerDay = 86400000000LL; - - // Test full day conversion - AssertCastSucceeds(Literal::Timestamp(kMicrosPerDay).CastTo(iceberg::date()), - TypeId::kDate, 1); - - // Test partial day conversion (should floor to 0) - AssertCastSucceeds(Literal::Timestamp(kMicrosPerDay / 2).CastTo(iceberg::date()), - TypeId::kDate, 0); - - // Test negative timestamp (should floor to -1) - AssertCastSucceeds(Literal::Timestamp(-kMicrosPerDay / 2).CastTo(iceberg::date()), - TypeId::kDate, -1); - - // Test exactly -1 day - AssertCastSucceeds(Literal::Timestamp(-kMicrosPerDay).CastTo(iceberg::date()), - TypeId::kDate, -1); -} - // Cross-type comparison tests TEST(LiteralTest, CrossTypeComparison) { auto int_literal = Literal::Int(42); diff 
--git a/src/iceberg/util/date_time_util.cc b/src/iceberg/util/date_time_util.cc deleted file mode 100644 index fdbbe5fc..00000000 --- a/src/iceberg/util/date_time_util.cc +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#include "iceberg/util/date_time_util.h" - -#include -#include -#include -#include - -#include "iceberg/exception.h" - -namespace iceberg { - -namespace { - -// Helper function to parse fractional seconds from input stream -Result ParseAndAddFractionalSeconds(std::istringstream& in) { - if (in.peek() != '.') { - return 0LL; - } - - in.ignore(); - std::string fractional_str; - char c; - while (in.get(c) && std::isdigit(c)) { - fractional_str += c; - } - if (in) { - in.unget(); - } - - if (fractional_str.length() > 6) { - fractional_str.resize(6); - } - - return ParseFractionalSeconds(fractional_str); -} - -} // namespace - -int32_t MicrosToDays(int64_t micros_since_epoch) { - std::chrono::microseconds micros(micros_since_epoch); - auto days_duration = std::chrono::floor(micros); - return static_cast(days_duration.count()); -} - -time_t TimegmCustom(std::tm* tm) { -#if defined(_WIN32) - return _mkgmtime(tm); -#else - return timegm(tm); -#endif -} - -Result ParseFractionalSeconds(const std::string& fractional_str) { - if (fractional_str.empty()) { - return 0LL; - } - - if (fractional_str.length() > 6) { - return InvalidArgument("Fractional seconds cannot exceed 6 digits"); - } - - // Validate that all characters are digits - for (char c : fractional_str) { - if (!std::isdigit(c)) { - return InvalidArgument("Fractional seconds must contain only digits"); - } - } - - try { - std::string padded_fractional = fractional_str; - padded_fractional.append(6 - fractional_str.length(), '0'); - return std::stoll(padded_fractional); - } catch (const std::exception&) { - return InvalidArgument("Failed to parse fractional seconds '{}'", fractional_str); - } -} - -Result ParseDateString(const std::string& date_str) { - std::istringstream in{date_str}; - std::tm tm = {}; - - // Parse "YYYY-MM-DD" into days since 1970-01-01 epoch. 
- in >> std::get_time(&tm, "%Y-%m-%d"); - - if (in.fail() || tm.tm_mday == 0 || !in.eof()) { - return InvalidArgument("Failed to parse '{}' as a valid Date (expected YYYY-MM-DD)", - date_str); - } - - auto time_point = std::chrono::system_clock::from_time_t(TimegmCustom(&tm)); - auto days_since_epoch = std::chrono::floor(time_point); - return static_cast(days_since_epoch.time_since_epoch().count()); -} - -Result ParseTimeString(const std::string& time_str) { - std::istringstream in{time_str}; - std::tm tm = {}; - - // Parse "HH:MM:SS.ffffff" into microseconds since midnight. - in >> std::get_time(&tm, "%H:%M:%S"); - - if (in.fail()) { - return InvalidArgument( - "Failed to parse '{}' as a valid Time (expected HH:MM:SS.ffffff)", time_str); - } - - int64_t total_micros = (tm.tm_hour * 3600LL + tm.tm_min * 60LL + tm.tm_sec) * 1000000LL; - - auto fractional_result = ParseAndAddFractionalSeconds(in); - if (!fractional_result.has_value()) { - return std::unexpected(fractional_result.error()); - } - total_micros += fractional_result.value(); - - if (in.peek() != EOF) { - return InvalidArgument("Unconsumed characters found after parsing Time '{}'", - time_str); - } - - return total_micros; -} - -Result ParseTimestampString(const std::string& timestamp_str) { - std::istringstream in{timestamp_str}; - std::tm tm = {}; - - // Parse "YYYY-MM-DDTHH:MM:SS.ffffff" - in >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S"); - - if (in.fail()) { - return InvalidArgument( - "Failed to parse '{}' as a valid Timestamp (expected YYYY-MM-DDTHH:MM:SS...)", - timestamp_str); - } - - auto seconds_since_epoch = TimegmCustom(&tm); - int64_t total_micros = seconds_since_epoch * 1000000LL; - - auto fractional_result = ParseAndAddFractionalSeconds(in); - if (!fractional_result.has_value()) { - return std::unexpected(fractional_result.error()); - } - total_micros += fractional_result.value(); - - if (in.peek() != EOF) { - return InvalidArgument("Unconsumed characters found after parsing Timestamp '{}'", - 
timestamp_str); - } - - return total_micros; -} - -Result ParseTimestampTzString(const std::string& timestamptz_str) { - std::istringstream in{timestamptz_str}; - std::tm tm = {}; - - // Parse "YYYY-MM-DDTHH:MM:SS.ffffff" and optional 'Z' - in >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S"); - - if (in.fail()) { - return InvalidArgument( - "Failed to parse '{}' as a valid Timestamp (expected YYYY-MM-DDTHH:MM:SS...)", - timestamptz_str); - } - - auto seconds_since_epoch = TimegmCustom(&tm); - int64_t total_micros = seconds_since_epoch * 1000000LL; - - auto fractional_result = ParseAndAddFractionalSeconds(in); - if (!fractional_result.has_value()) { - return std::unexpected(fractional_result.error()); - } - total_micros += fractional_result.value(); - - // NOTE: This implementation DOES NOT support timezone offsets like - // '+08:00' or '-07:00'. It only supports the UTC designator 'Z'. - if (in.peek() == 'Z') { - in.ignore(); // Consume 'Z' - } - - if (in.peek() != EOF) { - return InvalidArgument("Unconsumed characters found after parsing Timestamp '{}'", - timestamptz_str); - } - - return total_micros; -} - -} // namespace iceberg diff --git a/src/iceberg/util/date_time_util.h b/src/iceberg/util/date_time_util.h deleted file mode 100644 index c725afbd..00000000 --- a/src/iceberg/util/date_time_util.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include -#include - -#include "iceberg/iceberg_export.h" -#include "iceberg/result.h" - -namespace iceberg { - -/// \brief Convert microseconds since epoch to days since epoch -/// \param micros_since_epoch Microseconds since Unix epoch -/// \return Days since Unix epoch (1970-01-01) -ICEBERG_EXPORT int32_t MicrosToDays(int64_t micros_since_epoch); - -/// \brief Cross-platform implementation of timegm function -/// \param tm Time structure to convert -/// \return Time as seconds since Unix epoch -ICEBERG_EXPORT time_t TimegmCustom(std::tm* tm); - -/// \brief Parse a date string in YYYY-MM-DD format -/// \param date_str Date string to parse -/// \return Days since Unix epoch (1970-01-01) on success -ICEBERG_EXPORT Result ParseDateString(const std::string& date_str); - -/// \brief Parse a time string in HH:MM:SS.ffffff format -/// \param time_str Time string to parse -/// \return Microseconds since midnight on success -ICEBERG_EXPORT Result ParseTimeString(const std::string& time_str); - -/// \brief Parse a timestamp string in YYYY-MM-DDTHH:MM:SS.ffffff format -/// \param timestamp_str Timestamp string to parse -/// \return Microseconds since Unix epoch on success -ICEBERG_EXPORT Result ParseTimestampString(const std::string& timestamp_str); - -/// \brief Parse a timestamp with timezone string in YYYY-MM-DDTHH:MM:SS.ffffffZ format -/// \param timestamptz_str Timestamp with timezone string to parse -/// \return Microseconds since Unix epoch on success -/// -/// \note This implementation only supports UTC designator 'Z', not 
timezone offsets -ICEBERG_EXPORT Result ParseTimestampTzString(const std::string& timestamptz_str); - -/// \brief Parse fractional seconds from a string -/// \param fractional_str Fractional seconds string (up to 6 digits) -/// \return Microseconds value of the fractional part -ICEBERG_EXPORT Result ParseFractionalSeconds(const std::string& fractional_str); - -} // namespace iceberg From 24bec4674ec8fb8ce020754103d533f74d408e99 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Tue, 23 Sep 2025 15:41:48 +0800 Subject: [PATCH 3/7] feat: Implement Type Casting and toString for Literals --- src/iceberg/expression/literal.cc | 37 ++++++------ src/iceberg/expression/literal.h | 2 +- src/iceberg/test/literal_test.cc | 98 +++++++++++++++---------------- src/iceberg/transform_function.cc | 1 + 4 files changed, 69 insertions(+), 69 deletions(-) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index a5e883ef..001e909d 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -19,11 +19,15 @@ #include "iceberg/expression/literal.h" +#include #include #include +#include #include "iceberg/util/conversions.h" #include "iceberg/exception.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/macros.h" namespace iceberg { @@ -101,7 +105,6 @@ Result LiteralCaster::CastFromInt( return Literal::Double(static_cast(int_val)); case TypeId::kDate: return Literal::Date(int_val); - // TODO(Li Feiyang): Implement cast from Int to decimal default: return NotSupported("Cast from Int to {} is not implemented", target_type->ToString()); @@ -142,7 +145,6 @@ Result LiteralCaster::CastFromLong( return Literal::Timestamp(long_val); case TypeId::kTimestampTz: return Literal::TimestampTz(long_val); - // TODO(Li Feiyang): Implement cast from Long to decimal, TimestampNs and default: return NotSupported("Cast from Long to {} is not supported", target_type->ToString()); @@ -156,7 +158,6 @@ Result LiteralCaster::CastFromFloat( switch 
(target_type->type_id()) { case TypeId::kDouble: return Literal::Double(static_cast(float_val)); - // TODO(Li Feiyang): Implement cast from Float to decimal default: return NotSupported("Cast from Float to {} is not supported", target_type->ToString()); @@ -192,10 +193,9 @@ Result LiteralCaster::CastFromString( case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kUuid: return NotImplemented("Cast from String to {} is not implemented yet", target_type->ToString()); - // TODO(Li Feiyang): Implement cast from String to uuid and decimal - default: return NotSupported("Cast from String to {} is not supported", target_type->ToString()); @@ -254,8 +254,7 @@ Result LiteralCaster::CastFromFixed( const Literal& literal, const std::shared_ptr& target_type) { switch (target_type->type_id()) { case TypeId::kBinary: - return Literal::Binary( - std::get>(literal.value_)); // 直接拷贝+move + return Literal::Binary(std::get>(literal.value_)); default: return NotSupported("Cast from Fixed to {} is not supported", target_type->ToString()); @@ -405,6 +404,13 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { } std::string Literal::ToString() const { + auto unsupported_error = [this]() { + return std::format("ToString not supported for type: {}", type_->ToString()); + }; + auto invalid_argument = [this]() { + return std::format("Invalid argument for type: {}", type_->ToString()); + }; + if (std::holds_alternative(value_)) { return "belowMin"; } @@ -432,14 +438,13 @@ std::string Literal::ToString() const { return std::to_string(std::get(value_)); } case TypeId::kString: { - return std::get(value_); + return "\"" + std::get(value_) + "\""; } case TypeId::kBinary: case TypeId::kFixed: { const auto& binary_data = std::get>(value_); std::string result = "X'"; - result.reserve(2 + binary_data.size() * 2 + - 1); // 2 chars per byte and 2 + 1 for prefix and suffix + result.reserve(/*prefix*/ 2 + /*suffix*/ 1 + /*data*/ binary_data.size() 
* 2); for (const auto& byte : binary_data) { std::format_to(std::back_inserter(result), "{:02X}", byte); } @@ -454,12 +459,8 @@ std::string Literal::ToString() const { case TypeId::kDate: { return std::to_string(std::get(value_)); } - case TypeId::kDecimal: - case TypeId::kUuid: { - throw NotImplemented("kDecimal and kUuid are not implemented yet"); - } default: { - throw IcebergError("Unknown type: " + type_->ToString()); + return unsupported_error(); } } } @@ -491,6 +492,9 @@ Result LiteralCaster::CastTo(const Literal& literal, // Delegate to specific cast functions based on source type switch (source_type_id) { + case TypeId::kBoolean: + // No casts defined for Boolean, other than to itself. + break; case TypeId::kInt: return CastFromInt(literal, target_type); case TypeId::kLong: @@ -509,12 +513,11 @@ Result LiteralCaster::CastTo(const Literal& literal, return CastFromTimestamp(literal, target_type); case TypeId::kTimestampTz: return CastFromTimestampTz(literal, target_type); - case TypeId::kBoolean: default: break; } - return NotSupported("Cast from {} to {} is not implemented", literal.type_->ToString(), + return NotSupported("Cast from {} to {} is not supported", literal.type_->ToString(), target_type->ToString()); } diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index c11d48f5..aa868aef 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -57,7 +57,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable { double, // for double std::string, // for string std::vector, // for binary, fixed - std::array, // for uuid and decimal + std::array, // for uuid BelowMin, AboveMax>; /// \brief Factory methods for primitive types diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 43f67277..9f3d83c8 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -40,7 +40,7 @@ void AssertCastSucceeds(const Result& result, TypeId 
expected_type_id, ASSERT_THAT(result, IsOk()); EXPECT_EQ(result->type()->type_id(), expected_type_id); ASSERT_NO_THROW(EXPECT_EQ(std::get(result->value()), expected_value)) - << "Value type mismatch in std::get. Expected type for TypeId " + << "Type mismatch in std::get. Expected type for TypeId " << static_cast(expected_type_id); } @@ -164,19 +164,19 @@ TEST(LiteralTest, LongCastToOverflow) { auto min_long = Literal::Long(static_cast(std::numeric_limits::min()) - 1); - auto max_result = max_long.CastTo(iceberg::int32()); + auto max_result = max_long.CastTo(int32()); ASSERT_THAT(max_result, IsOk()); EXPECT_TRUE(max_result->IsAboveMax()); - auto min_result = min_long.CastTo(iceberg::int32()); + auto min_result = min_long.CastTo(int32()); ASSERT_THAT(min_result, IsOk()); EXPECT_TRUE(min_result->IsBelowMin()); - max_result = max_long.CastTo(iceberg::date()); + max_result = max_long.CastTo(date()); ASSERT_THAT(max_result, IsOk()); EXPECT_TRUE(max_result->IsAboveMax()); - min_result = min_long.CastTo(iceberg::date()); + min_result = min_long.CastTo(date()); ASSERT_THAT(min_result, IsOk()); EXPECT_TRUE(min_result->IsBelowMin()); } @@ -204,11 +204,11 @@ TEST(LiteralTest, FloatComparison) { } TEST(LiteralTest, FloatCastTo) { - auto float_literal = Literal::Float(3.14f); + auto float_literal = Literal::Float(2.0f); // Cast to Double - AssertCastSucceeds(float_literal.CastTo(iceberg::float64()), TypeId::kDouble, - static_cast(3.14f)); + AssertCastSucceeds(float_literal.CastTo(float64()), TypeId::kDouble, + static_cast(2.0f)); } // Double type tests @@ -234,10 +234,10 @@ TEST(LiteralTest, DoubleComparison) { } TEST(LiteralTest, DoubleCastTo) { - auto double_literal = Literal::Double(3.14); + auto double_literal = Literal::Double(2.0); // Cast to Float - AssertCastSucceeds(double_literal.CastTo(iceberg::float32()), TypeId::kFloat, 3.14f); + AssertCastSucceeds(double_literal.CastTo(float32()), TypeId::kFloat, 2.0f); } TEST(LiteralTest, DoubleCastToOverflow) { @@ -247,11 +247,11 
@@ TEST(LiteralTest, DoubleCastToOverflow) { auto min_double = Literal::Double(-static_cast(std::numeric_limits::max()) * 2); - auto max_result = max_double.CastTo(iceberg::float32()); + auto max_result = max_double.CastTo(float32()); ASSERT_THAT(max_result, IsOk()); EXPECT_TRUE(max_result->IsAboveMax()); - auto min_result = min_double.CastTo(iceberg::float32()); + auto min_result = min_double.CastTo(float32()); ASSERT_THAT(min_result, IsOk()); EXPECT_TRUE(min_result->IsBelowMin()); } @@ -264,8 +264,8 @@ TEST(LiteralTest, StringBasics) { EXPECT_EQ(string_literal.type()->type_id(), TypeId::kString); EXPECT_EQ(empty_string.type()->type_id(), TypeId::kString); - EXPECT_EQ(string_literal.ToString(), "hello world"); - EXPECT_EQ(empty_string.ToString(), ""); + EXPECT_EQ(string_literal.ToString(), "\"hello world\""); + EXPECT_EQ(empty_string.ToString(), "\"\""); } TEST(LiteralTest, StringComparison) { @@ -305,6 +305,18 @@ TEST(LiteralTest, BinaryComparison) { EXPECT_EQ(binary2 <=> binary1, std::partial_ordering::greater); } +TEST(LiteralTest, BinaryCastTo) { + std::vector data4 = {0x01, 0x02, 0x03, 0x04}; + auto binary_literal = Literal::Binary(data4); + + // Cast to Fixed with matching length + AssertCastSucceeds(binary_literal.CastTo(fixed(4)), TypeId::kFixed, data4); + + // Cast to Fixed with different length should fail + EXPECT_THAT(binary_literal.CastTo(fixed(5)), + IsError(ErrorKind::kInvalidArgument)); +} + // Fixed type tests TEST(LiteralTest, FixedBasics) { std::vector data = {0x01, 0x02, 0x03, 0xFF}; @@ -332,6 +344,20 @@ TEST(LiteralTest, FixedComparison) { EXPECT_EQ(fixed2 <=> fixed1, std::partial_ordering::greater); } +TEST(LiteralTest, FixedCastTo) { + std::vector data4 = {0x01, 0x02, 0x03, 0x04}; + auto fixed_literal = Literal::Fixed(data4); + + // Cast to Binary + AssertCastSucceeds(fixed_literal.CastTo(binary()), TypeId::kBinary, data4); + + // Cast to Fixed with same length + AssertCastSucceeds(fixed_literal.CastTo(fixed(4)), TypeId::kFixed, data4); + + 
// Cast to Fixed with different length should fail + EXPECT_THAT(fixed_literal.CastTo(fixed(5)), IsError(ErrorKind::kNotSupported)); +} + // Date type tests TEST(LiteralTest, DateBasics) { auto date_literal = Literal::Date(19489); // May 15, 2023 @@ -422,32 +448,6 @@ TEST(LiteralTest, TimestampTzComparison) { EXPECT_EQ(timestamptz2 <=> timestamptz1, std::partial_ordering::greater); } -TEST(LiteralTest, BinaryCastTo) { - std::vector data4 = {0x01, 0x02, 0x03, 0x04}; - auto binary_literal = Literal::Binary(data4); - - // Cast to Fixed with matching length - AssertCastSucceeds(binary_literal.CastTo(iceberg::fixed(4)), TypeId::kFixed, data4); - - // Cast to Fixed with different length should fail - EXPECT_THAT(binary_literal.CastTo(iceberg::fixed(5)), - IsError(ErrorKind::kInvalidArgument)); -} - -TEST(LiteralTest, FixedCastTo) { - std::vector data4 = {0x01, 0x02, 0x03, 0x04}; - auto fixed_literal = Literal::Fixed(data4); - - // Cast to Binary - AssertCastSucceeds(fixed_literal.CastTo(iceberg::binary()), TypeId::kBinary, data4); - - // Cast to Fixed with same length - AssertCastSucceeds(fixed_literal.CastTo(iceberg::fixed(4)), TypeId::kFixed, data4); - - // Cast to Fixed with different length should fail - EXPECT_THAT(fixed_literal.CastTo(iceberg::fixed(5)), IsError(ErrorKind::kNotSupported)); -} - // Cross-type comparison tests TEST(LiteralTest, CrossTypeComparison) { auto int_literal = Literal::Int(42); @@ -457,22 +457,18 @@ TEST(LiteralTest, CrossTypeComparison) { EXPECT_EQ(int_literal <=> string_literal, std::partial_ordering::unordered); } -// Special value tests -TEST(LiteralTest, SpecialValues) { +// Same type cast tests +TEST(LiteralTest, SameTypeCast) { auto int_literal = Literal::Int(42); - EXPECT_FALSE(int_literal.IsAboveMax()); - EXPECT_FALSE(int_literal.IsBelowMin()); + AssertCastSucceeds(int_literal.CastTo(int32()), TypeId::kInt, 42); } -// Same type cast test -TEST(LiteralTest, SameTypeCast) { +// Special value tests +TEST(LiteralTest, SpecialValues) { 
auto int_literal = Literal::Int(42); - - auto same_type_result = int_literal.CastTo(iceberg::int32()); - ASSERT_THAT(same_type_result, IsOk()); - EXPECT_EQ(same_type_result->type()->type_id(), TypeId::kInt); - EXPECT_EQ(same_type_result->ToString(), "42"); + EXPECT_FALSE(int_literal.IsAboveMax()); + EXPECT_FALSE(int_literal.IsBelowMin()); } // Float special values tests diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc index b043c397..78ddc732 100644 --- a/src/iceberg/transform_function.cc +++ b/src/iceberg/transform_function.cc @@ -27,6 +27,7 @@ #include "iceberg/expression/literal.h" #include "iceberg/type.h" +#include "iceberg/util/int128.h" #include "iceberg/util/murmurhash3_internal.h" #include "iceberg/util/truncate_util.h" From 1663f19120f440404b48d2e1d729cc8f294f0780 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Fri, 10 Oct 2025 11:53:26 +0800 Subject: [PATCH 4/7] fix --- src/iceberg/expression/literal.cc | 12 +++--------- src/iceberg/expression/literal.h | 2 +- src/iceberg/expression/predicate.cc | 7 +++---- src/iceberg/test/literal_test.cc | 24 +++++++++--------------- src/iceberg/transform_function.cc | 1 - 5 files changed, 16 insertions(+), 30 deletions(-) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 001e909d..fb19456f 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -27,6 +27,7 @@ #include "iceberg/util/conversions.h" #include "iceberg/exception.h" #include "iceberg/type_fwd.h" +#include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" namespace iceberg { @@ -237,7 +238,7 @@ Result LiteralCaster::CastFromBinary( auto binary_val = std::get>(literal.value_); switch (target_type->type_id()) { case TypeId::kFixed: { - auto target_fixed_type = std::static_pointer_cast(target_type); + auto target_fixed_type = internal::checked_pointer_cast(target_type); if (binary_val.size() == target_fixed_type->length()) { return 
Literal::Fixed(std::move(binary_val)); } @@ -404,13 +405,6 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { } std::string Literal::ToString() const { - auto unsupported_error = [this]() { - return std::format("ToString not supported for type: {}", type_->ToString()); - }; - auto invalid_argument = [this]() { - return std::format("Invalid argument for type: {}", type_->ToString()); - }; - if (std::holds_alternative(value_)) { return "belowMin"; } @@ -460,7 +454,7 @@ std::string Literal::ToString() const { return std::to_string(std::get(value_)); } default: { - return unsupported_error(); + return std::format("invalid literal of type {}", type_->ToString()); } } } diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index aa868aef..c11d48f5 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -57,7 +57,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable { double, // for double std::string, // for string std::vector, // for binary, fixed - std::array, // for uuid + std::array, // for uuid and decimal BelowMin, AboveMax>; /// \brief Factory methods for primitive types diff --git a/src/iceberg/expression/predicate.cc b/src/iceberg/expression/predicate.cc index 144ef2b0..2ce04a16 100644 --- a/src/iceberg/expression/predicate.cc +++ b/src/iceberg/expression/predicate.cc @@ -100,12 +100,11 @@ std::string UnboundPredicate::ToString() const { return values_.size() == 1 ? std::format("{} != {}", term, values_[0]) : invalid_predicate_string(op); case Expression::Operation::kStartsWith: - return values_.size() == 1 ? std::format("{} startsWith \"{}\"", term, values_[0]) + return values_.size() == 1 ? std::format("{} startsWith {}", term, values_[0]) : invalid_predicate_string(op); case Expression::Operation::kNotStartsWith: - return values_.size() == 1 - ? std::format("{} notStartsWith \"{}\"", term, values_[0]) - : invalid_predicate_string(op); + return values_.size() == 1 ? 
std::format("{} notStartsWith {}", term, values_[0]) + : invalid_predicate_string(op); case Expression::Operation::kIn: return std::format("{} in {}", term, values_); case Expression::Operation::kNotIn: diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 9f3d83c8..8face909 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -38,8 +38,8 @@ template void AssertCastSucceeds(const Result& result, TypeId expected_type_id, const T& expected_value) { ASSERT_THAT(result, IsOk()); - EXPECT_EQ(result->type()->type_id(), expected_type_id); - ASSERT_NO_THROW(EXPECT_EQ(std::get(result->value()), expected_value)) + ASSERT_EQ(result->type()->type_id(), expected_type_id); + EXPECT_EQ(std::get(result->value()), expected_value) << "Type mismatch in std::get. Expected type for TypeId " << static_cast(expected_type_id); } @@ -145,16 +145,14 @@ TEST(LiteralTest, LongCastTo) { AssertCastSucceeds(long_literal.CastTo(date()), TypeId::kDate, 42); // Cast to Time - AssertCastSucceeds(long_literal.CastTo(time()), TypeId::kTime, - static_cast(42L)); + AssertCastSucceeds(long_literal.CastTo(time()), TypeId::kTime, int64_t{42}); // Cast to Timestamp - AssertCastSucceeds(long_literal.CastTo(timestamp()), TypeId::kTimestamp, - static_cast(42L)); + AssertCastSucceeds(long_literal.CastTo(timestamp()), TypeId::kTimestamp, int64_t{42}); // Cast to TimestampTz AssertCastSucceeds(long_literal.CastTo(timestamp_tz()), TypeId::kTimestampTz, - static_cast(42L)); + int64_t{42}); } TEST(LiteralTest, LongCastToOverflow) { @@ -207,8 +205,7 @@ TEST(LiteralTest, FloatCastTo) { auto float_literal = Literal::Float(2.0f); // Cast to Double - AssertCastSucceeds(float_literal.CastTo(float64()), TypeId::kDouble, - static_cast(2.0f)); + AssertCastSucceeds(float_literal.CastTo(float64()), TypeId::kDouble, double{2.0f}); } // Double type tests @@ -242,10 +239,8 @@ TEST(LiteralTest, DoubleCastTo) { TEST(LiteralTest, DoubleCastToOverflow) { // Test 
overflow cases for Double to Float - auto max_double = - Literal::Double(static_cast(std::numeric_limits::max()) * 2); - auto min_double = - Literal::Double(-static_cast(std::numeric_limits::max()) * 2); + auto max_double = Literal::Double(double{std::numeric_limits::max()} * 2); + auto min_double = Literal::Double(-double{std::numeric_limits::max()} * 2); auto max_result = max_double.CastTo(float32()); ASSERT_THAT(max_result, IsOk()); @@ -313,8 +308,7 @@ TEST(LiteralTest, BinaryCastTo) { AssertCastSucceeds(binary_literal.CastTo(fixed(4)), TypeId::kFixed, data4); // Cast to Fixed with different length should fail - EXPECT_THAT(binary_literal.CastTo(fixed(5)), - IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(binary_literal.CastTo(fixed(5)), IsError(ErrorKind::kInvalidArgument)); } // Fixed type tests diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc index 78ddc732..b043c397 100644 --- a/src/iceberg/transform_function.cc +++ b/src/iceberg/transform_function.cc @@ -27,7 +27,6 @@ #include "iceberg/expression/literal.h" #include "iceberg/type.h" -#include "iceberg/util/int128.h" #include "iceberg/util/murmurhash3_internal.h" #include "iceberg/util/truncate_util.h" From 869b04565f32b3e47bafc7de6bb40d5ac2771dcb Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Fri, 10 Oct 2025 16:01:47 +0800 Subject: [PATCH 5/7] use parameterized test to use common code for testing --- src/iceberg/test/literal_test.cc | 608 +++++++++++++------------------ 1 file changed, 262 insertions(+), 346 deletions(-) diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 8face909..10a553e0 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -46,115 +46,71 @@ void AssertCastSucceeds(const Result& result, TypeId expected_type_id, } // namespace -// Boolean type tests -TEST(LiteralTest, BooleanBasics) { - auto true_literal = Literal::Boolean(true); - auto false_literal = Literal::Boolean(false); - - 
EXPECT_EQ(true_literal.type()->type_id(), TypeId::kBoolean); - EXPECT_EQ(false_literal.type()->type_id(), TypeId::kBoolean); - - EXPECT_EQ(true_literal.ToString(), "true"); - EXPECT_EQ(false_literal.ToString(), "false"); -} - -TEST(LiteralTest, BooleanComparison) { - auto true_literal = Literal::Boolean(true); - auto false_literal = Literal::Boolean(false); - auto another_true = Literal::Boolean(true); - - EXPECT_EQ(true_literal <=> another_true, std::partial_ordering::equivalent); - EXPECT_EQ(true_literal <=> false_literal, std::partial_ordering::greater); - EXPECT_EQ(false_literal <=> true_literal, std::partial_ordering::less); -} - -// Int type tests -TEST(LiteralTest, IntBasics) { - auto int_literal = Literal::Int(42); - auto negative_int = Literal::Int(-123); - - EXPECT_EQ(int_literal.type()->type_id(), TypeId::kInt); - EXPECT_EQ(negative_int.type()->type_id(), TypeId::kInt); +// Parameter struct for basic literal tests +struct BasicLiteralTestParam { + std::string test_name; + Literal literal; + TypeId expected_type_id; + std::string expected_string; +}; - EXPECT_EQ(int_literal.ToString(), "42"); - EXPECT_EQ(negative_int.ToString(), "-123"); -} +class BasicLiteralTest : public ::testing::TestWithParam {}; -TEST(LiteralTest, IntComparison) { - auto int1 = Literal::Int(10); - auto int2 = Literal::Int(20); - auto int3 = Literal::Int(10); +TEST_P(BasicLiteralTest, BasicsTest) { + const auto& param = GetParam(); - EXPECT_EQ(int1 <=> int3, std::partial_ordering::equivalent); - EXPECT_EQ(int1 <=> int2, std::partial_ordering::less); - EXPECT_EQ(int2 <=> int1, std::partial_ordering::greater); + EXPECT_EQ(param.literal.type()->type_id(), param.expected_type_id); + EXPECT_EQ(param.literal.ToString(), param.expected_string); } -TEST(LiteralTest, IntCastTo) { - auto int_literal = Literal::Int(42); - - // Cast to Long - AssertCastSucceeds(int_literal.CastTo(int64()), TypeId::kLong, - static_cast(42)); +// Parameter struct for comparison tests +struct 
ComparisonLiteralTestParam { + std::string test_name; + Literal small_literal; + Literal large_literal; + Literal equal_literal; // same as small_literal +}; - // Cast to Float - AssertCastSucceeds(int_literal.CastTo(float32()), TypeId::kFloat, 42.0f); +class ComparisonLiteralTest + : public ::testing::TestWithParam {}; - // Cast to Double - AssertCastSucceeds(int_literal.CastTo(float64()), TypeId::kDouble, 42.0); +TEST_P(ComparisonLiteralTest, ComparisonTest) { + const auto& param = GetParam(); - // Cast to Date - AssertCastSucceeds(int_literal.CastTo(date()), TypeId::kDate, 42); + EXPECT_EQ(param.small_literal <=> param.equal_literal, + std::partial_ordering::equivalent); + EXPECT_EQ(param.small_literal <=> param.large_literal, std::partial_ordering::less); + EXPECT_EQ(param.large_literal <=> param.small_literal, std::partial_ordering::greater); } -// Long type tests -TEST(LiteralTest, LongBasics) { - auto long_literal = Literal::Long(1234567890L); - auto negative_long = Literal::Long(-9876543210L); +// Parameter struct for cast tests +struct CastLiteralTestParam { + std::string test_name; + Literal source_literal; + std::shared_ptr target_type; + Literal expected_literal; +}; - EXPECT_EQ(long_literal.type()->type_id(), TypeId::kLong); - EXPECT_EQ(negative_long.type()->type_id(), TypeId::kLong); +class CastLiteralTest : public ::testing::TestWithParam {}; - EXPECT_EQ(long_literal.ToString(), "1234567890"); - EXPECT_EQ(negative_long.ToString(), "-9876543210"); -} - -TEST(LiteralTest, LongComparison) { - auto long1 = Literal::Long(100L); - auto long2 = Literal::Long(200L); - auto long3 = Literal::Long(100L); +TEST_P(CastLiteralTest, CastTest) { + const auto& param = GetParam(); + auto result = param.source_literal.CastTo(param.target_type); - EXPECT_EQ(long1 <=> long3, std::partial_ordering::equivalent); - EXPECT_EQ(long1 <=> long2, std::partial_ordering::less); - EXPECT_EQ(long2 <=> long1, std::partial_ordering::greater); + ASSERT_THAT(result, IsOk()); + 
EXPECT_EQ(*result, param.expected_literal); } -TEST(LiteralTest, LongCastTo) { - auto long_literal = Literal::Long(42L); - - // Cast to Int (within range) - AssertCastSucceeds(long_literal.CastTo(int32()), TypeId::kInt, 42); - - // Cast to Float - AssertCastSucceeds(long_literal.CastTo(float32()), TypeId::kFloat, 42.0f); - - // Cast to Double - AssertCastSucceeds(long_literal.CastTo(float64()), TypeId::kDouble, 42.0); - - // Cast to Date - AssertCastSucceeds(long_literal.CastTo(date()), TypeId::kDate, 42); - - // Cast to Time - AssertCastSucceeds(long_literal.CastTo(time()), TypeId::kTime, int64_t{42}); - - // Cast to Timestamp - AssertCastSucceeds(long_literal.CastTo(timestamp()), TypeId::kTimestamp, int64_t{42}); +// Cross-type comparison tests +TEST(LiteralTest, CrossTypeComparison) { + auto int_literal = Literal::Int(42); + auto string_literal = Literal::String("42"); - // Cast to TimestampTz - AssertCastSucceeds(long_literal.CastTo(timestamp_tz()), TypeId::kTimestampTz, - int64_t{42}); + // Different types should return unordered + EXPECT_EQ(int_literal <=> string_literal, std::partial_ordering::unordered); } +// Overflow tests TEST(LiteralTest, LongCastToOverflow) { // Test overflow cases auto max_long = @@ -179,64 +135,6 @@ TEST(LiteralTest, LongCastToOverflow) { EXPECT_TRUE(min_result->IsBelowMin()); } -// Float type tests -TEST(LiteralTest, FloatBasics) { - auto float_literal = Literal::Float(3.14f); - auto negative_float = Literal::Float(-2.71f); - - EXPECT_EQ(float_literal.type()->type_id(), TypeId::kFloat); - EXPECT_EQ(negative_float.type()->type_id(), TypeId::kFloat); - - EXPECT_EQ(float_literal.ToString(), "3.140000"); - EXPECT_EQ(negative_float.ToString(), "-2.710000"); -} - -TEST(LiteralTest, FloatComparison) { - auto float1 = Literal::Float(1.5f); - auto float2 = Literal::Float(2.5f); - auto float3 = Literal::Float(1.5f); - - EXPECT_EQ(float1 <=> float3, std::partial_ordering::equivalent); - EXPECT_EQ(float1 <=> float2, 
std::partial_ordering::less); - EXPECT_EQ(float2 <=> float1, std::partial_ordering::greater); -} - -TEST(LiteralTest, FloatCastTo) { - auto float_literal = Literal::Float(2.0f); - - // Cast to Double - AssertCastSucceeds(float_literal.CastTo(float64()), TypeId::kDouble, double{2.0f}); -} - -// Double type tests -TEST(LiteralTest, DoubleBasics) { - auto double_literal = Literal::Double(std::numbers::pi); - auto negative_double = Literal::Double(-std::numbers::e); - - EXPECT_EQ(double_literal.type()->type_id(), TypeId::kDouble); - EXPECT_EQ(negative_double.type()->type_id(), TypeId::kDouble); - - EXPECT_EQ(double_literal.ToString(), "3.141593"); - EXPECT_EQ(negative_double.ToString(), "-2.718282"); -} - -TEST(LiteralTest, DoubleComparison) { - auto double1 = Literal::Double(1.5); - auto double2 = Literal::Double(2.5); - auto double3 = Literal::Double(1.5); - - EXPECT_EQ(double1 <=> double3, std::partial_ordering::equivalent); - EXPECT_EQ(double1 <=> double2, std::partial_ordering::less); - EXPECT_EQ(double2 <=> double1, std::partial_ordering::greater); -} - -TEST(LiteralTest, DoubleCastTo) { - auto double_literal = Literal::Double(2.0); - - // Cast to Float - AssertCastSucceeds(double_literal.CastTo(float32()), TypeId::kFloat, 2.0f); -} - TEST(LiteralTest, DoubleCastToOverflow) { // Test overflow cases for Double to Float auto max_double = Literal::Double(double{std::numeric_limits::max()} * 2); @@ -251,213 +149,21 @@ TEST(LiteralTest, DoubleCastToOverflow) { EXPECT_TRUE(min_result->IsBelowMin()); } -// String type tests -TEST(LiteralTest, StringBasics) { - auto string_literal = Literal::String("hello world"); - auto empty_string = Literal::String(""); - - EXPECT_EQ(string_literal.type()->type_id(), TypeId::kString); - EXPECT_EQ(empty_string.type()->type_id(), TypeId::kString); - - EXPECT_EQ(string_literal.ToString(), "\"hello world\""); - EXPECT_EQ(empty_string.ToString(), "\"\""); -} - -TEST(LiteralTest, StringComparison) { - auto string1 = 
Literal::String("apple"); - auto string2 = Literal::String("banana"); - auto string3 = Literal::String("apple"); - - EXPECT_EQ(string1 <=> string3, std::partial_ordering::equivalent); - EXPECT_EQ(string1 <=> string2, std::partial_ordering::less); - EXPECT_EQ(string2 <=> string1, std::partial_ordering::greater); -} - -// Binary type tests -TEST(LiteralTest, BinaryBasics) { - std::vector data = {0x01, 0x02, 0x03, 0xFF}; +// Error cases for casts +TEST(LiteralTest, CastToError) { + std::vector data = {0x01, 0x02, 0x03, 0x04}; auto binary_literal = Literal::Binary(data); - auto empty_binary = Literal::Binary({}); - - EXPECT_EQ(binary_literal.type()->type_id(), TypeId::kBinary); - EXPECT_EQ(empty_binary.type()->type_id(), TypeId::kBinary); - - EXPECT_EQ(binary_literal.ToString(), "X'010203FF'"); - EXPECT_EQ(empty_binary.ToString(), "X''"); -} - -TEST(LiteralTest, BinaryComparison) { - std::vector data1 = {0x01, 0x02}; - std::vector data2 = {0x01, 0x03}; - std::vector data3 = {0x01, 0x02}; - - auto binary1 = Literal::Binary(data1); - auto binary2 = Literal::Binary(data2); - auto binary3 = Literal::Binary(data3); - - EXPECT_EQ(binary1 <=> binary3, std::partial_ordering::equivalent); - EXPECT_EQ(binary1 <=> binary2, std::partial_ordering::less); - EXPECT_EQ(binary2 <=> binary1, std::partial_ordering::greater); -} - -TEST(LiteralTest, BinaryCastTo) { - std::vector data4 = {0x01, 0x02, 0x03, 0x04}; - auto binary_literal = Literal::Binary(data4); - - // Cast to Fixed with matching length - AssertCastSucceeds(binary_literal.CastTo(fixed(4)), TypeId::kFixed, data4); // Cast to Fixed with different length should fail EXPECT_THAT(binary_literal.CastTo(fixed(5)), IsError(ErrorKind::kInvalidArgument)); -} -// Fixed type tests -TEST(LiteralTest, FixedBasics) { - std::vector data = {0x01, 0x02, 0x03, 0xFF}; + data = {0x01, 0x02, 0x03, 0x04}; auto fixed_literal = Literal::Fixed(data); - auto empty_fixed = Literal::Fixed({}); - - EXPECT_EQ(fixed_literal.type()->type_id(), 
TypeId::kFixed); - EXPECT_EQ(empty_fixed.type()->type_id(), TypeId::kFixed); - - EXPECT_EQ(fixed_literal.ToString(), "X'010203FF'"); - EXPECT_EQ(empty_fixed.ToString(), "X''"); -} - -TEST(LiteralTest, FixedComparison) { - std::vector data1 = {0x01, 0x02}; - std::vector data2 = {0x01, 0x03}; - std::vector data3 = {0x01, 0x02}; - - auto fixed1 = Literal::Fixed(data1); - auto fixed2 = Literal::Fixed(data2); - auto fixed3 = Literal::Fixed(data3); - - EXPECT_EQ(fixed1 <=> fixed3, std::partial_ordering::equivalent); - EXPECT_EQ(fixed1 <=> fixed2, std::partial_ordering::less); - EXPECT_EQ(fixed2 <=> fixed1, std::partial_ordering::greater); -} - -TEST(LiteralTest, FixedCastTo) { - std::vector data4 = {0x01, 0x02, 0x03, 0x04}; - auto fixed_literal = Literal::Fixed(data4); - - // Cast to Binary - AssertCastSucceeds(fixed_literal.CastTo(binary()), TypeId::kBinary, data4); - - // Cast to Fixed with same length - AssertCastSucceeds(fixed_literal.CastTo(fixed(4)), TypeId::kFixed, data4); // Cast to Fixed with different length should fail EXPECT_THAT(fixed_literal.CastTo(fixed(5)), IsError(ErrorKind::kNotSupported)); } -// Date type tests -TEST(LiteralTest, DateBasics) { - auto date_literal = Literal::Date(19489); // May 15, 2023 - auto negative_date = Literal::Date(-1); // December 31, 1969 - - EXPECT_EQ(date_literal.type()->type_id(), TypeId::kDate); - EXPECT_EQ(negative_date.type()->type_id(), TypeId::kDate); - - EXPECT_EQ(date_literal.ToString(), "19489"); - EXPECT_EQ(negative_date.ToString(), "-1"); -} - -TEST(LiteralTest, DateComparison) { - auto date1 = Literal::Date(100); - auto date2 = Literal::Date(200); - auto date3 = Literal::Date(100); - - EXPECT_EQ(date1 <=> date3, std::partial_ordering::equivalent); - EXPECT_EQ(date1 <=> date2, std::partial_ordering::less); - EXPECT_EQ(date2 <=> date1, std::partial_ordering::greater); -} - -// Time type tests -TEST(LiteralTest, TimeBasics) { - auto time_literal = Literal::Time(43200000000LL); // 12:00:00 in microseconds - auto 
midnight = Literal::Time(0LL); - - EXPECT_EQ(time_literal.type()->type_id(), TypeId::kTime); - EXPECT_EQ(midnight.type()->type_id(), TypeId::kTime); - - EXPECT_EQ(time_literal.ToString(), "43200000000"); - EXPECT_EQ(midnight.ToString(), "0"); -} - -TEST(LiteralTest, TimeComparison) { - auto time1 = Literal::Time(43200000000LL); // 12:00:00 - auto time2 = Literal::Time(86400000000LL); // 24:00:00 (invalid but for testing) - auto time3 = Literal::Time(43200000000LL); - - EXPECT_EQ(time1 <=> time3, std::partial_ordering::equivalent); - EXPECT_EQ(time1 <=> time2, std::partial_ordering::less); - EXPECT_EQ(time2 <=> time1, std::partial_ordering::greater); -} - -// Timestamp type tests -TEST(LiteralTest, TimestampBasics) { - auto timestamp_literal = - Literal::Timestamp(1684137600000000LL); // May 15, 2023 12:00:00 UTC - auto epoch = Literal::Timestamp(0LL); - - EXPECT_EQ(timestamp_literal.type()->type_id(), TypeId::kTimestamp); - EXPECT_EQ(epoch.type()->type_id(), TypeId::kTimestamp); - - EXPECT_EQ(timestamp_literal.ToString(), "1684137600000000"); - EXPECT_EQ(epoch.ToString(), "0"); -} - -TEST(LiteralTest, TimestampComparison) { - auto timestamp1 = Literal::Timestamp(1000000LL); - auto timestamp2 = Literal::Timestamp(2000000LL); - auto timestamp3 = Literal::Timestamp(1000000LL); - - EXPECT_EQ(timestamp1 <=> timestamp3, std::partial_ordering::equivalent); - EXPECT_EQ(timestamp1 <=> timestamp2, std::partial_ordering::less); - EXPECT_EQ(timestamp2 <=> timestamp1, std::partial_ordering::greater); -} - -// TimestampTz type tests -TEST(LiteralTest, TimestampTzBasics) { - auto timestamptz_literal = - Literal::TimestampTz(1684137600000000LL); // May 15, 2023 12:00:00 UTC - auto epoch = Literal::TimestampTz(0LL); - - EXPECT_EQ(timestamptz_literal.type()->type_id(), TypeId::kTimestampTz); - EXPECT_EQ(epoch.type()->type_id(), TypeId::kTimestampTz); - - EXPECT_EQ(timestamptz_literal.ToString(), "1684137600000000"); - EXPECT_EQ(epoch.ToString(), "0"); -} - -TEST(LiteralTest, 
TimestampTzComparison) { - auto timestamptz1 = Literal::TimestampTz(1000000LL); - auto timestamptz2 = Literal::TimestampTz(2000000LL); - auto timestamptz3 = Literal::TimestampTz(1000000LL); - - EXPECT_EQ(timestamptz1 <=> timestamptz3, std::partial_ordering::equivalent); - EXPECT_EQ(timestamptz1 <=> timestamptz2, std::partial_ordering::less); - EXPECT_EQ(timestamptz2 <=> timestamptz1, std::partial_ordering::greater); -} - -// Cross-type comparison tests -TEST(LiteralTest, CrossTypeComparison) { - auto int_literal = Literal::Int(42); - auto string_literal = Literal::String("42"); - - // Different types should return unordered - EXPECT_EQ(int_literal <=> string_literal, std::partial_ordering::unordered); -} - -// Same type cast tests -TEST(LiteralTest, SameTypeCast) { - auto int_literal = Literal::Int(42); - - AssertCastSucceeds(int_literal.CastTo(int32()), TypeId::kInt, 42); -} - // Special value tests TEST(LiteralTest, SpecialValues) { auto int_literal = Literal::Int(42); @@ -563,7 +269,6 @@ TEST(LiteralTest, DoubleZeroComparison) { auto neg_zero = Literal::Double(-0.0); auto pos_zero = Literal::Double(0.0); - // -0 should be less than +0 EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less); } @@ -770,5 +475,216 @@ TEST(LiteralSerDeTest, TypePromotion) { EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble); EXPECT_DOUBLE_EQ(std::get(double_result->value()), 1.0); } +// Instantiate parameterized tests + +INSTANTIATE_TEST_SUITE_P( + BasicLiteralTestCases, BasicLiteralTest, + ::testing::Values( + BasicLiteralTestParam{.test_name = "BooleanTrue", + .literal = Literal::Boolean(true), + .expected_type_id = TypeId::kBoolean, + .expected_string = "true"}, + BasicLiteralTestParam{.test_name = "BooleanFalse", + .literal = Literal::Boolean(false), + .expected_type_id = TypeId::kBoolean, + .expected_string = "false"}, + BasicLiteralTestParam{.test_name = "IntPositive", + .literal = Literal::Int(42), + .expected_type_id = TypeId::kInt, + .expected_string = 
"42"}, + BasicLiteralTestParam{.test_name = "IntNegative", + .literal = Literal::Int(-123), + .expected_type_id = TypeId::kInt, + .expected_string = "-123"}, + BasicLiteralTestParam{.test_name = "LongPositive", + .literal = Literal::Long(1234567890L), + .expected_type_id = TypeId::kLong, + .expected_string = "1234567890"}, + BasicLiteralTestParam{.test_name = "LongNegative", + .literal = Literal::Long(-9876543210L), + .expected_type_id = TypeId::kLong, + .expected_string = "-9876543210"}, + BasicLiteralTestParam{.test_name = "Float", + .literal = Literal::Float(3.14f), + .expected_type_id = TypeId::kFloat, + .expected_string = "3.140000"}, + BasicLiteralTestParam{.test_name = "Double", + .literal = Literal::Double(std::numbers::pi), + .expected_type_id = TypeId::kDouble, + .expected_string = "3.141593"}, + BasicLiteralTestParam{.test_name = "String", + .literal = Literal::String("hello world"), + .expected_type_id = TypeId::kString, + .expected_string = "\"hello world\""}, + BasicLiteralTestParam{ + .test_name = "Binary", + .literal = Literal::Binary(std::vector{0x01, 0x02, 0x03, 0xFF}), + .expected_type_id = TypeId::kBinary, + .expected_string = "X'010203FF'"}, + BasicLiteralTestParam{ + .test_name = "Fixed", + .literal = Literal::Fixed(std::vector{0x01, 0x02, 0x03, 0xFF}), + .expected_type_id = TypeId::kFixed, + .expected_string = "X'010203FF'"}, + BasicLiteralTestParam{.test_name = "Date", + .literal = Literal::Date(19489), + .expected_type_id = TypeId::kDate, + .expected_string = "19489"}, + BasicLiteralTestParam{.test_name = "Time", + .literal = Literal::Time(43200000000LL), + .expected_type_id = TypeId::kTime, + .expected_string = "43200000000"}, + BasicLiteralTestParam{.test_name = "Timestamp", + .literal = Literal::Timestamp(1684137600000000LL), + .expected_type_id = TypeId::kTimestamp, + .expected_string = "1684137600000000"}, + BasicLiteralTestParam{.test_name = "TimestampTz", + .literal = Literal::TimestampTz(1684137600000000LL), + .expected_type_id = 
TypeId::kTimestampTz, + .expected_string = "1684137600000000"}), + [](const ::testing::TestParamInfo& info) { + return info.param.test_name; + }); + +INSTANTIATE_TEST_SUITE_P( + ComparisonLiteralTestCases, ComparisonLiteralTest, + ::testing::Values( + ComparisonLiteralTestParam{.test_name = "Boolean", + .small_literal = Literal::Boolean(false), + .large_literal = Literal::Boolean(true), + .equal_literal = Literal::Boolean(false)}, + ComparisonLiteralTestParam{.test_name = "Int", + .small_literal = Literal::Int(10), + .large_literal = Literal::Int(20), + .equal_literal = Literal::Int(10)}, + ComparisonLiteralTestParam{.test_name = "Long", + .small_literal = Literal::Long(100L), + .large_literal = Literal::Long(200L), + .equal_literal = Literal::Long(100L)}, + ComparisonLiteralTestParam{.test_name = "Float", + .small_literal = Literal::Float(1.5f), + .large_literal = Literal::Float(2.5f), + .equal_literal = Literal::Float(1.5f)}, + ComparisonLiteralTestParam{.test_name = "Double", + .small_literal = Literal::Double(1.5), + .large_literal = Literal::Double(2.5), + .equal_literal = Literal::Double(1.5)}, + ComparisonLiteralTestParam{.test_name = "String", + .small_literal = Literal::String("apple"), + .large_literal = Literal::String("banana"), + .equal_literal = Literal::String("apple")}, + ComparisonLiteralTestParam{ + .test_name = "Binary", + .small_literal = Literal::Binary(std::vector{0x01, 0x02}), + .large_literal = Literal::Binary(std::vector{0x01, 0x03}), + .equal_literal = Literal::Binary(std::vector{0x01, 0x02})}, + ComparisonLiteralTestParam{ + .test_name = "Fixed", + .small_literal = Literal::Fixed(std::vector{0x01, 0x02}), + .large_literal = Literal::Fixed(std::vector{0x01, 0x03}), + .equal_literal = Literal::Fixed(std::vector{0x01, 0x02})}, + ComparisonLiteralTestParam{.test_name = "Date", + .small_literal = Literal::Date(100), + .large_literal = Literal::Date(200), + .equal_literal = Literal::Date(100)}, + ComparisonLiteralTestParam{.test_name = "Time", 
+ .small_literal = Literal::Time(43200000000LL), + .large_literal = Literal::Time(86400000000LL), + .equal_literal = Literal::Time(43200000000LL)}, + ComparisonLiteralTestParam{.test_name = "Timestamp", + .small_literal = Literal::Timestamp(1000000LL), + .large_literal = Literal::Timestamp(2000000LL), + .equal_literal = Literal::Timestamp(1000000LL)}, + ComparisonLiteralTestParam{.test_name = "TimestampTz", + .small_literal = Literal::TimestampTz(1000000LL), + .large_literal = Literal::TimestampTz(2000000LL), + .equal_literal = Literal::TimestampTz(1000000LL)}), + [](const ::testing::TestParamInfo& info) { + return info.param.test_name; + }); + +INSTANTIATE_TEST_SUITE_P( + CastLiteralTestCases, CastLiteralTest, + ::testing::Values( + // Int cast tests + CastLiteralTestParam{.test_name = "IntToLong", + .source_literal = Literal::Int(42), + .target_type = int64(), + .expected_literal = Literal::Long(42L)}, + CastLiteralTestParam{.test_name = "IntToFloat", + .source_literal = Literal::Int(42), + .target_type = float32(), + .expected_literal = Literal::Float(42.0f)}, + CastLiteralTestParam{.test_name = "IntToDouble", + .source_literal = Literal::Int(42), + .target_type = float64(), + .expected_literal = Literal::Double(42.0)}, + CastLiteralTestParam{.test_name = "IntToDate", + .source_literal = Literal::Int(42), + .target_type = date(), + .expected_literal = Literal::Date(42)}, + // Long cast tests + CastLiteralTestParam{.test_name = "LongToInt", + .source_literal = Literal::Long(42L), + .target_type = int32(), + .expected_literal = Literal::Int(42)}, + CastLiteralTestParam{.test_name = "LongToFloat", + .source_literal = Literal::Long(42L), + .target_type = float32(), + .expected_literal = Literal::Float(42.0f)}, + CastLiteralTestParam{.test_name = "LongToDouble", + .source_literal = Literal::Long(42L), + .target_type = float64(), + .expected_literal = Literal::Double(42.0)}, + CastLiteralTestParam{.test_name = "LongToTime", + .source_literal = Literal::Long(42L), + 
.target_type = time(), + .expected_literal = Literal::Time(42L)}, + CastLiteralTestParam{.test_name = "LongToTimestamp", + .source_literal = Literal::Long(42L), + .target_type = timestamp(), + .expected_literal = Literal::Timestamp(42L)}, + CastLiteralTestParam{.test_name = "LongToTimestampTz", + .source_literal = Literal::Long(42L), + .target_type = timestamp_tz(), + .expected_literal = Literal::TimestampTz(42L)}, + // Float cast tests + CastLiteralTestParam{.test_name = "FloatToDouble", + .source_literal = Literal::Float(2.0f), + .target_type = float64(), + .expected_literal = Literal::Double(double{2.0f})}, + // Double cast tests + CastLiteralTestParam{.test_name = "DoubleToFloat", + .source_literal = Literal::Double(2.0), + .target_type = float32(), + .expected_literal = Literal::Float(2.0f)}, + // Binary cast tests + CastLiteralTestParam{.test_name = "BinaryToFixed", + .source_literal = Literal::Binary(std::vector{ + 0x01, 0x02, 0x03, 0x04}), + .target_type = fixed(4), + .expected_literal = Literal::Fixed(std::vector{ + 0x01, 0x02, 0x03, 0x04})}, + // Fixed cast tests + CastLiteralTestParam{.test_name = "FixedToBinary", + .source_literal = Literal::Fixed(std::vector{ + 0x01, 0x02, 0x03, 0x04}), + .target_type = binary(), + .expected_literal = Literal::Binary(std::vector{ + 0x01, 0x02, 0x03, 0x04})}, + CastLiteralTestParam{.test_name = "FixedToFixed", + .source_literal = Literal::Fixed(std::vector{ + 0x01, 0x02, 0x03, 0x04}), + .target_type = fixed(4), + .expected_literal = Literal::Fixed(std::vector{ + 0x01, 0x02, 0x03, 0x04})}, + // Same type cast test + CastLiteralTestParam{.test_name = "IntToInt", + .source_literal = Literal::Int(42), + .target_type = int32(), + .expected_literal = Literal::Int(42)}), + [](const ::testing::TestParamInfo& info) { + return info.param.test_name; + }); } // namespace iceberg From f703643c8a20fa5d267ac819e22ec41ad419708c Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Fri, 10 Oct 2025 16:27:04 +0800 Subject: [PATCH 6/7] remove AssertCastSucceeds test helper
--- src/iceberg/test/literal_test.cc | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 10a553e0..5fc6cdbb 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -30,22 +30,6 @@ namespace iceberg { -namespace { - -// Helper function to assert that a CastTo operation succeeds and checks -// the resulting type and value. -template -void AssertCastSucceeds(const Result& result, TypeId expected_type_id, - const T& expected_value) { - ASSERT_THAT(result, IsOk()); - ASSERT_EQ(result->type()->type_id(), expected_type_id); - EXPECT_EQ(std::get(result->value()), expected_value) - << "Type mismatch in std::get. Expected type for TypeId " - << static_cast(expected_type_id); -} - -} // namespace - // Parameter struct for basic literal tests struct BasicLiteralTestParam { std::string test_name; From b10b3d9aa65273f20c0736faefab71a13561096e Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Sat, 11 Oct 2025 10:46:06 +0800 Subject: [PATCH 7/7] add comment for LiteralParam and tidy literal.cc includes --- src/iceberg/expression/literal.cc | 4 +--- src/iceberg/test/literal_test.cc | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index fb19456f..5992b298 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -24,11 +24,9 @@ #include #include -#include "iceberg/util/conversions.h" -#include "iceberg/exception.h" #include "iceberg/type_fwd.h" #include "iceberg/util/checked_cast.h" -#include "iceberg/util/macros.h" +#include "iceberg/util/conversions.h" namespace iceberg { diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 5fc6cdbb..f6309c66 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -256,6 +256,7 @@ TEST(LiteralTest, DoubleZeroComparison) { EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less); } +//
Parameter struct for literal serialization and deserialization tests struct LiteralParam { std::string test_name; std::vector serialized;