From 3eb725e2c8d02d4c1d9e52eca088c209e8cb9733 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 5 Jun 2025 13:37:12 +0800 Subject: [PATCH 1/2] fix: NaN semantics --- .../aggregates/group_values/multi_group_by/primitive.rs | 3 ++- datafusion/sqllogictest/test_files/aggregate.slt | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 22d5987380a8..afec25fd3d66 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -17,6 +17,7 @@ use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; +use arrow::array::ArrowNativeTypeOp; use arrow::array::{cast::AsArray, Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; @@ -121,7 +122,7 @@ impl GroupColumn // Otherwise, we need to check their values } - *equal_to_result = self.group_values[lhs_row] == array.value(rhs_row); + *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); } } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 52b1e1c22fdf..0b8af2f13c99 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -7031,3 +7031,12 @@ VALUES ); ---- {a: 1, b: 2, c: 3} {a: 1, b: 2, c: 4} + +query TI +SELECT column1, COUNT(DISTINCT column2) FROM ( +VALUES + ('X', arrow_cast('NAN','Float64')), + ('X', arrow_cast('NAN','Float64')) +) GROUP BY 1 ORDER BY 1; +---- +x 1 From cd49e6f8ff56129f5f13bef2ca62f601172fe353 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 5 Jun 2025 13:38:08 +0800 Subject: [PATCH 2/2] update test --- datafusion/sqllogictest/test_files/aggregate.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 0b8af2f13c99..050305582ce0 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -7035,8 +7035,8 @@ VALUES query TI SELECT column1, COUNT(DISTINCT column2) FROM ( VALUES - ('X', arrow_cast('NAN','Float64')), - ('X', arrow_cast('NAN','Float64')) + ('x', arrow_cast('NAN','Float64')), + ('x', arrow_cast('NAN','Float64')) ) GROUP BY 1 ORDER BY 1; ---- x 1