Skip to content

Commit e7fadf6

Browse files
committed
Fix count(null) and count(distinct null)
Use `logical_nulls` when the array data type is `Null`.
1 parent 95ba48b commit e7fadf6

File tree

3 files changed

+60
-18
lines changed

3 files changed

+60
-18
lines changed

datafusion/physical-expr/src/aggregate/count.rs

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,20 @@ impl GroupsAccumulator for CountGroupsAccumulator {
121121
// Add one to each group's counter for each non null, non
122122
// filtered value
123123
self.counts.resize(total_num_groups, 0);
124-
accumulate_indices(
125-
group_indices,
126-
values.nulls(), // ignore values
127-
opt_filter,
128-
|group_index| {
129-
self.counts[group_index] += 1;
130-
},
131-
);
124+
let index_fn = |group_index| {
125+
self.counts[group_index] += 1;
126+
};
127+
128+
if values.data_type() == &DataType::Null {
129+
accumulate_indices(
130+
group_indices,
131+
values.logical_nulls().as_ref(),
132+
opt_filter,
133+
index_fn,
134+
);
135+
} else {
136+
accumulate_indices(group_indices, values.nulls(), opt_filter, index_fn);
137+
}
132138

133139
Ok(())
134140
}
@@ -195,19 +201,35 @@ impl GroupsAccumulator for CountGroupsAccumulator {
195201
/// count null values for multiple columns
196202
/// for each row if one column value is null, then null_count + 1
197203
fn null_count_for_multiple_cols(values: &[ArrayRef]) -> usize {
204+
fn and_nulls(acc: BooleanBuffer, arr: &ArrayRef) -> BooleanBuffer {
205+
if arr.data_type() == &DataType::Null {
206+
if let Some(nulls) = arr.logical_nulls() {
207+
return acc.bitand(nulls.inner());
208+
}
209+
} else if let Some(nulls) = arr.nulls() {
210+
return acc.bitand(nulls.inner());
211+
}
212+
213+
acc
214+
}
215+
198216
if values.len() > 1 {
199-
let result_bool_buf: Option<BooleanBuffer> = values
200-
.iter()
201-
.map(|a| a.nulls())
202-
.fold(None, |acc, b| match (acc, b) {
203-
(Some(acc), Some(b)) => Some(acc.bitand(b.inner())),
204-
(Some(acc), None) => Some(acc),
205-
(None, Some(b)) => Some(b.inner().clone()),
206-
_ => None,
217+
let result_bool_buf: Option<BooleanBuffer> =
218+
values.iter().fold(None, |acc, arr| {
219+
Some(if let Some(acc) = acc {
220+
and_nulls(acc, arr)
221+
} else {
222+
arr.logical_nulls()?.into_inner()
223+
})
207224
});
208225
result_bool_buf.map_or(0, |b| values[0].len() - b.count_set_bits())
209226
} else {
210-
values[0].null_count()
227+
let values = &values[0];
228+
if values.data_type() == &DataType::Null {
229+
values.len()
230+
} else {
231+
values.null_count()
232+
}
211233
}
212234
}
213235

datafusion/physical-expr/src/aggregate/count_distinct.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,12 @@ impl Accumulator for DistinctCountAccumulator {
152152
if values.is_empty() {
153153
return Ok(());
154154
}
155+
155156
let arr = &values[0];
157+
if arr.data_type() == &DataType::Null {
158+
return Ok(());
159+
}
160+
156161
(0..arr.len()).try_for_each(|index| {
157162
if !arr.is_null(index) {
158163
let scalar = ScalarValue::try_from_array(arr, index)?;

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1492,6 +1492,12 @@ SELECT count(c1, c2) FROM test
14921492
----
14931493
3
14941494

1495+
# count_null
1496+
query III
1497+
SELECT count(null), count(null, null), count(distinct null) FROM test
1498+
----
1499+
0 0 0
1500+
14951501
# count_multi_expr_group_by
14961502
query I
14971503
SELECT count(c1, c2) FROM test group by c1 order by c1
@@ -1501,6 +1507,15 @@ SELECT count(c1, c2) FROM test group by c1 order by c1
15011507
2
15021508
0
15031509

1510+
# count_null_group_by
1511+
query III
1512+
SELECT count(null), count(null, null), count(distinct null) FROM test group by c1 order by c1
1513+
----
1514+
0 0 0
1515+
0 0 0
1516+
0 0 0
1517+
0 0 0
1518+
15041519
# aggreggte_with_alias
15051520
query II
15061521
select c1, sum(c2) as `Total Salary` from test group by c1 order by c1
@@ -3241,4 +3256,4 @@ select count(*) from (select count(*) from (select 1));
32413256
query I
32423257
select count(*) from (select count(*) a, count(*) b from (select 1));
32433258
----
3244-
1
3259+
1

0 commit comments

Comments
 (0)