Skip to content

Commit 754c20d

Browse files
committed
feat: support Binary for min/max and regexp_replace
1 parent 6aa423b commit 754c20d

File tree

6 files changed

+33
-9
lines changed

6 files changed

+33
-9
lines changed

datafusion/expr/src/aggregate_function.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ impl AggregateFunction {
342342
.chain(TIMESTAMPS.iter())
343343
.chain(DATES.iter())
344344
.chain(TIMES.iter())
345+
.chain(BINARYS.iter())
345346
.cloned()
346347
.collect::<Vec<_>>();
347348
Signature::uniform(1, valid, Volatility::Immutable)

datafusion/expr/src/built_in_function.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,13 @@ impl BuiltinScalarFunction {
10861086
BuiltinScalarFunction::RegexpReplace => Signature::one_of(
10871087
vec![
10881088
Exact(vec![Utf8, Utf8, Utf8]),
1089+
Exact(vec![Binary, Utf8, Utf8]),
1090+
Exact(vec![Utf8, Binary, Utf8]),
1091+
Exact(vec![Utf8, Utf8, Binary]),
1092+
Exact(vec![Binary, Binary, Utf8]),
1093+
Exact(vec![Binary, Utf8, Binary]),
1094+
Exact(vec![Utf8, Binary, Binary]),
1095+
Exact(vec![Binary, Binary, Binary]),
10891096
Exact(vec![Utf8, Utf8, Utf8, Utf8]),
10901097
],
10911098
self.volatility(),
@@ -1392,8 +1399,8 @@ macro_rules! make_utf8_to_return_type {
13921399
($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => {
13931400
fn $FUNC(arg_type: &DataType, name: &str) -> Result<DataType> {
13941401
Ok(match arg_type {
1395-
DataType::LargeUtf8 => $largeUtf8Type,
1396-
DataType::Utf8 => $utf8Type,
1402+
DataType::LargeUtf8 | DataType::LargeBinary=> $largeUtf8Type,
1403+
DataType::Utf8 | DataType::Binary => $utf8Type,
13971404
DataType::Null => DataType::Null,
13981405
DataType::Dictionary(_, value_type) => match **value_type {
13991406
DataType::LargeUtf8 => $largeUtf8Type,

datafusion/expr/src/type_coercion/aggregates.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ pub static TIMESTAMPS: &[DataType] = &[
7575

7676
/// Date data types: [`DataType::Date32`] and [`DataType::Date64`].
///
/// Exposed as a slice so callers can chain it with the other type lists in
/// this module when assembling the set of argument types a function accepts.
pub static DATES: &[DataType] = &[DataType::Date32, DataType::Date64];
7777

78+
/// Binary data types: [`DataType::Binary`] and [`DataType::LargeBinary`].
///
/// Only these two variants are listed; no other binary-like `DataType`
/// variant is part of this set.
// NOTE(review): presumably consumed by the min/max aggregate signature to
// admit binary inputs — confirm against `aggregate_function.rs`.
pub static BINARYS: &[DataType] = &[DataType::Binary, DataType::LargeBinary];
79+
7880
pub static TIMES: &[DataType] = &[
7981
DataType::Time32(TimeUnit::Second),
8082
DataType::Time32(TimeUnit::Millisecond),

datafusion/physical-expr/src/aggregate/min_max.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ use arrow::datatypes::{
3131
};
3232
use arrow::{
3333
array::{
34-
ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array,
35-
Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
36-
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
37-
Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
38-
TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
39-
UInt64Array, UInt8Array,
34+
ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Float32Array,
35+
Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray,
36+
LargeStringArray, StringArray, Time32MillisecondArray, Time32SecondArray,
37+
Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
38+
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
39+
UInt16Array, UInt32Array, UInt64Array, UInt8Array,
4040
},
4141
datatypes::Field,
4242
};

datafusion/physical-expr/src/functions.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,21 @@ pub fn create_physical_fun(
690690
let func = specializer_func(args)?;
691691
func(args)
692692
}
693+
DataType::Binary => {
694+
let args: Vec<ColumnarValue> = args
695+
.iter()
696+
.map(|col_value| {
697+
cast_column(col_value, &DataType::Utf8, None).unwrap()
698+
})
699+
.collect();
700+
let specializer_func = invoke_on_columnar_value_if_regex_expressions_feature_flag!(
701+
specialize_regexp_replace,
702+
i32,
703+
"regexp_replace"
704+
);
705+
let func = specializer_func(&args)?;
706+
func(&args)
707+
}
693708
other => internal_err!(
694709
"Unsupported data type {other:?} for function regexp_replace"
695710
),

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1976,7 +1976,6 @@ SELECT
19761976
count(largebinary)
19771977
FROM t
19781978

1979-
19801979
# with groupby
19811980
query error DataFusion error: External error: Internal error: Min/Max accumulator not implemented for type Binary\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
19821981
SELECT

0 commit comments

Comments
 (0)