Skip to content

Commit 4b320e3

Browse files
committed
Use DataFusion repo
1 parent 7c96fa4 commit 4b320e3

File tree

17 files changed

+195
-181
lines changed

17 files changed

+195
-181
lines changed

core/Cargo.lock

Lines changed: 112 additions & 59 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/Cargo.toml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ include = [
2929

3030
[dependencies]
3131
parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate
32-
arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb", features = ["prettyprint", "ffi", "chrono-tz"] }
33-
arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" }
34-
arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" }
35-
arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" }
36-
arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb" }
37-
parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "d176dbb", default-features = false, features = ["experimental"] }
38-
half = { version = "~2.1", default-features = false }
32+
arrow = { version = "51.0.0", features = ["prettyprint", "ffi", "chrono-tz"] }
33+
arrow-array = { version = "51.0.0" }
34+
arrow-data = { version = "51.0.0" }
35+
arrow-schema = { version = "51.0.0" }
36+
arrow-string = { version = "51.0.0" }
37+
parquet = { version = "51.0.0", default-features = false, features = ["experimental"] }
38+
half = { version = "2.4.1", default-features = false }
3939
futures = "0.3.28"
4040
mimalloc = { version = "*", default-features = false, optional = true }
4141
tokio = { version = "1", features = ["rt-multi-thread"] }
@@ -66,10 +66,12 @@ itertools = "0.11.0"
6666
chrono = { version = "0.4", default-features = false, features = ["clock"] }
6767
chrono-tz = { version = "0.8" }
6868
paste = "1.0.14"
69-
datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81" }
70-
datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", features = ["unicode_expressions", "crypto_expressions"] }
71-
datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", features = ["crypto_expressions"] }
72-
datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "82b6b81", default-features = false, features = ["unicode_expressions"] }
69+
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc" }
70+
datafusion = { default-features = false, git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", features = ["unicode_expressions", "crypto_expressions"] }
71+
datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", features = ["crypto_expressions"] }
72+
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false }
73+
datafusion-physical-expr-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false }
74+
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ad2b1dc", default-features = false }
7375
unicode-segmentation = "^1.10.1"
7476
once_cell = "1.18.0"
7577
regex = "1.9.6"

core/src/execution/datafusion/expressions/bitwise_not.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ impl PhysicalExpr for BitwiseNotExpr {
105105
}
106106
}
107107

108-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
109-
vec![self.arg.clone()]
108+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
109+
vec![&self.arg]
110110
}
111111

112112
fn with_new_children(

core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ impl PhysicalExpr for BloomFilterMightContain {
129129
})
130130
}
131131

132-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
133-
vec![self.bloom_filter_expr.clone(), self.value_expr.clone()]
132+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
133+
vec![&self.bloom_filter_expr, &self.value_expr]
134134
}
135135

136136
fn with_new_children(

core/src/execution/datafusion/expressions/cast.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,8 +1291,8 @@ impl PhysicalExpr for Cast {
12911291
}
12921292
}
12931293

1294-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
1295-
vec![self.child.clone()]
1294+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
1295+
vec![&self.child]
12961296
}
12971297

12981298
fn with_new_children(

core/src/execution/datafusion/expressions/checkoverflow.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ impl PhysicalExpr for CheckOverflow {
165165
}
166166
}
167167

168-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
169-
vec![self.child.clone()]
168+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
169+
vec![&self.child]
170170
}
171171

172172
fn with_new_children(

core/src/execution/datafusion/expressions/if_expr.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,8 @@ impl PhysicalExpr for IfExpr {
110110
Ok(ColumnarValue::Array(current_value))
111111
}
112112

113-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
114-
vec![
115-
self.if_expr.clone(),
116-
self.true_expr.clone(),
117-
self.false_expr.clone(),
118-
]
113+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
114+
vec![&self.if_expr, &self.true_expr, &self.false_expr]
119115
}
120116

121117
fn with_new_children(
@@ -225,8 +221,8 @@ mod tests {
225221
let true_expr = lit(123i32);
226222
let false_expr = lit(999i32);
227223

228-
let expr = if_fn(if_expr, true_expr, false_expr);
229-
let children = expr.unwrap().children();
224+
let expr = if_fn(if_expr, true_expr, false_expr).unwrap();
225+
let children = expr.children();
230226
assert_eq!(children.len(), 3);
231227
assert_eq!(children[0].to_string(), "true");
232228
assert_eq!(children[1].to_string(), "123");

core/src/execution/datafusion/expressions/normalize_nan.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ impl PhysicalExpr for NormalizeNaNAndZero {
7777
}
7878
}
7979

80-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
80+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
8181
self.child.children()
8282
}
8383

core/src/execution/datafusion/expressions/scalar_funcs.rs

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use std::{
1919
any::Any,
2020
cmp::min,
2121
fmt::{Debug, Write},
22-
str::FromStr,
2322
sync::Arc,
2423
};
2524

@@ -35,17 +34,15 @@ use arrow_array::{Array, ArrowNativeTypeOp, Decimal128Array, StringArray};
3534
use arrow_schema::DataType;
3635
use datafusion::{
3736
execution::FunctionRegistry,
38-
logical_expr::{
39-
BuiltinScalarFunction, ScalarFunctionDefinition, ScalarFunctionImplementation,
40-
ScalarUDFImpl, Signature, Volatility,
41-
},
37+
functions::math::round::round,
38+
logical_expr::{ScalarFunctionImplementation, ScalarUDFImpl, Signature, Volatility},
4239
physical_plan::ColumnarValue,
4340
};
4441
use datafusion_common::{
4542
cast::{as_binary_array, as_generic_string_array},
4643
exec_err, internal_err, DataFusionError, Result as DataFusionResult, ScalarValue,
4744
};
48-
use datafusion_physical_expr::{math_expressions, udf::ScalarUDF};
45+
use datafusion_expr::ScalarUDF;
4946
use num::{
5047
integer::{div_ceil, div_floor},
5148
BigInt, Signed, ToPrimitive,
@@ -63,9 +60,7 @@ macro_rules! make_comet_scalar_udf {
6360
$data_type.clone(),
6461
Arc::new(move |args| $func(args, &$data_type)),
6562
);
66-
Ok(ScalarFunctionDefinition::UDF(Arc::new(
67-
ScalarUDF::new_from_impl(scalar_func),
68-
)))
63+
Ok(Arc::new(ScalarUDF::new_from_impl(scalar_func)))
6964
}};
7065
($name:expr, $func:expr, without $data_type:ident) => {{
7166
let scalar_func = CometScalarFunction::new(
@@ -74,9 +69,7 @@ macro_rules! make_comet_scalar_udf {
7469
$data_type,
7570
$func,
7671
);
77-
Ok(ScalarFunctionDefinition::UDF(Arc::new(
78-
ScalarUDF::new_from_impl(scalar_func),
79-
)))
72+
Ok(Arc::new(ScalarUDF::new_from_impl(scalar_func)))
8073
}};
8174
}
8275

@@ -85,7 +78,7 @@ pub fn create_comet_physical_fun(
8578
fun_name: &str,
8679
data_type: DataType,
8780
registry: &dyn FunctionRegistry,
88-
) -> Result<ScalarFunctionDefinition, DataFusionError> {
81+
) -> Result<Arc<ScalarUDF>, DataFusionError> {
8982
let sha2_functions = ["sha224", "sha256", "sha384", "sha512"];
9083
match fun_name {
9184
"ceil" => {
@@ -129,13 +122,11 @@ pub fn create_comet_physical_fun(
129122
let spark_func_name = "spark".to_owned() + sha;
130123
make_comet_scalar_udf!(spark_func_name, wrapped_func, without data_type)
131124
}
132-
_ => {
133-
if let Ok(fun) = BuiltinScalarFunction::from_str(fun_name) {
134-
Ok(ScalarFunctionDefinition::BuiltIn(fun))
135-
} else {
136-
Ok(ScalarFunctionDefinition::UDF(registry.udf(fun_name)?))
137-
}
138-
}
125+
_ => registry.udf(fun_name).map_err(|e| {
126+
DataFusionError::Execution(format!(
127+
"Function {fun_name} not found in the registry: {e}",
128+
))
129+
}),
139130
}
140131
}
141132

@@ -498,9 +489,7 @@ fn spark_round(
498489
make_decimal_array(array, precision, scale, &f)
499490
}
500491
DataType::Float32 | DataType::Float64 => {
501-
Ok(ColumnarValue::Array(math_expressions::round(&[
502-
array.clone()
503-
])?))
492+
Ok(ColumnarValue::Array(round(&[array.clone()])?))
504493
}
505494
dt => exec_err!("Not supported datatype for ROUND: {dt}"),
506495
},
@@ -523,7 +512,7 @@ fn spark_round(
523512
make_decimal_scalar(a, precision, scale, &f)
524513
}
525514
ScalarValue::Float32(_) | ScalarValue::Float64(_) => Ok(ColumnarValue::Scalar(
526-
ScalarValue::try_from_array(&math_expressions::round(&[a.to_array()?])?, 0)?,
515+
ScalarValue::try_from_array(&round(&[a.to_array()?])?, 0)?,
527516
)),
528517
dt => exec_err!("Not supported datatype for ROUND: {dt}"),
529518
},

core/src/execution/datafusion/expressions/strings.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ macro_rules! make_predicate_function {
111111
Ok(ColumnarValue::Array(Arc::new(array)))
112112
}
113113

114-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
115-
vec![self.left.clone(), self.right.clone()]
114+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
115+
vec![&self.left, &self.right]
116116
}
117117

118118
fn with_new_children(
@@ -221,8 +221,8 @@ impl PhysicalExpr for SubstringExec {
221221
}
222222
}
223223

224-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
225-
vec![self.child.clone()]
224+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
225+
vec![&self.child]
226226
}
227227

228228
fn with_new_children(
@@ -286,8 +286,8 @@ impl PhysicalExpr for StringSpaceExec {
286286
}
287287
}
288288

289-
fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
290-
vec![self.child.clone()]
289+
fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
290+
vec![&self.child]
291291
}
292292

293293
fn with_new_children(

0 commit comments

Comments
 (0)