-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Closed
Labels
documentation (Improvements or additions to documentation), enhancement (New feature or request)
Description
Is your feature request related to a problem or challenge?
By default, DataFusion doesn't support many operators, for example `->`:
> set datafusion.sql_parser.dialect = postgres;
0 row(s) fetched.
Elapsed 0.001 seconds.
> select 'foo'->'bar';
This feature is not implemented: Unsupported SQL binary operator Arrow
However, you can use the ExprPlanner API to support them:
- There is a lower level API that lets you customize how expressions are planned, here: https://docs.rs/datafusion/latest/datafusion/logical_expr/planner/trait.ExprPlanner.html
- https://docs.rs/datafusion/latest/datafusion/execution/trait.FunctionRegistry.html#method.register_expr_planner
However, when @dadepo asked about this in #15264, it was not clear that this was supported.
After some digging I found an example in our tests:
datafusion/datafusion/core/tests/user_defined/expr_planner.rs
Lines 35 to 124 in 87eec43
| impl ExprPlanner for MyCustomPlanner { | |
| fn plan_binary_op( | |
| &self, | |
| expr: RawBinaryExpr, | |
| _schema: &DFSchema, | |
| ) -> Result<PlannerResult<RawBinaryExpr>> { | |
| match &expr.op { | |
| BinaryOperator::Arrow => { | |
| Ok(PlannerResult::Planned(Expr::BinaryExpr(BinaryExpr { | |
| left: Box::new(expr.left.clone()), | |
| right: Box::new(expr.right.clone()), | |
| op: Operator::StringConcat, | |
| }))) | |
| } | |
| BinaryOperator::LongArrow => { | |
| Ok(PlannerResult::Planned(Expr::BinaryExpr(BinaryExpr { | |
| left: Box::new(expr.left.clone()), | |
| right: Box::new(expr.right.clone()), | |
| op: Operator::Plus, | |
| }))) | |
| } | |
| BinaryOperator::Question => { | |
| Ok(PlannerResult::Planned(Expr::Alias(Alias::new( | |
| Expr::Literal(ScalarValue::Boolean(Some(true))), | |
| None::<&str>, | |
| format!("{} ? {}", expr.left, expr.right), | |
| )))) | |
| } | |
| _ => Ok(PlannerResult::Original(expr)), | |
| } | |
| } | |
| } | |
| async fn plan_and_collect(sql: &str) -> Result<Vec<RecordBatch>> { | |
| let config = | |
| SessionConfig::new().set_str("datafusion.sql_parser.dialect", "postgres"); | |
| let mut ctx = SessionContext::new_with_config(config); | |
| ctx.register_expr_planner(Arc::new(MyCustomPlanner))?; | |
| ctx.sql(sql).await?.collect().await | |
| } | |
| #[tokio::test] | |
| async fn test_custom_operators_arrow() { | |
| let actual = plan_and_collect("select 'foo'->'bar';").await.unwrap(); | |
| let expected = [ | |
| "+----------------------------+", | |
| "| Utf8(\"foo\") || Utf8(\"bar\") |", | |
| "+----------------------------+", | |
| "| foobar |", | |
| "+----------------------------+", | |
| ]; | |
| assert_batches_eq!(&expected, &actual); | |
| } | |
| #[tokio::test] | |
| async fn test_custom_operators_long_arrow() { | |
| let actual = plan_and_collect("select 1->>2;").await.unwrap(); | |
| let expected = [ | |
| "+---------------------+", | |
| "| Int64(1) + Int64(2) |", | |
| "+---------------------+", | |
| "| 3 |", | |
| "+---------------------+", | |
| ]; | |
| assert_batches_eq!(&expected, &actual); | |
| } | |
| #[tokio::test] | |
| async fn test_question_select() { | |
| let actual = plan_and_collect("select a ? 2 from (select 1 as a);") | |
| .await | |
| .unwrap(); | |
| let expected = [ | |
| "+--------------+", | |
| "| a ? Int64(2) |", | |
| "+--------------+", | |
| "| true |", | |
| "+--------------+", | |
| ]; | |
| assert_batches_eq!(&expected, &actual); | |
| } | |
| #[tokio::test] | |
| async fn test_question_filter() { | |
| let actual = plan_and_collect("select a from (select 1 as a) where a ? 2;") | |
| .await | |
| .unwrap(); | |
| let expected = ["+---+", "| a |", "+---+", "| 1 |", "+---+"]; | |
| assert_batches_eq!(&expected, &actual); | |
| } |
Describe the solution you'd like
I would like a section in the docs that has an example of how to do this.
Perhaps in this page: https://datafusion.apache.org/library-user-guide/adding-udfs.html
Or maybe it needs its own page 🤔
Describe alternatives you've considered
No response
Additional context
No response
Metadata
Metadata
Assignees
Labels
documentation (Improvements or additions to documentation), enhancement (New feature or request)