-
Couldn't load subscription status.
- Fork 1.7k
Move parts of InListSimplifier simplify rules to Simplifier
#9628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ | |
|
|
||
| //! Expression simplification API | ||
|
|
||
| use std::borrow::Cow; | ||
| use std::collections::HashSet; | ||
| use std::ops::Not; | ||
|
|
||
| use super::inlist_simplifier::{InListSimplifier, ShortenInListSimplifier}; | ||
|
|
@@ -38,10 +40,11 @@ use datafusion_common::{ | |
| use datafusion_common::{ | ||
| internal_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, | ||
| }; | ||
| use datafusion_expr::expr::{InList, InSubquery}; | ||
| use datafusion_expr::simplify::ExprSimplifyResult; | ||
| use datafusion_expr::{ | ||
| and, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr, Like, | ||
| ScalarFunctionDefinition, Volatility, | ||
| Operator, ScalarFunctionDefinition, Volatility, | ||
| }; | ||
| use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; | ||
| use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps}; | ||
|
|
@@ -189,14 +192,15 @@ impl<S: SimplifyInfo> ExprSimplifier<S> { | |
| .data()? | ||
| .rewrite(&mut inlist_simplifier) | ||
| .data()? | ||
| .rewrite(&mut shorten_in_list_simplifier) | ||
| .data()? | ||
| .rewrite(&mut guarantee_rewriter) | ||
| .data()? | ||
| // run both passes twice to try an minimize simplifications that we missed | ||
| .rewrite(&mut const_evaluator) | ||
| .data()? | ||
| .rewrite(&mut simplifier) | ||
| .data()? | ||
| // shorten inlist should be started after other inlist rules are applied | ||
| .rewrite(&mut shorten_in_list_simplifier) | ||
| .data() | ||
| } | ||
|
|
||
|
|
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { | |
| Transformed::yes(lit(false)) | ||
| } | ||
|
|
||
| // expr IN () --> false | ||
| // expr NOT IN () --> true | ||
| Expr::InList(InList { | ||
| expr, | ||
| list, | ||
| negated, | ||
| }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) => { | ||
| Transformed::yes(lit(negated)) | ||
| } | ||
|
|
||
| // null in (x, y, z) --> null | ||
| // null not in (x, y, z) --> null | ||
| Expr::InList(InList { | ||
| expr, | ||
| list: _, | ||
| negated: _, | ||
| }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()), | ||
|
|
||
| // expr IN ((subquery)) -> expr IN (subquery), see ##5529 | ||
| Expr::InList(InList { | ||
| expr, | ||
| mut list, | ||
| negated, | ||
| }) if list.len() == 1 | ||
| && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) => | ||
| { | ||
| let Expr::ScalarSubquery(subquery) = list.remove(0) else { | ||
| unreachable!() | ||
| }; | ||
|
|
||
| Transformed::yes(Expr::InSubquery(InSubquery::new( | ||
| expr, subquery, negated, | ||
| ))) | ||
| } | ||
|
|
||
| // Combine multiple OR expressions into a single IN list expression if possible | ||
| // | ||
| // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)` | ||
| Expr::BinaryExpr(BinaryExpr { | ||
| left, | ||
| op: Operator::Or, | ||
| right, | ||
| }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After deref pattern is supported in stable rust, we can done the matching easier |
||
| let left = as_inlist(left.as_ref()); | ||
| let right = as_inlist(right.as_ref()); | ||
|
|
||
| let lhs = left.unwrap(); | ||
| let rhs = right.unwrap(); | ||
| let lhs = lhs.into_owned(); | ||
| let rhs = rhs.into_owned(); | ||
| let mut seen: HashSet<Expr> = HashSet::new(); | ||
| let list = lhs | ||
| .list | ||
| .into_iter() | ||
| .chain(rhs.list) | ||
| .filter(|e| seen.insert(e.to_owned())) | ||
| .collect::<Vec<_>>(); | ||
|
|
||
| let merged_inlist = InList { | ||
| expr: lhs.expr, | ||
| list, | ||
| negated: false, | ||
| }; | ||
|
|
||
| return Ok(Transformed::yes(Expr::InList(merged_inlist))); | ||
| } | ||
|
|
||
| // no additional rewrites possible | ||
| expr => Transformed::no(expr), | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool { | ||
| let left = as_inlist(left); | ||
| let right = as_inlist(right); | ||
| if let (Some(lhs), Some(rhs)) = (left, right) { | ||
| matches!(lhs.expr.as_ref(), Expr::Column(_)) | ||
| && matches!(rhs.expr.as_ref(), Expr::Column(_)) | ||
| && lhs.expr == rhs.expr | ||
| && !lhs.negated | ||
| && !rhs.negated | ||
| } else { | ||
| false | ||
| } | ||
| } | ||
|
|
||
| /// Try to convert an expression to an in-list expression | ||
| fn as_inlist(expr: &Expr) -> Option<Cow<InList>> { | ||
| match expr { | ||
| Expr::InList(inlist) => Some(Cow::Borrowed(inlist)), | ||
| Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => { | ||
| match (left.as_ref(), right.as_ref()) { | ||
| (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList { | ||
| expr: left.clone(), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be great to avoid these There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of course, removing cloned is my main goal, but it does not seem trivial now so not done in this PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree -- thank you @jayzhan211 |
||
| list: vec![*right.clone()], | ||
| negated: false, | ||
| })), | ||
| (Expr::Literal(_), Expr::Column(_)) => Some(Cow::Owned(InList { | ||
| expr: right.clone(), | ||
| list: vec![*left.clone()], | ||
| negated: false, | ||
| })), | ||
| _ => None, | ||
| } | ||
| } | ||
| _ => None, | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use std::{ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍