From 7ea8095ecbc124cfa2c7507b4ebe39906e542ff0 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Fri, 15 Mar 2024 22:55:56 +0800 Subject: [PATCH 1/5] move rule 1 Signed-off-by: jayzhan211 --- .../simplify_expressions/expr_simplifier.rs | 36 +++++++++++++++++-- .../simplify_expressions/inlist_simplifier.rs | 26 -------------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 344a738fe84b..994b38a84be6 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -38,10 +38,10 @@ use datafusion_common::{ use datafusion_common::{ internal_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, }; +use datafusion_expr::expr::{InList, InSubquery}; use datafusion_expr::simplify::ExprSimplifyResult; use datafusion_expr::{ - and, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr, Like, - ScalarFunctionDefinition, Volatility, + and, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr, Like, ScalarFunctionDefinition, Volatility }; use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps}; @@ -1405,6 +1405,38 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { Transformed::yes(lit(false)) } + // expr IN () --> false + // expr NOT IN () --> true + Expr::InList(InList { + expr, + list, + negated, + }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) => Transformed::yes(lit(negated)), + + // null in (x, y, z) --> null + // null not in (x, y, z) --> null + Expr::InList(InList { + expr, + list: _, + negated: _, + }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()), + + + // expr IN ((subquery)) -> expr IN (subquery), see ##5529 + Expr::InList(InList { + expr, + mut list, + negated, + }) if list.len() == 1 && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) => { + let Expr::ScalarSubquery(subquery) = list.remove(0) else { + unreachable!() + }; + + Transformed::yes(Expr::InSubquery(InSubquery::new( + expr, subquery, negated, + ))) + } + // no additional rewrites possible expr => Transformed::no(expr), }) diff --git a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs index fa1d7cfc1239..e32c0fc0088c 100644 --- a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs @@ -112,32 +112,6 @@ impl TreeNodeRewriter for InListSimplifier { type Node = Expr; fn f_up(&mut self, expr: Expr) -> Result> { - if let Expr::InList(InList { - expr, - mut list, - negated, - }) = expr.clone() - { - // expr IN () --> false - // expr NOT IN () --> true - if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) { - return Ok(Transformed::yes(lit(negated))); - // null in (x, y, z) --> null - // null not in (x, y, z) --> null - } else if is_null(&expr) { - return Ok(Transformed::yes(lit_bool_null())); - // expr IN ((subquery)) -> expr IN (subquery), see ##5529 - } else if list.len() == 1 - && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) - { - let Expr::ScalarSubquery(subquery) = list.remove(0) else { - unreachable!() - }; - return Ok(Transformed::yes(Expr::InSubquery(InSubquery::new( - expr, subquery, negated, - )))); - } - } // Combine multiple OR expressions into a single IN list expression if possible // // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)` From 6ce4e56e21baf09d855028bce959dac5de48d90d Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 16 Mar 2024 08:08:36 +0800 Subject: [PATCH 2/5] mv orlsit2inlist rule to simplifier Signed-off-by: jayzhan211 --- .../simplify_expressions/expr_simplifier.rs | 78 +++++++++++++++++-- .../simplify_expressions/inlist_simplifier.rs | 57 -------------- 2 files changed, 72 insertions(+), 63 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 994b38a84be6..3db7e7776659 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -17,6 +17,8 @@ //! Expression simplification API +use std::borrow::Cow; +use std::collections::HashSet; use std::ops::Not; use super::inlist_simplifier::{InListSimplifier, ShortenInListSimplifier}; @@ -41,7 +43,8 @@ use datafusion_common::{ use datafusion_expr::expr::{InList, InSubquery}; use datafusion_expr::simplify::ExprSimplifyResult; use datafusion_expr::{ - and, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr, Like, ScalarFunctionDefinition, Volatility + and, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr, Like, + Operator, ScalarFunctionDefinition, Volatility, }; use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps}; @@ -189,14 +192,14 @@ impl ExprSimplifier { .data()? .rewrite(&mut inlist_simplifier) .data()? - .rewrite(&mut shorten_in_list_simplifier) - .data()? .rewrite(&mut guarantee_rewriter) .data()? // run both passes twice to try an minimize simplifications that we missed .rewrite(&mut const_evaluator) .data()? .rewrite(&mut simplifier) + .data()? + .rewrite(&mut shorten_in_list_simplifier) .data() } @@ -1411,7 +1414,9 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { expr, list, negated, - }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) => Transformed::yes(lit(negated)), + }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) => { + Transformed::yes(lit(negated)) + } // null in (x, y, z) --> null // null not in (x, y, z) --> null @@ -1420,14 +1425,15 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { list: _, negated: _, }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()), - // expr IN ((subquery)) -> expr IN (subquery), see ##5529 Expr::InList(InList { expr, mut list, negated, - }) if list.len() == 1 && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) => { + }) if list.len() == 1 + && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) => + { let Expr::ScalarSubquery(subquery) = list.remove(0) else { unreachable!() }; @@ -1437,12 +1443,72 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { ))) } + // Combine multiple OR expressions into a single IN list expression if possible + // + // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)` + Expr::BinaryExpr(BinaryExpr { left, op, right }) + if op == Operator::Or + && as_inlist(left.as_ref()).is_some_and(|lhs| { + lhs.expr.try_into_col().is_ok() && !lhs.negated + }) + && as_inlist(right.as_ref()).is_some_and(|rhs| { + rhs.expr.try_into_col().is_ok() && !rhs.negated + }) + && as_inlist(left.as_ref()).unwrap().expr + == as_inlist(right.as_ref()).unwrap().expr => + { + let left = as_inlist(left.as_ref()); + let right = as_inlist(right.as_ref()); + + let lhs = left.unwrap(); + let rhs = right.unwrap(); + let lhs = lhs.into_owned(); + let rhs = rhs.into_owned(); + let mut seen: HashSet = HashSet::new(); + let list = lhs + .list + .into_iter() + .chain(rhs.list) + .filter(|e| seen.insert(e.to_owned())) + .collect::>(); + + let merged_inlist = InList { + expr: lhs.expr, + list, + negated: false, + }; + return Ok(Transformed::yes(Expr::InList(merged_inlist))); + } + // no additional rewrites possible expr => Transformed::no(expr), }) } } +/// Try to convert an expression to an in-list expression +fn as_inlist(expr: &Expr) -> Option> { + match expr { + Expr::InList(inlist) => Some(Cow::Borrowed(inlist)), + Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => { + match (left.as_ref(), right.as_ref()) { + (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList { + expr: left.clone(), + list: vec![*right.clone()], + negated: false, + })), + (Expr::Literal(_), Expr::Column(_)) => Some(Cow::Owned(InList { + expr: right.clone(), + list: vec![*left.clone()], + negated: false, + })), + _ => None, + } + } + _ => None, + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs index e32c0fc0088c..8c507f1cea7a 100644 --- a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs @@ -112,40 +112,6 @@ impl TreeNodeRewriter for InListSimplifier { type Node = Expr; fn f_up(&mut self, expr: Expr) -> Result> { - // Combine multiple OR expressions into a single IN list expression if possible - // - // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)` - if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = &expr { - if *op == Operator::Or { - let left = as_inlist(left); - let right = as_inlist(right); - if let (Some(lhs), Some(rhs)) = (left, right) { - if lhs.expr.try_into_col().is_ok() - && rhs.expr.try_into_col().is_ok() - && lhs.expr == rhs.expr - && !lhs.negated - && !rhs.negated - { - let lhs = lhs.into_owned(); - let rhs = rhs.into_owned(); - let mut seen: HashSet = HashSet::new(); - let list = lhs - .list - .into_iter() - .chain(rhs.list) - .filter(|e| seen.insert(e.to_owned())) - .collect::>(); - - let merged_inlist = InList { - expr: lhs.expr, - list, - negated: false, - }; - return Ok(Transformed::yes(Expr::InList(merged_inlist))); - } - } - } - } // Simplify expressions that is guaranteed to be true or false to a literal boolean expression // // Rules: @@ -204,29 +170,6 @@ impl TreeNodeRewriter for InListSimplifier { } } -/// Try to convert an expression to an in-list expression -fn as_inlist(expr: &Expr) -> Option> { - match expr { - Expr::InList(inlist) => Some(Cow::Borrowed(inlist)), - Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => { - match (left.as_ref(), right.as_ref()) { - (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList { - expr: left.clone(), - list: vec![*right.clone()], - negated: false, - })), - (Expr::Literal(_), Expr::Column(_)) => Some(Cow::Owned(InList { - expr: right.clone(), - list: vec![*left.clone()], - negated: false, - })), - _ => None, - } - } - _ => None, - } -} - /// Return the union of two inlist expressions /// maintaining the order of the elements in the two lists fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result { From af8e37dc771d306932506624d0c49f11101f834f Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 16 Mar 2024 08:34:18 +0800 Subject: [PATCH 3/5] fmt Signed-off-by: jayzhan211 --- .../simplify_expressions/expr_simplifier.rs | 23 +++++++++++-------- .../simplify_expressions/inlist_simplifier.rs | 6 ++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 3db7e7776659..ca82aa731c06 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -199,6 +199,7 @@ impl ExprSimplifier { .data()? .rewrite(&mut simplifier) .data()? + // shorten inlist should be started after other inlist rules are applied .rewrite(&mut shorten_in_list_simplifier) .data() } @@ -1446,16 +1447,17 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { // Combine multiple OR expressions into a single IN list expression if possible // // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)` - Expr::BinaryExpr(BinaryExpr { left, op, right }) - if op == Operator::Or - && as_inlist(left.as_ref()).is_some_and(|lhs| { - lhs.expr.try_into_col().is_ok() && !lhs.negated - }) - && as_inlist(right.as_ref()).is_some_and(|rhs| { - rhs.expr.try_into_col().is_ok() && !rhs.negated - }) - && as_inlist(left.as_ref()).unwrap().expr - == as_inlist(right.as_ref()).unwrap().expr => + Expr::BinaryExpr(BinaryExpr { + left, + op: Operator::Or, + right, + }) if as_inlist(left.as_ref()) + .is_some_and(|lhs| lhs.expr.try_into_col().is_ok() && !lhs.negated) + && as_inlist(right.as_ref()).is_some_and(|rhs| { + rhs.expr.try_into_col().is_ok() && !rhs.negated + }) + && as_inlist(left.as_ref()).unwrap().expr + == as_inlist(right.as_ref()).unwrap().expr => { let left = as_inlist(left.as_ref()); let right = as_inlist(right.as_ref()); @@ -1477,6 +1479,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { list, negated: false, }; + return Ok(Transformed::yes(Expr::InList(merged_inlist))); } diff --git a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs index 8c507f1cea7a..5d1cf27827a9 100644 --- a/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/inlist_simplifier.rs @@ -17,15 +17,13 @@ //! This module implements a rule that simplifies the values for `InList`s -use super::utils::{is_null, lit_bool_null}; use super::THRESHOLD_INLINE_INLIST; -use std::borrow::Cow; use std::collections::HashSet; use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; -use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::expr::{InList, InSubquery}; +use datafusion_common::Result; +use datafusion_expr::expr::InList; use datafusion_expr::{lit, BinaryExpr, Expr, Operator}; pub(super) struct ShortenInListSimplifier {} From c88808e117f4c34e2c34b9ca4a222bea890c2ecd Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 16 Mar 2024 09:13:36 +0800 Subject: [PATCH 4/5] wrap rule in func Signed-off-by: jayzhan211 --- .../simplify_expressions/expr_simplifier.rs | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index ca82aa731c06..4307df6628ed 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1451,14 +1451,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Operator::Or, right, - }) if as_inlist(left.as_ref()) - .is_some_and(|lhs| lhs.expr.try_into_col().is_ok() && !lhs.negated) - && as_inlist(right.as_ref()).is_some_and(|rhs| { - rhs.expr.try_into_col().is_ok() && !rhs.negated - }) - && as_inlist(left.as_ref()).unwrap().expr - == as_inlist(right.as_ref()).unwrap().expr => - { + }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => { let left = as_inlist(left.as_ref()); let right = as_inlist(right.as_ref()); @@ -1489,6 +1482,20 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { } } +fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool { + let left = as_inlist(left); + let right = as_inlist(right); + if let (Some(lhs), Some(rhs)) = (left, right) { + lhs.expr.try_into_col().is_ok() + && rhs.expr.try_into_col().is_ok() + && lhs.expr == rhs.expr + && !lhs.negated + && !rhs.negated + } else { + false + } +} + /// Try to convert an expression to an in-list expression fn as_inlist(expr: &Expr) -> Option> { match expr { From 9ad8b522784fcb3f6499831210b53a3805aa4de0 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 16 Mar 2024 20:54:04 +0800 Subject: [PATCH 5/5] use matches Signed-off-by: jayzhan211 --- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 4307df6628ed..5b5bca75ddb0 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1486,8 +1486,8 @@ fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool { let left = as_inlist(left); let right = as_inlist(right); if let (Some(lhs), Some(rhs)) = (left, right) { - lhs.expr.try_into_col().is_ok() - && rhs.expr.try_into_col().is_ok() + matches!(lhs.expr.as_ref(), Expr::Column(_)) + && matches!(rhs.expr.as_ref(), Expr::Column(_)) && lhs.expr == rhs.expr && !lhs.negated && !rhs.negated