From 978bad9a870ee45d72e2bbe532c031660f80c12a Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 31 Aug 2020 17:47:52 -0700 Subject: [PATCH 1/2] [SPARK-32721][SQL][FOLLOWUP] Simplify if clauses with null and boolean --- .../spark/sql/catalyst/optimizer/expressions.scala | 2 ++ .../catalyst/optimizer/SimplifyConditionalSuite.scala | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 67f7cd955ee71..b2fc3936e1a29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -465,6 +465,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue case If(cond, l @ Literal(null, _), FalseLiteral) if !cond.nullable => And(cond, l) case If(cond, l @ Literal(null, _), TrueLiteral) if !cond.nullable => Or(Not(cond), l) + case If(cond, FalseLiteral, l @ Literal(null, _)) if !cond.nullable => And(Not(cond), l) + case If(cond, TrueLiteral, l @ Literal(null, _)) if !cond.nullable => Or(cond, l) case e @ CaseWhen(branches, elseValue) if branches.exists(x => falseOrNullLiteral(x._1)) => // If there are branches that are always false, remove them. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 7a186a62dec36..45ec657ef28b9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -166,22 +166,28 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P ) } - test("simplify if when then clause is null and else clause is boolean") { + test("simplify if when one clause is null and another is boolean") { val p = IsNull('a) val nullLiteral = Literal(null, BooleanType) assertEquivalent(If(p, nullLiteral, FalseLiteral), And(p, nullLiteral)) assertEquivalent(If(p, nullLiteral, TrueLiteral), Or(IsNotNull('a), nullLiteral)) + assertEquivalent(If(p, FalseLiteral, nullLiteral), And(IsNotNull('a), nullLiteral)) + assertEquivalent(If(p, TrueLiteral, nullLiteral), Or(IsNull('a), nullLiteral)) // the rule should not apply to nullable predicate Seq(TrueLiteral, FalseLiteral).foreach { b => assertEquivalent(If(GreaterThan('a, 42), nullLiteral, b), If(GreaterThan('a, 42), nullLiteral, b)) + assertEquivalent(If(GreaterThan('a, 42), b, nullLiteral), + If(GreaterThan('a, 42), b, nullLiteral)) } // check evaluation also Seq(TrueLiteral, FalseLiteral).foreach { b => checkEvaluation(If(b, nullLiteral, FalseLiteral), And(b, nullLiteral).eval(EmptyRow)) checkEvaluation(If(b, nullLiteral, TrueLiteral), Or(Not(b), nullLiteral).eval(EmptyRow)) + checkEvaluation(If(b, FalseLiteral, nullLiteral), And(Not(b), nullLiteral).eval(EmptyRow)) + checkEvaluation(If(b, TrueLiteral, nullLiteral), Or(b, nullLiteral).eval(EmptyRow)) } // should have no effect on expressions with nullable if condition @@ -189,6 +195,8 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P Seq(TrueLiteral, FalseLiteral).foreach { b => checkEvaluation(If(Factorial(5) > 100L, nullLiteral, b), If(Factorial(5) > 100L, nullLiteral, b).eval(EmptyRow)) + checkEvaluation(If(Factorial(5) > 100L, b, nullLiteral), + If(Factorial(5) > 100L, b, nullLiteral).eval(EmptyRow)) } } } From 34e3d38fd0fb9158acdfae119ad796e33aa928ec Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 31 Aug 2020 22:53:00 -0700 Subject: [PATCH 2/2] Minor nit --- .../spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 45ec657ef28b9..bac962ced4618 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -172,7 +172,7 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P assertEquivalent(If(p, nullLiteral, FalseLiteral), And(p, nullLiteral)) assertEquivalent(If(p, nullLiteral, TrueLiteral), Or(IsNotNull('a), nullLiteral)) assertEquivalent(If(p, FalseLiteral, nullLiteral), And(IsNotNull('a), nullLiteral)) - assertEquivalent(If(p, TrueLiteral, nullLiteral), Or(IsNull('a), nullLiteral)) + assertEquivalent(If(p, TrueLiteral, nullLiteral), Or(p, nullLiteral)) // the rule should not apply to nullable predicate Seq(TrueLiteral, FalseLiteral).foreach { b =>