Skip to content

Commit 1453a09

Browse files
sunchaodbtsai
authored andcommitted
[SPARK-32721][SQL] Simplify if clauses with null and boolean
### What changes were proposed in this pull request? The following if clause: ```sql if(p, null, false) ``` can be simplified to: ```sql and(p, null) ``` Similarly, the clause: ```sql if(p, null, true) ``` can be simplified to ```sql or(not(p), null) ``` iff the predicate `p` is non-nullable, i.e., can be evaluated to either true or false, but not null. ### Why are the changes needed? Converting if to or/and clauses can better push filters down. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests. Closes #29567 from sunchao/SPARK-32721. Authored-by: Chao Sun <[email protected]> Signed-off-by: DB Tsai <[email protected]>
1 parent 806140d commit 1453a09

File tree

3 files changed

+33
-5
lines changed

3 files changed

+33
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
463463
case If(Literal(null, _), _, falseValue) => falseValue
464464
case If(cond, trueValue, falseValue)
465465
if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue
466+
case If(cond, l @ Literal(null, _), FalseLiteral) if !cond.nullable => And(cond, l)
467+
case If(cond, l @ Literal(null, _), TrueLiteral) if !cond.nullable => Or(Not(cond), l)
466468

467469
case e @ CaseWhen(branches, elseValue) if branches.exists(x => falseOrNullLiteral(x._1)) =>
468470
// If there are branches that are always false, remove them.

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,14 +221,14 @@ class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper with
221221

222222
test("Complementation Laws - null handling") {
223223
checkCondition('e && !'e,
224-
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
224+
testRelationWithData.where(And(Literal(null, BooleanType), 'e.isNull)).analyze)
225225
checkCondition(!'e && 'e,
226-
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
226+
testRelationWithData.where(And(Literal(null, BooleanType), 'e.isNull)).analyze)
227227

228228
checkCondition('e || !'e,
229-
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
229+
testRelationWithData.where(Or('e.isNotNull, Literal(null, BooleanType))).analyze)
230230
checkCondition(!'e || 'e,
231-
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
231+
testRelationWithData.where(Or('e.isNotNull, Literal(null, BooleanType))).analyze)
232232
}
233233

234234
test("Complementation Laws - negative case") {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.rules._
2828
import org.apache.spark.sql.types.{BooleanType, IntegerType}
2929

3030

31-
class SimplifyConditionalSuite extends PlanTest with PredicateHelper {
31+
class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with PredicateHelper {
3232

3333
object Optimize extends RuleExecutor[LogicalPlan] {
3434
val batches = Batch("SimplifyConditionals", FixedPoint(50),
@@ -165,4 +165,30 @@ class SimplifyConditionalSuite extends PlanTest with PredicateHelper {
165165
Literal(1))
166166
)
167167
}
168+
169+
test("simplify if when then clause is null and else clause is boolean") {
170+
val p = IsNull('a)
171+
val nullLiteral = Literal(null, BooleanType)
172+
assertEquivalent(If(p, nullLiteral, FalseLiteral), And(p, nullLiteral))
173+
assertEquivalent(If(p, nullLiteral, TrueLiteral), Or(IsNotNull('a), nullLiteral))
174+
175+
// the rule should not apply to nullable predicate
176+
Seq(TrueLiteral, FalseLiteral).foreach { b =>
177+
assertEquivalent(If(GreaterThan('a, 42), nullLiteral, b),
178+
If(GreaterThan('a, 42), nullLiteral, b))
179+
}
180+
181+
// check evaluation also
182+
Seq(TrueLiteral, FalseLiteral).foreach { b =>
183+
checkEvaluation(If(b, nullLiteral, FalseLiteral), And(b, nullLiteral).eval(EmptyRow))
184+
checkEvaluation(If(b, nullLiteral, TrueLiteral), Or(Not(b), nullLiteral).eval(EmptyRow))
185+
}
186+
187+
// should have no effect on expressions with nullable if condition
188+
assert((Factorial(5) > 100L).nullable)
189+
Seq(TrueLiteral, FalseLiteral).foreach { b =>
190+
checkEvaluation(If(Factorial(5) > 100L, nullLiteral, b),
191+
If(Factorial(5) > 100L, nullLiteral, b).eval(EmptyRow))
192+
}
193+
}
168194
}

0 commit comments

Comments
 (0)