Skip to content

Commit 9bf0630

Browse files
authored
Allow simplification even when nullable (#12746)
The nullable requirements seem to have been added in #1401, but as far as I can tell they are not needed for these 2 cases. I think this can be shown using this truth table (generated using datafusion-cli without this patch):

```
> CREATE TABLE t (v BOOLEAN) as values (true), (false), (NULL);
> select t.v, t2.v, t.v AND (t.v OR t2.v), t.v OR (t.v AND t2.v) from t cross join t as t2;
+-------+-------+---------------------+---------------------+
| v     | v     | t.v AND t.v OR t2.v | t.v OR t.v AND t2.v |
+-------+-------+---------------------+---------------------+
| true  | true  | true                | true                |
| true  | false | true                | true                |
| true  |       | true                | true                |
| false | true  | false               | false               |
| false | false | false               | false               |
| false |       | false               | false               |
|       | true  |                     |                     |
|       | false |                     |                     |
|       |       |                     |                     |
+-------+-------+---------------------+---------------------+
```

And it seems Spark applies both of these and DuckDB applies only the first one.
1 parent 18f9201 commit 9bf0630

File tree

2 files changed

+21
-35
lines changed

2 files changed

+21
-35
lines changed

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -838,22 +838,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
838838
op: Or,
839839
right,
840840
}) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
841-
// A OR (A AND B) --> A (if B not null)
841+
// A OR (A AND B) --> A
842842
Expr::BinaryExpr(BinaryExpr {
843843
left,
844844
op: Or,
845845
right,
846-
}) if !info.nullable(&right)? && is_op_with(And, &right, &left) => {
847-
Transformed::yes(*left)
848-
}
849-
// (A AND B) OR A --> A (if B not null)
846+
}) if is_op_with(And, &right, &left) => Transformed::yes(*left),
847+
// (A AND B) OR A --> A
850848
Expr::BinaryExpr(BinaryExpr {
851849
left,
852850
op: Or,
853851
right,
854-
}) if !info.nullable(&left)? && is_op_with(And, &left, &right) => {
855-
Transformed::yes(*right)
856-
}
852+
}) if is_op_with(And, &left, &right) => Transformed::yes(*right),
857853

858854
//
859855
// Rules for AND
@@ -911,22 +907,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
911907
op: And,
912908
right,
913909
}) if expr_contains(&right, &left, And) => Transformed::yes(*right),
914-
// A AND (A OR B) --> A (if B not null)
910+
// A AND (A OR B) --> A
915911
Expr::BinaryExpr(BinaryExpr {
916912
left,
917913
op: And,
918914
right,
919-
}) if !info.nullable(&right)? && is_op_with(Or, &right, &left) => {
920-
Transformed::yes(*left)
921-
}
922-
// (A OR B) AND A --> A (if B not null)
915+
}) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
916+
// (A OR B) AND A --> A
923917
Expr::BinaryExpr(BinaryExpr {
924918
left,
925919
op: And,
926920
right,
927-
}) if !info.nullable(&left)? && is_op_with(Or, &left, &right) => {
928-
Transformed::yes(*right)
929-
}
921+
}) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
930922

931923
//
932924
// Rules for Multiply
@@ -2609,15 +2601,11 @@ mod tests {
26092601
// (c2 > 5) OR ((c1 < 6) AND (c2 > 5))
26102602
let expr = or(l.clone(), r.clone());
26112603

2612-
// no rewrites if c1 can be null
2613-
let expected = expr.clone();
2604+
let expected = l.clone();
26142605
assert_eq!(simplify(expr), expected);
26152606

26162607
// ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)
2617-
let expr = or(l, r);
2618-
2619-
// no rewrites if c1 can be null
2620-
let expected = expr.clone();
2608+
let expr = or(r, l);
26212609
assert_eq!(simplify(expr), expected);
26222610
}
26232611

@@ -2648,13 +2636,11 @@ mod tests {
26482636
// (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
26492637
let expr = and(l.clone(), r.clone());
26502638

2651-
// no rewrites if c1 can be null
2652-
let expected = expr.clone();
2639+
let expected = l.clone();
26532640
assert_eq!(simplify(expr), expected);
26542641

26552642
// ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
2656-
let expr = and(l, r);
2657-
let expected = expr.clone();
2643+
let expr = and(r, l);
26582644
assert_eq!(simplify(expr), expected);
26592645
}
26602646

@@ -3223,7 +3209,7 @@ mod tests {
32233209
)],
32243210
Some(Box::new(col("c2").eq(lit(true)))),
32253211
)))),
3226-
col("c2").or(col("c2").not().and(col("c2"))) // #1716
3212+
col("c2")
32273213
);
32283214

32293215
// CASE WHEN ISNULL(c2) THEN true ELSE c2

datafusion/sqllogictest/test_files/cse.slt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -179,20 +179,20 @@ physical_plan
179179
# Surely only once but also conditionally evaluated expressions
180180
query TT
181181
EXPLAIN SELECT
182-
(a = 1 OR random() = 0) AND a = 1 AS c1,
183-
(a = 2 AND random() = 0) OR a = 2 AS c2,
182+
(a = 1 OR random() = 0) AND a = 2 AS c1,
183+
(a = 2 AND random() = 0) OR a = 1 AS c2,
184184
CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3,
185185
CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4,
186186
CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5,
187187
CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6
188188
FROM t1
189189
----
190190
logical_plan
191-
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
191+
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
192192
02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6
193193
03)----TableScan: t1 projection=[a]
194194
physical_plan
195-
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
195+
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
196196
02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6]
197197
03)----MemoryExec: partitions=1, partition_sizes=[0]
198198

@@ -217,17 +217,17 @@ physical_plan
217217
# Only conditionally evaluated expressions
218218
query TT
219219
EXPLAIN SELECT
220-
(random() = 0 OR a = 1) AND a = 1 AS c1,
221-
(random() = 0 AND a = 2) OR a = 2 AS c2,
220+
(random() = 0 OR a = 1) AND a = 2 AS c1,
221+
(random() = 0 AND a = 2) OR a = 1 AS c2,
222222
CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3,
223223
CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4,
224224
CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5,
225225
CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END AS c6
226226
FROM t1
227227
----
228228
logical_plan
229-
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
229+
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
230230
02)--TableScan: t1 projection=[a]
231231
physical_plan
232-
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random() = 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
232+
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
233233
02)--MemoryExec: partitions=1, partition_sizes=[0]

0 commit comments

Comments
 (0)