Skip to content

Commit 65f09fa

Browse files
author
Evgeny Maruschenko
committed
Fixes
1 parent ceb5434 commit 65f09fa

File tree

5 files changed

+104
-51
lines changed

5 files changed

+104
-51
lines changed

datafusion/optimizer/src/eliminate_nested_union.rs

Lines changed: 95 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,10 @@
1818
//! Optimizer rule to replace nested unions to single union.
1919
use crate::{OptimizerConfig, OptimizerRule};
2020
use datafusion_common::Result;
21-
use datafusion_expr::{
22-
builder::project_with_column_index,
23-
expr_rewriter::coerce_plan_expr_for_schema,
24-
logical_plan::{LogicalPlan, Projection, Union},
25-
};
21+
use datafusion_expr::logical_plan::{LogicalPlan, Union};
2622

2723
use crate::optimizer::ApplyOrder;
24+
use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema;
2825
use std::sync::Arc;
2926

3027
#[derive(Default)]
@@ -38,6 +35,8 @@ impl EliminateNestedUnion {
3835
}
3936
}
4037

38+
pub fn get_union_schema() {}
39+
4140
impl OptimizerRule for EliminateNestedUnion {
4241
fn try_optimize(
4342
&self,
@@ -48,32 +47,24 @@ impl OptimizerRule for EliminateNestedUnion {
4847
LogicalPlan::Union(union) => {
4948
let Union { inputs, schema } = union;
5049

51-
let union_schema = schema.clone();
52-
5350
let inputs = inputs
5451
.into_iter()
55-
.flat_map(|plan| match Arc::as_ref(plan) {
56-
LogicalPlan::Union(Union { inputs, .. }) => inputs.clone(),
57-
_ => vec![Arc::clone(plan)],
58-
})
59-
.map(|plan| {
60-
let plan = coerce_plan_expr_for_schema(&plan, &union_schema)?;
61-
match plan {
62-
LogicalPlan::Projection(Projection {
63-
expr, input, ..
64-
}) => Ok(Arc::new(project_with_column_index(
65-
expr,
66-
input,
67-
union_schema.clone(),
68-
)?)),
69-
_ => Ok(Arc::new(plan)),
70-
}
52+
.flat_map(|plan| match plan.as_ref() {
53+
LogicalPlan::Union(Union { inputs, schema }) => inputs
54+
.into_iter()
55+
.map(|plan| {
56+
Arc::new(
57+
coerce_plan_expr_for_schema(plan, schema).unwrap(),
58+
)
59+
})
60+
.collect::<Vec<_>>(),
61+
_ => vec![plan.clone()],
7162
})
72-
.collect::<Result<Vec<_>>>()?;
63+
.collect::<Vec<_>>();
7364

7465
Ok(Some(LogicalPlan::Union(Union {
7566
inputs,
76-
schema: union_schema,
67+
schema: schema.clone(),
7768
})))
7869
}
7970
_ => Ok(None),
@@ -94,13 +85,13 @@ mod tests {
9485
use super::*;
9586
use crate::test::*;
9687
use arrow::datatypes::{DataType, Field, Schema};
97-
use datafusion_expr::logical_plan::table_scan;
88+
use datafusion_expr::{col, logical_plan::table_scan};
9889

9990
fn schema() -> Schema {
10091
Schema::new(vec![
10192
Field::new("id", DataType::Int32, false),
10293
Field::new("key", DataType::Utf8, false),
103-
Field::new("value", DataType::Int32, false),
94+
Field::new("value", DataType::Float64, false),
10495
])
10596
}
10697

@@ -143,4 +134,81 @@ mod tests {
143134
\n TableScan: table";
144135
assert_optimized_plan_equal(&plan, expected)
145136
}
137+
138+
// We don't need to use project_with_column_index in logical optimizer,
139+
// after LogicalPlanBuilder::union, we already have all equal expression aliases
140+
#[test]
141+
fn eliminate_nested_union_with_projection() -> Result<()> {
142+
let plan_builder = table_scan(Some("table"), &schema(), None)?;
143+
144+
let plan = plan_builder
145+
.clone()
146+
.union(
147+
plan_builder
148+
.clone()
149+
.project(vec![col("id").alias("table_id"), col("key"), col("value")])?
150+
.build()?,
151+
)?
152+
.union(
153+
plan_builder
154+
.clone()
155+
.project(vec![col("id").alias("_id"), col("key"), col("value")])?
156+
.build()?,
157+
)?
158+
.build()?;
159+
160+
let expected = "Union\
161+
\n TableScan: table\
162+
\n Projection: table.id AS id, table.key, table.value\
163+
\n TableScan: table\
164+
\n Projection: table.id AS id, table.key, table.value\
165+
\n TableScan: table";
166+
assert_optimized_plan_equal(&plan, expected)
167+
}
168+
169+
#[test]
170+
fn eliminate_nested_union_with_type_cast_projection() -> Result<()> {
171+
let table_1 = table_scan(
172+
Some("table_1"),
173+
&Schema::new(vec![
174+
Field::new("id", DataType::Int64, false),
175+
Field::new("key", DataType::Utf8, false),
176+
Field::new("value", DataType::Float64, false),
177+
]),
178+
None,
179+
)?;
180+
181+
let table_2 = table_scan(
182+
Some("table_1"),
183+
&Schema::new(vec![
184+
Field::new("id", DataType::Int32, false),
185+
Field::new("key", DataType::Utf8, false),
186+
Field::new("value", DataType::Float32, false),
187+
]),
188+
None,
189+
)?;
190+
191+
let table_3 = table_scan(
192+
Some("table_1"),
193+
&Schema::new(vec![
194+
Field::new("id", DataType::Int16, false),
195+
Field::new("key", DataType::Utf8, false),
196+
Field::new("value", DataType::Float32, false),
197+
]),
198+
None,
199+
)?;
200+
201+
let plan = table_1
202+
.union(table_2.build()?)?
203+
.union(table_3.build()?)?
204+
.build()?;
205+
206+
let expected = "Union\
207+
\n TableScan: table_1\
208+
\n Projection: CAST(table_1.id AS Int64) AS id, table_1.key, CAST(table_1.value AS Float64) AS value\
209+
\n TableScan: table_1\
210+
\n Projection: CAST(table_1.id AS Int64) AS id, table_1.key, CAST(table_1.value AS Float64) AS value\
211+
\n TableScan: table_1";
212+
assert_optimized_plan_equal(&plan, expected)
213+
}
146214
}

datafusion/optimizer/src/eliminate_one_union.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,7 @@ impl OptimizerRule for EliminateOneUnion {
4040
_config: &dyn OptimizerConfig,
4141
) -> Result<Option<LogicalPlan>> {
4242
match plan {
43-
LogicalPlan::Union(union) if union.inputs.len() == 1 => {
44-
let Union { inputs, schema: _ } = union;
45-
43+
LogicalPlan::Union(Union { inputs, .. }) if inputs.len() == 1 => {
4644
Ok(inputs.first().map(|input| input.as_ref().clone()))
4745
}
4846
_ => Ok(None),
@@ -103,7 +101,7 @@ mod tests {
103101
}
104102

105103
#[test]
106-
fn eliminate_nested_union() -> Result<()> {
104+
fn eliminate_one_union() -> Result<()> {
107105
let table_plan = coerce_plan_expr_for_schema(
108106
&table_scan(Some("table"), &schema(), None)?.build()?,
109107
&schema().to_dfschema()?,

datafusion/optimizer/src/optimizer.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,14 +222,14 @@ impl Optimizer {
222222
/// Create a new optimizer using the recommended list of rules
223223
pub fn new() -> Self {
224224
let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
225+
Arc::new(EliminateNestedUnion::new()),
225226
Arc::new(SimplifyExpressions::new()),
226227
Arc::new(UnwrapCastInComparison::new()),
227228
Arc::new(ReplaceDistinctWithAggregate::new()),
228229
Arc::new(EliminateJoin::new()),
229230
Arc::new(DecorrelatePredicateSubquery::new()),
230231
Arc::new(ScalarSubqueryToJoin::new()),
231232
Arc::new(ExtractEquijoinPredicate::new()),
232-
Arc::new(EliminateNestedUnion::new()),
233233
// simplify expressions does not simplify expressions in subqueries, so we
234234
// run it again after running the optimizations that potentially converted
235235
// subqueries to joins
@@ -242,9 +242,10 @@ impl Optimizer {
242242
Arc::new(CommonSubexprEliminate::new()),
243243
Arc::new(EliminateLimit::new()),
244244
Arc::new(PropagateEmptyRelation::new()),
245+
// Must be after PropagateEmptyRelation
246+
Arc::new(EliminateOneUnion::new()),
245247
Arc::new(FilterNullJoinKeys::default()),
246248
Arc::new(EliminateOuterJoin::new()),
247-
Arc::new(EliminateOneUnion::new()),
248249
// Filters can't be pushed down past Limits, we should do PushDownFilter after PushDownLimit
249250
Arc::new(PushDownLimit::new()),
250251
Arc::new(PushDownFilter::new()),

datafusion/sql/tests/sql_integration.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,24 +2053,6 @@ fn union_all() {
20532053
quick_test(sql, expected);
20542054
}
20552055

2056-
#[test]
2057-
fn union_4_combined_in_one() {
2058-
let sql = "SELECT order_id from orders
2059-
UNION ALL SELECT order_id FROM orders
2060-
UNION ALL SELECT order_id FROM orders
2061-
UNION ALL SELECT order_id FROM orders";
2062-
let expected = "Union\
2063-
\n Projection: orders.order_id\
2064-
\n TableScan: orders\
2065-
\n Projection: orders.order_id\
2066-
\n TableScan: orders\
2067-
\n Projection: orders.order_id\
2068-
\n TableScan: orders\
2069-
\n Projection: orders.order_id\
2070-
\n TableScan: orders";
2071-
quick_test(sql, expected);
2072-
}
2073-
20742056
#[test]
20752057
fn union_with_different_column_names() {
20762058
let sql = "SELECT order_id from orders UNION ALL SELECT customer_id FROM orders";

datafusion/sqllogictest/test_files/explain.slt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ logical_plan after inline_table_scan SAME TEXT AS ABOVE
184184
logical_plan after type_coercion SAME TEXT AS ABOVE
185185
logical_plan after count_wildcard_rule SAME TEXT AS ABOVE
186186
analyzed_logical_plan SAME TEXT AS ABOVE
187+
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
187188
logical_plan after simplify_expressions SAME TEXT AS ABOVE
188189
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
189190
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
@@ -200,6 +201,7 @@ logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
200201
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
201202
logical_plan after eliminate_limit SAME TEXT AS ABOVE
202203
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
204+
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
203205
logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
204206
logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
205207
logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -213,6 +215,7 @@ Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
213215
--TableScan: simple_explain_test projection=[a, b, c]
214216
logical_plan after eliminate_projection TableScan: simple_explain_test projection=[a, b, c]
215217
logical_plan after push_down_limit SAME TEXT AS ABOVE
218+
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
216219
logical_plan after simplify_expressions SAME TEXT AS ABOVE
217220
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
218221
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
@@ -229,6 +232,7 @@ logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
229232
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
230233
logical_plan after eliminate_limit SAME TEXT AS ABOVE
231234
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
235+
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
232236
logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
233237
logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
234238
logical_plan after push_down_limit SAME TEXT AS ABOVE

0 commit comments

Comments
 (0)