Skip to content

Commit a7bb09f

Browse files
logan-keede and alamb authored
move resolve_table_references out of `datafusion-catalog` (#14441)
* move out of
* forgotten license
* Update references

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 16c0686 commit a7bb09f

File tree

9 files changed

+292
-252
lines changed

9 files changed

+292
-252
lines changed

datafusion-examples/examples/sql_frontend.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
19-
use datafusion::common::plan_err;
19+
use datafusion::common::{plan_err, TableReference};
2020
use datafusion::config::ConfigOptions;
2121
use datafusion::error::Result;
2222
use datafusion::logical_expr::{
@@ -29,7 +29,6 @@ use datafusion::optimizer::{
2929
use datafusion::sql::planner::{ContextProvider, SqlToRel};
3030
use datafusion::sql::sqlparser::dialect::PostgreSqlDialect;
3131
use datafusion::sql::sqlparser::parser::Parser;
32-
use datafusion::sql::TableReference;
3332
use std::any::Any;
3433
use std::sync::Arc;
3534

datafusion/catalog/src/lib.rs

Lines changed: 9 additions & 239 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,19 @@
2222
//! * Simple memory based catalog: [`MemoryCatalogProviderList`], [`MemoryCatalogProvider`], [`MemorySchemaProvider`]
2323
2424
pub mod memory;
25+
#[deprecated(
26+
since = "46.0.0",
27+
note = "use datafusion_sql::resolve::resolve_table_references"
28+
)]
29+
pub use datafusion_sql::resolve::resolve_table_references;
30+
#[deprecated(
31+
since = "46.0.0",
32+
note = "use datafusion_common::{ResolvedTableReference, TableReference}"
33+
)]
2534
pub use datafusion_sql::{ResolvedTableReference, TableReference};
2635
pub use memory::{
2736
MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider,
2837
};
29-
use std::collections::BTreeSet;
30-
use std::ops::ControlFlow;
31-
3238
mod r#async;
3339
mod catalog;
3440
mod dynamic_file;
@@ -43,239 +49,3 @@ pub use schema::*;
4349
pub use session::*;
4450
pub use table::*;
4551
pub mod streaming;
46-
47-
/// Collects all tables and views referenced in the SQL statement. CTEs are collected separately.
48-
/// This can be used to determine which tables need to be in the catalog for a query to be planned.
49-
///
50-
/// # Returns
51-
///
52-
/// A `(table_refs, ctes)` tuple, the first element contains table and view references and the second
53-
/// element contains any CTE aliases that were defined and possibly referenced.
54-
///
55-
/// ## Example
56-
///
57-
/// ```
58-
/// # use datafusion_sql::parser::DFParser;
59-
/// # use datafusion_catalog::resolve_table_references;
60-
/// let query = "SELECT a FROM foo where x IN (SELECT y FROM bar)";
61-
/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
62-
/// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
63-
/// assert_eq!(table_refs.len(), 2);
64-
/// assert_eq!(table_refs[0].to_string(), "bar");
65-
/// assert_eq!(table_refs[1].to_string(), "foo");
66-
/// assert_eq!(ctes.len(), 0);
67-
/// ```
68-
///
69-
/// ## Example with CTEs
70-
///
71-
/// ```
72-
/// # use datafusion_sql::parser::DFParser;
73-
/// # use datafusion_catalog::resolve_table_references;
74-
/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;";
75-
/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
76-
/// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
77-
/// assert_eq!(table_refs.len(), 0);
78-
/// assert_eq!(ctes.len(), 1);
79-
/// assert_eq!(ctes[0].to_string(), "my_cte");
80-
/// ```
81-
pub fn resolve_table_references(
82-
statement: &datafusion_sql::parser::Statement,
83-
enable_ident_normalization: bool,
84-
) -> datafusion_common::Result<(Vec<TableReference>, Vec<TableReference>)> {
85-
use datafusion_sql::parser::{
86-
CopyToSource, CopyToStatement, Statement as DFStatement,
87-
};
88-
use datafusion_sql::planner::object_name_to_table_reference;
89-
use information_schema::INFORMATION_SCHEMA;
90-
use information_schema::INFORMATION_SCHEMA_TABLES;
91-
use sqlparser::ast::*;
92-
93-
struct RelationVisitor {
94-
relations: BTreeSet<ObjectName>,
95-
all_ctes: BTreeSet<ObjectName>,
96-
ctes_in_scope: Vec<ObjectName>,
97-
}
98-
99-
impl RelationVisitor {
100-
/// Record the reference to `relation`, if it's not a CTE reference.
101-
fn insert_relation(&mut self, relation: &ObjectName) {
102-
if !self.relations.contains(relation)
103-
&& !self.ctes_in_scope.contains(relation)
104-
{
105-
self.relations.insert(relation.clone());
106-
}
107-
}
108-
}
109-
110-
impl Visitor for RelationVisitor {
111-
type Break = ();
112-
113-
fn pre_visit_relation(&mut self, relation: &ObjectName) -> ControlFlow<()> {
114-
self.insert_relation(relation);
115-
ControlFlow::Continue(())
116-
}
117-
118-
fn pre_visit_query(&mut self, q: &Query) -> ControlFlow<Self::Break> {
119-
if let Some(with) = &q.with {
120-
for cte in &with.cte_tables {
121-
// The non-recursive CTE name is not in scope when evaluating the CTE itself, so this is valid:
122-
// `WITH t AS (SELECT * FROM t) SELECT * FROM t`
123-
// Where the first `t` refers to a predefined table. So we are careful here
124-
// to visit the CTE first, before putting it in scope.
125-
if !with.recursive {
126-
// This is a bit hackish as the CTE will be visited again as part of visiting `q`,
127-
// but thankfully `insert_relation` is idempotent.
128-
cte.visit(self);
129-
}
130-
self.ctes_in_scope
131-
.push(ObjectName(vec![cte.alias.name.clone()]));
132-
}
133-
}
134-
ControlFlow::Continue(())
135-
}
136-
137-
fn post_visit_query(&mut self, q: &Query) -> ControlFlow<Self::Break> {
138-
if let Some(with) = &q.with {
139-
for _ in &with.cte_tables {
140-
// Unwrap: We just pushed these in `pre_visit_query`
141-
self.all_ctes.insert(self.ctes_in_scope.pop().unwrap());
142-
}
143-
}
144-
ControlFlow::Continue(())
145-
}
146-
147-
fn pre_visit_statement(&mut self, statement: &Statement) -> ControlFlow<()> {
148-
if let Statement::ShowCreate {
149-
obj_type: ShowCreateObject::Table | ShowCreateObject::View,
150-
obj_name,
151-
} = statement
152-
{
153-
self.insert_relation(obj_name)
154-
}
155-
156-
// SHOW statements will later be rewritten into a SELECT from the information_schema
157-
let requires_information_schema = matches!(
158-
statement,
159-
Statement::ShowFunctions { .. }
160-
| Statement::ShowVariable { .. }
161-
| Statement::ShowStatus { .. }
162-
| Statement::ShowVariables { .. }
163-
| Statement::ShowCreate { .. }
164-
| Statement::ShowColumns { .. }
165-
| Statement::ShowTables { .. }
166-
| Statement::ShowCollation { .. }
167-
);
168-
if requires_information_schema {
169-
for s in INFORMATION_SCHEMA_TABLES {
170-
self.relations.insert(ObjectName(vec![
171-
Ident::new(INFORMATION_SCHEMA),
172-
Ident::new(*s),
173-
]));
174-
}
175-
}
176-
ControlFlow::Continue(())
177-
}
178-
}
179-
180-
let mut visitor = RelationVisitor {
181-
relations: BTreeSet::new(),
182-
all_ctes: BTreeSet::new(),
183-
ctes_in_scope: vec![],
184-
};
185-
186-
fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor) {
187-
match statement {
188-
DFStatement::Statement(s) => {
189-
let _ = s.as_ref().visit(visitor);
190-
}
191-
DFStatement::CreateExternalTable(table) => {
192-
visitor.relations.insert(table.name.clone());
193-
}
194-
DFStatement::CopyTo(CopyToStatement { source, .. }) => match source {
195-
CopyToSource::Relation(table_name) => {
196-
visitor.insert_relation(table_name);
197-
}
198-
CopyToSource::Query(query) => {
199-
query.visit(visitor);
200-
}
201-
},
202-
DFStatement::Explain(explain) => visit_statement(&explain.statement, visitor),
203-
}
204-
}
205-
206-
visit_statement(statement, &mut visitor);
207-
208-
let table_refs = visitor
209-
.relations
210-
.into_iter()
211-
.map(|x| object_name_to_table_reference(x, enable_ident_normalization))
212-
.collect::<datafusion_common::Result<_>>()?;
213-
let ctes = visitor
214-
.all_ctes
215-
.into_iter()
216-
.map(|x| object_name_to_table_reference(x, enable_ident_normalization))
217-
.collect::<datafusion_common::Result<_>>()?;
218-
Ok((table_refs, ctes))
219-
}
220-
221-
#[cfg(test)]
222-
mod tests {
223-
use super::*;
224-
225-
#[test]
226-
fn resolve_table_references_shadowed_cte() {
227-
use datafusion_sql::parser::DFParser;
228-
229-
// An interesting edge case where the `t` name is used both as an ordinary table reference
230-
// and as a CTE reference.
231-
let query = "WITH t AS (SELECT * FROM t) SELECT * FROM t";
232-
let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
233-
let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
234-
assert_eq!(table_refs.len(), 1);
235-
assert_eq!(ctes.len(), 1);
236-
assert_eq!(ctes[0].to_string(), "t");
237-
assert_eq!(table_refs[0].to_string(), "t");
238-
239-
// UNION is a special case where the CTE is not in scope for the second branch.
240-
let query = "(with t as (select 1) select * from t) union (select * from t)";
241-
let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
242-
let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
243-
assert_eq!(table_refs.len(), 1);
244-
assert_eq!(ctes.len(), 1);
245-
assert_eq!(ctes[0].to_string(), "t");
246-
assert_eq!(table_refs[0].to_string(), "t");
247-
248-
// Nested CTEs are also handled.
249-
// Here the first `u` is a CTE, but the second `u` is a table reference.
250-
// While `t` is always a CTE.
251-
let query = "(with t as (with u as (select 1) select * from u) select * from u cross join t)";
252-
let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
253-
let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
254-
assert_eq!(table_refs.len(), 1);
255-
assert_eq!(ctes.len(), 2);
256-
assert_eq!(ctes[0].to_string(), "t");
257-
assert_eq!(ctes[1].to_string(), "u");
258-
assert_eq!(table_refs[0].to_string(), "u");
259-
}
260-
261-
#[test]
262-
fn resolve_table_references_recursive_cte() {
263-
use datafusion_sql::parser::DFParser;
264-
265-
let query = "
266-
WITH RECURSIVE nodes AS (
267-
SELECT 1 as id
268-
UNION ALL
269-
SELECT id + 1 as id
270-
FROM nodes
271-
WHERE id < 10
272-
)
273-
SELECT * FROM nodes
274-
";
275-
let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
276-
let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
277-
assert_eq!(table_refs.len(), 0);
278-
assert_eq!(ctes.len(), 1);
279-
assert_eq!(ctes[0].to_string(), "nodes");
280-
}
281-
}

datafusion/core/tests/optimizer/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
2626
use arrow_schema::{Fields, SchemaBuilder};
2727
use datafusion_common::config::ConfigOptions;
2828
use datafusion_common::tree_node::{TransformedResult, TreeNode};
29-
use datafusion_common::{plan_err, DFSchema, Result, ScalarValue};
29+
use datafusion_common::{plan_err, DFSchema, Result, ScalarValue, TableReference};
3030
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
3131
use datafusion_expr::{
3232
col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator,
@@ -41,7 +41,6 @@ use datafusion_sql::planner::{ContextProvider, SqlToRel};
4141
use datafusion_sql::sqlparser::ast::Statement;
4242
use datafusion_sql::sqlparser::dialect::GenericDialect;
4343
use datafusion_sql::sqlparser::parser::Parser;
44-
use datafusion_sql::TableReference;
4544

4645
use chrono::DateTime;
4746
use datafusion_functions::datetime;

datafusion/optimizer/tests/optimizer_integration.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::sync::Arc;
2222
use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
2323

2424
use datafusion_common::config::ConfigOptions;
25-
use datafusion_common::{assert_contains, plan_err, Result};
25+
use datafusion_common::{assert_contains, plan_err, Result, TableReference};
2626
use datafusion_expr::sqlparser::dialect::PostgreSqlDialect;
2727
use datafusion_expr::test::function_stub::sum_udaf;
2828
use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF};
@@ -36,7 +36,6 @@ use datafusion_sql::planner::{ContextProvider, SqlToRel};
3636
use datafusion_sql::sqlparser::ast::Statement;
3737
use datafusion_sql::sqlparser::dialect::GenericDialect;
3838
use datafusion_sql::sqlparser::parser::Parser;
39-
use datafusion_sql::TableReference;
4039

4140
#[cfg(test)]
4241
#[ctor::ctor]

datafusion/sql/examples/sql.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use std::{collections::HashMap, sync::Arc};
2020
use arrow_schema::{DataType, Field, Schema};
2121

2222
use datafusion_common::config::ConfigOptions;
23-
use datafusion_common::{plan_err, Result};
23+
use datafusion_common::{plan_err, Result, TableReference};
2424
use datafusion_expr::planner::ExprPlanner;
2525
use datafusion_expr::WindowUDF;
2626
use datafusion_expr::{
@@ -32,7 +32,6 @@ use datafusion_functions_aggregate::sum::sum_udaf;
3232
use datafusion_sql::{
3333
planner::{ContextProvider, SqlToRel},
3434
sqlparser::{dialect::GenericDialect, parser::Parser},
35-
TableReference,
3635
};
3736

3837
fn main() {

datafusion/sql/src/expr/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,11 +1073,10 @@ mod tests {
10731073
use sqlparser::parser::Parser;
10741074

10751075
use datafusion_common::config::ConfigOptions;
1076+
use datafusion_common::TableReference;
10761077
use datafusion_expr::logical_plan::builder::LogicalTableSource;
10771078
use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};
10781079

1079-
use crate::TableReference;
1080-
10811080
use super::*;
10821081

10831082
struct TestContextProvider {

datafusion/sql/src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub mod parser;
4141
pub mod planner;
4242
mod query;
4343
mod relation;
44+
pub mod resolve;
4445
mod select;
4546
mod set_expr;
4647
mod stack;
@@ -49,6 +50,9 @@ mod statement;
4950
pub mod unparser;
5051
pub mod utils;
5152
mod values;
52-
53+
#[deprecated(
54+
since = "46.0.0",
55+
note = "use datafusion_common::{ResolvedTableReference, TableReference}"
56+
)]
5357
pub use datafusion_common::{ResolvedTableReference, TableReference};
5458
pub use sqlparser;

0 commit comments

Comments
 (0)