Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions datafusion/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3634,6 +3634,173 @@ mod tests {
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
}

#[tokio::test]
async fn normalized_column_identifiers() {
// create local execution context
let mut ctx = ExecutionContext::new();

// register csv file with the execution context
ctx.register_csv(
"case_insensitive_test",
"tests/example.csv",
CsvReadOptions::new(),
)
.await
.unwrap();

let sql = "SELECT A, b FROM case_insensitive_test";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Aliases

let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Order by

let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Where

let sql = "SELECT a, b FROM case_insensitive_test where A IS NOT null";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Group by

let sql = "SELECT a as x, count(*) as c FROM case_insensitive_test GROUP BY X";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | c |",
"+---+---+",
"| 1 | 1 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql =
r#"SELECT a as "X", count(*) as c FROM case_insensitive_test GROUP BY "X""#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | c |",
"+---+---+",
"| 1 | 1 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);
}

struct MyPhysicalPlanner {}

#[async_trait]
Expand Down
18 changes: 6 additions & 12 deletions datafusion/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::logical_plan::{
use crate::optimizer::utils::exprlist_to_columns;
use crate::prelude::JoinType;
use crate::scalar::ScalarValue;
use crate::sql::utils::make_decimal_type;
use crate::sql::utils::{make_decimal_type, normalize_ident};
use crate::{
error::{DataFusionError, Result},
physical_plan::udaf::AggregateUDF,
Expand Down Expand Up @@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
Box::new(self.sql_to_rex(expr, schema)?),
alias.value.clone(),
normalize_ident(alias),
)),
SelectItem::Wildcard => Ok(Expr::Wildcard),
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
Expand Down Expand Up @@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

SQLExpr::Identifier(ref id) => {
if id.value.starts_with('@') {
// TODO: figure out if ScalarVariables should be insensitive.
let var_names = vec![id.value.clone()];
Ok(Expr::ScalarVariable(var_names))
} else {
Expand All @@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// identifier. (e.g. it is "foo.bar" not foo.bar)
Ok(Expr::Column(Column {
relation: None,
name: id.value.clone(),
name: normalize_ident(id),
}))
}
}
Expand All @@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}

SQLExpr::CompoundIdentifier(ids) => {
let mut var_names: Vec<_> =
ids.iter().map(|id| id.value.clone()).collect();
let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();

if &var_names[0][0..1] == "@" {
Ok(Expr::ScalarVariable(var_names))
Expand Down Expand Up @@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// (e.g. "foo.bar") for function names yet
function.name.to_string()
} else {
// if there is a quote style, then don't normalize
// the name, otherwise normalize to lowercase
let ident = &function.name.0[0];
match ident.quote_style {
Some(_) => ident.value.clone(),
None => ident.value.to_ascii_lowercase(),
}
normalize_ident(&function.name.0[0])
};

// first, scalar built-in
Expand Down
9 changes: 9 additions & 0 deletions datafusion/src/sql/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! SQL Utility Functions

use arrow::datatypes::DataType;
use sqlparser::ast::Ident;

use crate::logical_plan::{Expr, LogicalPlan};
use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
Expand Down Expand Up @@ -532,6 +533,14 @@ pub(crate) fn make_decimal_type(
}
}

// Normalize an identifer to a lowercase string unless the identifier is quoted.
pub(crate) fn normalize_ident(id: &Ident) -> String {
match id.quote_style {
Some(_) => id.value.clone(),
None => id.value.to_ascii_lowercase(),
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down