From a1a4db3774e7e0911e710ed1a99694add29df545 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 2 Jun 2018 00:06:55 +0800 Subject: [PATCH 1/3] Add support for date extract --- .../spark/sql/catalyst/parser/SqlBase.g4 | 11 +++ .../sql/catalyst/parser/AstBuilder.scala | 24 ++++++ .../parser/TableIdentifierParserSuite.scala | 3 +- .../resources/sql-tests/inputs/extract.sql | 17 +++++ .../sql-tests/results/extract.sql.out | 74 +++++++++++++++++++ 5 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/extract.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/extract.sql.out diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 7c54851097af3..5c07b494773f1 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -592,6 +592,7 @@ primaryExpression | identifier #columnReference | base=primaryExpression '.' fieldName=identifier #dereference | '(' expression ')' #parenthesizedExpression + | EXTRACT '(' field=(YEAR | QUARTER | MONTH | WEEK | DAY | HOUR | MINUTE | SECOND) FROM source=valueExpression ')' #extract ; constant @@ -739,6 +740,7 @@ nonReserved | VIEW | REPLACE | IF | POSITION + | EXTRACT | YEAR | QUARTER | MONTH | WEEK | DAY | HOUR | MINUTE | SECOND | NO | DATA | START | TRANSACTION | COMMIT | ROLLBACK | IGNORE | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION @@ -878,6 +880,15 @@ TRAILING: 'TRAILING'; IF: 'IF'; POSITION: 'POSITION'; +EXTRACT: 'EXTRACT'; +YEAR: 'YEAR'; +QUARTER: 'QUARTER'; +MONTH: 'MONTH'; +WEEK: 'WEEK'; +DAY: 'DAY'; +HOUR: 'HOUR'; +MINUTE: 'MINUTE'; +SECOND: 'SECOND'; EQ : '=' | '=='; NSEQ: '<=>'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b9ece295c2510..5a8117f47f04c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1206,6 +1206,30 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging new StringLocate(expression(ctx.substr), expression(ctx.str)) } + /** + * Create a Extract expression. + */ + override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) { + ctx.field.getType match { + case SqlBaseParser.YEAR => + Year(expression(ctx.source)) + case SqlBaseParser.QUARTER => + Quarter(expression(ctx.source)) + case SqlBaseParser.MONTH => + Month(expression(ctx.source)) + case SqlBaseParser.WEEK => + WeekOfYear(expression(ctx.source)) + case SqlBaseParser.DAY => + DayOfMonth(expression(ctx.source)) + case SqlBaseParser.HOUR => + Hour(expression(ctx.source)) + case SqlBaseParser.MINUTE => + Minute(expression(ctx.source)) + case SqlBaseParser.SECOND => + Second(expression(ctx.source)) + } + } + /** * Create a (windowed) Function expression. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 89903c2825125..039743a7eceae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -51,7 +51,8 @@ class TableIdentifierParserSuite extends SparkFunSuite { "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", - "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing") + "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", + "extract", "year", "quarter", "month", "week", "day", "hour", "minute", "second") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql new file mode 100644 index 0000000000000..702ce9ac42577 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -0,0 +1,17 @@ +CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c; + +select extract(year from c) from t; + +select extract(quarter from c) from t; + +select extract(month from c) from t; + +select extract(week from c) from t; + +select extract(day from c) from t; + +select extract(hour from c) from t; + +select extract(minute from c) from t; + +select extract(second from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out new file mode 100644 index 0000000000000..65a1f9e6cfbca --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -0,0 +1,74 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 9 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +select extract(year from c) from t +-- !query 1 schema +struct +-- !query 1 output +2011 + + +-- !query 2 +select extract(quarter from c) from t +-- !query 2 schema +struct +-- !query 2 output +2 + + +-- !query 3 +select extract(month from c) from t +-- !query 3 schema +struct +-- !query 3 output +5 + + +-- !query 4 +select extract(week from c) from t +-- !query 4 schema +struct +-- !query 4 output +18 + + +-- !query 5 +select extract(day from c) from t +-- !query 5 schema +struct +-- !query 5 output +6 + + +-- !query 6 +select extract(hour from c) from t +-- !query 6 schema +struct +-- !query 6 output +7 + + +-- !query 7 +select extract(minute from c) from t +-- !query 7 schema +struct +-- !query 7 output +8 + + +-- !query 8 +select extract(second from c) from t +-- !query 8 schema +struct +-- !query 8 output +9 From c9d2bc348495669bd4347679547f1437f35367f1 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 2 Jun 2018 06:50:49 +0800 Subject: [PATCH 2/3] Add DOW/dayofweek --- .../spark/sql/catalyst/parser/SqlBase.g4 | 5 ++- .../sql/catalyst/parser/AstBuilder.scala | 45 ++++++++++++------- .../parser/TableIdentifierParserSuite.scala | 2 +- .../resources/sql-tests/inputs/extract.sql | 4 ++ .../sql-tests/results/extract.sql.out | 40 +++++++++++++---- 5 files changed, 67 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 5c07b494773f1..c4c078a9b09ef 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -592,7 +592,7 @@ primaryExpression | identifier #columnReference | base=primaryExpression '.' fieldName=identifier #dereference | '(' expression ')' #parenthesizedExpression - | EXTRACT '(' field=(YEAR | QUARTER | MONTH | WEEK | DAY | HOUR | MINUTE | SECOND) FROM source=valueExpression ')' #extract + | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract ; constant @@ -740,7 +740,7 @@ nonReserved | VIEW | REPLACE | IF | POSITION - | EXTRACT | YEAR | QUARTER | MONTH | WEEK | DAY | HOUR | MINUTE | SECOND + | EXTRACT | YEAR | QUARTER | MONTH | WEEK | DAY | DOW | HOUR | MINUTE | SECOND | NO | DATA | START | TRANSACTION | COMMIT | ROLLBACK | IGNORE | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION @@ -886,6 +886,7 @@ QUARTER: 'QUARTER'; MONTH: 'MONTH'; WEEK: 'WEEK'; DAY: 'DAY'; +DOW: 'DOW'; HOUR: 'HOUR'; MINUTE: 'MINUTE'; SECOND: 'SECOND'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5a8117f47f04c..ddf6fc946bb3a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1210,23 +1210,34 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a Extract expression. */ override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) { - ctx.field.getType match { - case SqlBaseParser.YEAR => - Year(expression(ctx.source)) - case SqlBaseParser.QUARTER => - Quarter(expression(ctx.source)) - case SqlBaseParser.MONTH => - Month(expression(ctx.source)) - case SqlBaseParser.WEEK => - WeekOfYear(expression(ctx.source)) - case SqlBaseParser.DAY => - DayOfMonth(expression(ctx.source)) - case SqlBaseParser.HOUR => - Hour(expression(ctx.source)) - case SqlBaseParser.MINUTE => - Minute(expression(ctx.source)) - case SqlBaseParser.SECOND => - Second(expression(ctx.source)) + val extractType = ctx.field.getText.toUpperCase(Locale.ROOT) + try { + extractType match { + case "YEAR" => + Year(expression(ctx.source)) + case "QUARTER" => + Quarter(expression(ctx.source)) + case "MONTH" => + Month(expression(ctx.source)) + case "WEEK" => + WeekOfYear(expression(ctx.source)) + case "DAY" => + DayOfMonth(expression(ctx.source)) + case "DOW" => + DayOfWeek(expression(ctx.source)) + case "HOUR" => + Hour(expression(ctx.source)) + case "MINUTE" => + Minute(expression(ctx.source)) + case "SECOND" => + Second(expression(ctx.source)) + case other => + throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) + } + } catch { + case e: IllegalArgumentException => + val message = Option(e.getMessage).getOrElse(s"Exception parsing $extractType") + throw new ParseException(message, ctx) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 039743a7eceae..a2fa08093a29c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -52,7 +52,7 @@ class TableIdentifierParserSuite extends SparkFunSuite { "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", - "extract", "year", "quarter", "month", "week", "day", "hour", "minute", "second") + "extract", "year", "quarter", "month", "week", "day", "dow", "hour", "minute", "second") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql index 702ce9ac42577..49e931211264e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -10,8 +10,12 @@ select extract(week from c) from t; select extract(day from c) from t; +select extract(dow from c) from t; + select extract(hour from c) from t; select extract(minute from c) from t; select extract(second from c) from t; + +select extract(not_supported from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 65a1f9e6cfbca..852920b8ce9f2 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 9 +-- Number of queries: 11 -- !query 0 @@ -51,24 +51,46 @@ struct -- !query 6 -select extract(hour from c) from t +select extract(dow from c) from t -- !query 6 schema -struct +struct -- !query 6 output -7 +6 -- !query 7 -select extract(minute from c) from t +select extract(hour from c) from t -- !query 7 schema -struct +struct -- !query 7 output -8 +7 -- !query 8 -select extract(second from c) from t +select extract(minute from c) from t -- !query 8 schema -struct +struct -- !query 8 output +8 + + +-- !query 9 +select extract(second from c) from t +-- !query 9 schema +struct +-- !query 9 output 9 + + +-- !query 10 +select extract(not_supported from c) from t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.catalyst.parser.ParseException + +Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7) + +== SQL == +select extract(not_supported from c) from t +-------^^^ From 0ad3dd75bc1a74ca88c9ace8899fd2729aaa16b5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 2 Jun 2018 08:49:03 +0800 Subject: [PATCH 3/3] dow -> dayofweek --- .../spark/sql/catalyst/parser/SqlBase.g4 | 11 +---- .../sql/catalyst/parser/AstBuilder.scala | 49 ++++++++----------- .../parser/TableIdentifierParserSuite.scala | 3 +- .../resources/sql-tests/inputs/extract.sql | 2 +- .../sql-tests/results/extract.sql.out | 2 +- 5 files changed, 25 insertions(+), 42 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index c4c078a9b09ef..3fe00eefde7d8 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -740,7 +740,7 @@ nonReserved | VIEW | REPLACE | IF | POSITION - | EXTRACT | YEAR | QUARTER | MONTH | WEEK | DAY | DOW | HOUR | MINUTE | SECOND + | EXTRACT | NO | DATA | START | TRANSACTION | COMMIT | ROLLBACK | IGNORE | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION @@ -881,15 +881,6 @@ TRAILING: 'TRAILING'; IF: 'IF'; POSITION: 'POSITION'; EXTRACT: 'EXTRACT'; -YEAR: 'YEAR'; -QUARTER: 'QUARTER'; -MONTH: 'MONTH'; -WEEK: 'WEEK'; -DAY: 'DAY'; -DOW: 'DOW'; -HOUR: 'HOUR'; -MINUTE: 'MINUTE'; -SECOND: 'SECOND'; EQ : '=' | '=='; NSEQ: '<=>'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ddf6fc946bb3a..383ebde3229d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1210,34 +1210,27 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a Extract expression. */ override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) { - val extractType = ctx.field.getText.toUpperCase(Locale.ROOT) - try { - extractType match { - case "YEAR" => - Year(expression(ctx.source)) - case "QUARTER" => - Quarter(expression(ctx.source)) - case "MONTH" => - Month(expression(ctx.source)) - case "WEEK" => - WeekOfYear(expression(ctx.source)) - case "DAY" => - DayOfMonth(expression(ctx.source)) - case "DOW" => - DayOfWeek(expression(ctx.source)) - case "HOUR" => - Hour(expression(ctx.source)) - case "MINUTE" => - Minute(expression(ctx.source)) - case "SECOND" => - Second(expression(ctx.source)) - case other => - throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) - } - } catch { - case e: IllegalArgumentException => - val message = Option(e.getMessage).getOrElse(s"Exception parsing $extractType") - throw new ParseException(message, ctx) + ctx.field.getText.toUpperCase(Locale.ROOT) match { + case "YEAR" => + Year(expression(ctx.source)) + case "QUARTER" => + Quarter(expression(ctx.source)) + case "MONTH" => + Month(expression(ctx.source)) + case "WEEK" => + WeekOfYear(expression(ctx.source)) + case "DAY" => + DayOfMonth(expression(ctx.source)) + case "DAYOFWEEK" => + DayOfWeek(expression(ctx.source)) + case "HOUR" => + Hour(expression(ctx.source)) + case "MINUTE" => + Minute(expression(ctx.source)) + case "SECOND" => + Second(expression(ctx.source)) + case other => + throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index a2fa08093a29c..ff0de0fb7c1f0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -51,8 +51,7 @@ class TableIdentifierParserSuite extends SparkFunSuite { "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", - "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", - "extract", "year", "quarter", "month", "week", "day", "dow", "hour", "minute", "second") + "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", "extract") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql index 49e931211264e..9adf5d70056e2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -10,7 +10,7 @@ select extract(week from c) from t; select extract(day from c) from t; -select extract(dow from c) from t; +select extract(dayofweek from c) from t; select extract(hour from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 852920b8ce9f2..160e4c7d78455 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -51,7 +51,7 @@ struct -- !query 6 -select extract(dow from c) from t +select extract(dayofweek from c) from t -- !query 6 schema struct -- !query 6 output