@@ -592,6 +592,7 @@ primaryExpression
| identifier #columnReference
| base=primaryExpression '.' fieldName=identifier #dereference
| '(' expression ')' #parenthesizedExpression
| EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract
Member

@HyukjinKwon @maropu @wangyum @huaxingao Just realized EXTRACT is not included in https://spark.apache.org/docs/latest/api/sql/index.html. Could we fix it in the upcoming built-in function doc page updates?

@maropu (Member) Apr 17, 2020

Ah, I see. Nice catch! The Python script we are now working on (#28224) just dumps the entries of ExpressionDescription (ExpressionInfo), so the output unfortunately cannot include a doc entry for EXTRACT right now. To document it, there are three options I can think of:

  • (the simplest fix) Add a description of EXTRACT to the SELECT syntax page (e.g., the named_expression section), then link to date_part from the built-in function page.

  • Add a dummy ExpressionDescription for EXTRACT like this:

@ExpressionDescription(
  usage = "_FUNC_(field FROM source) - Extracts a part of the date/timestamp or interval source.",
  arguments = """ ... """,
  examples = """
    Examples:
      > SELECT _FUNC_(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456');
       2019
  """,
  since = "3.0.0")
case class Extract(...) extends DatePart(field, source, child)
  • Add a new entry for an alias name in ExpressionDescription like this:
@ExpressionDescription(
  usage = "_FUNC_(field FROM source) - Extracts a part of the date/timestamp or interval source.",
  arguments = """... """,
  alias = "extract",
  examples = """
    Examples:
      ...
      > SELECT _FUNC_ALIAS_(seconds FROM interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
       30.001001
  """,
  since = "3.0.0")
case class DatePart(...) extends RuntimeReplaceable {

Which one is preferred, or is there a smarter idea?

Contributor

EXTRACT is not an alias as it has different syntax. The second approach looks good.

Member

Thanks for the check, @cloud-fan. OK, I'll open a PR to follow that approach.

;

constant
@@ -739,6 +740,7 @@ nonReserved
| VIEW | REPLACE
| IF
| POSITION
| EXTRACT
| NO | DATA
| START | TRANSACTION | COMMIT | ROLLBACK | IGNORE
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
@@ -878,6 +880,7 @@ TRAILING: 'TRAILING';

IF: 'IF';
POSITION: 'POSITION';
EXTRACT: 'EXTRACT';

EQ : '=' | '==';
NSEQ: '<=>';
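
Taken together, the grammar changes above add an EXTRACT lexer token, a dedicated primaryExpression alternative for the EXTRACT '(' field FROM source ')' form, and a nonReserved entry so the word can still be used as an ordinary identifier; a dedicated rule is needed because the FROM keyword between the arguments cannot be parsed as a normal function call. A rough spark-shell sketch of the resulting surface syntax (not part of this diff; it assumes a running SparkSession named spark and a build that contains these changes):

// EXTRACT(field FROM source) is expected to behave like the corresponding
// datetime function, so both queries below should return 2019.
spark.sql("SELECT extract(year FROM timestamp '2019-08-12 01:00:00')").show()
spark.sql("SELECT year(timestamp '2019-08-12 01:00:00')").show()
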
@@ -1206,6 +1206,34 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
new StringLocate(expression(ctx.substr), expression(ctx.str))
}

/**
* Create an Extract expression.
*/
override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) {
ctx.field.getText.toUpperCase(Locale.ROOT) match {
case "YEAR" =>
Year(expression(ctx.source))
case "QUARTER" =>
Quarter(expression(ctx.source))
case "MONTH" =>
Month(expression(ctx.source))
case "WEEK" =>
WeekOfYear(expression(ctx.source))
case "DAY" =>
DayOfMonth(expression(ctx.source))
case "DAYOFWEEK" =>
DayOfWeek(expression(ctx.source))
case "HOUR" =>
Hour(expression(ctx.source))
case "MINUTE" =>
Minute(expression(ctx.source))
case "SECOND" =>
Second(expression(ctx.source))
case other =>
throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx)
}
}

/**
* Create a (windowed) Function expression.
*/
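
As a rough illustration of the mapping in visitExtract (a sketch, not taken from the PR; the column name c is arbitrary), each supported field resolves case-insensitively to the matching datetime expression, while anything else falls through to the ParseException branch:

import scala.util.{Failure, Try}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Supported fields map onto existing expressions, e.g. WEEK -> WeekOfYear.
val week = CatalystSqlParser.parseExpression("extract(WEEK from c)")
val sec = CatalystSqlParser.parseExpression("extract(second from c)")

// An unsupported field hits the `case other` branch and raises a ParseException.
Try(CatalystSqlParser.parseExpression("extract(epoch from c)")) match {
  case Failure(e) => println(e.getMessage) // starts with: Literals of type 'EPOCH' are currently not supported.
  case other => println(other)
}
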
@@ -51,7 +51,7 @@ class TableIdentifierParserSuite extends SparkFunSuite {
"rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
"true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
"bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
"int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing")
"int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", "extract")

val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
"natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
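
The extra entry in this keyword list mirrors the nonReserved change in the grammar: EXTRACT must remain usable as a plain identifier. A minimal check in the same spirit (a sketch, not copied from the suite):

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Because EXTRACT is non-reserved, it still parses as a table name.
assert(CatalystSqlParser.parseTableIdentifier("extract") == TableIdentifier("extract"))
assert(CatalystSqlParser.parseTableIdentifier("db.extract") == TableIdentifier("extract", Some("db")))
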
21 changes: 21 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/extract.sql
@@ -0,0 +1,21 @@
CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c;

select extract(year from c) from t;

select extract(quarter from c) from t;

select extract(month from c) from t;

select extract(week from c) from t;

select extract(day from c) from t;

select extract(dayofweek from c) from t;

select extract(hour from c) from t;

select extract(minute from c) from t;

select extract(second from c) from t;

select extract(not_supported from c) from t;
96 changes: 96 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/extract.sql.out
@@ -0,0 +1,96 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 11


-- !query 0
CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
select extract(year from c) from t
-- !query 1 schema
struct<year(CAST(c AS DATE)):int>
-- !query 1 output
2011


-- !query 2
select extract(quarter from c) from t
-- !query 2 schema
struct<quarter(CAST(c AS DATE)):int>
-- !query 2 output
2


-- !query 3
select extract(month from c) from t
-- !query 3 schema
struct<month(CAST(c AS DATE)):int>
-- !query 3 output
5


-- !query 4
select extract(week from c) from t
-- !query 4 schema
struct<weekofyear(CAST(c AS DATE)):int>
-- !query 4 output
18


-- !query 5
select extract(day from c) from t
-- !query 5 schema
struct<dayofmonth(CAST(c AS DATE)):int>
-- !query 5 output
6


-- !query 6
select extract(dayofweek from c) from t
-- !query 6 schema
struct<dayofweek(CAST(c AS DATE)):int>
-- !query 6 output
6


-- !query 7
select extract(hour from c) from t
-- !query 7 schema
struct<hour(CAST(c AS TIMESTAMP)):int>
-- !query 7 output
7


-- !query 8
select extract(minute from c) from t
-- !query 8 schema
struct<minute(CAST(c AS TIMESTAMP)):int>
-- !query 8 output
8


-- !query 9
select extract(second from c) from t
-- !query 9 schema
struct<second(CAST(c AS TIMESTAMP)):int>
-- !query 9 output
9


-- !query 10
select extract(not_supported from c) from t
-- !query 10 schema
struct<>
-- !query 10 output
org.apache.spark.sql.catalyst.parser.ParseException

Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7)

== SQL ==
select extract(not_supported from c) from t
-------^^^