-
Notifications
You must be signed in to change notification settings - Fork 1.8k
feat: support month and year interval for date_bin on constant data #5982
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
393bf52
ce5a3a5
b81b272
d3d8c89
403bd5d
7117429
ec29f82
0dc4329
75483c0
fd66b16
b45d6e0
279b264
7c7aad5
f5af8a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -421,7 +421,7 @@ SELECT DATE_BIN(INTERVAL '5 microseconds', TIMESTAMP '2022-08-03 14:38:50.000006 | |
| 2022-08-03T14:38:50.000005 | ||
|
|
||
| # Does not support months for Month-Day-Nano interval | ||
| statement error This feature is not implemented: DATE_BIN stride does not support month intervals | ||
| statement error DataFusion error: This feature is not implemented: DATE_BIN stride does not support combination of month, day and nanosecond intervals | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. new error message I have added in this PR |
||
| SELECT DATE_BIN(INTERVAL '1 month 5 nanoseconds', TIMESTAMP '2022-08-03 14:38:50.000000006Z', TIMESTAMP '1970-01-01T00:00:00Z') | ||
|
|
||
| # Can coerce string interval arguments | ||
|
|
@@ -500,6 +500,265 @@ FROM ( | |
| (TIMESTAMP '2021-06-10 17:19:10Z', TIMESTAMP '2001-01-01T00:00:00Z', 0.3) | ||
| ) as t (time, origin, val) | ||
|
|
||
|
|
||
| # month interval with INTERVAL keyword in date_bin with default start time | ||
| query P | ||
| select date_bin(INTERVAL '1 month', column1) | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-02-01T00:00:00 | ||
| 2022-02-01T00:00:00 | ||
| 2022-03-01T00:00:00 | ||
|
|
||
|
|
||
| # year interval in date_bin with default start time | ||
| query P | ||
| select date_bin(INTERVAL '1 year', column1) | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2023-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
| 2023-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
|
|
||
| query P | ||
| SELECT DATE_BIN('1 month', '2022-01-01 00:00:00Z', '1970-01-01T00:00:00Z'); | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
|
|
||
|
|
||
| # Tests without INTERVAL keyword | ||
alamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # 1-month interval in date_bin with default start time | ||
| query P | ||
| select date_bin('1 month', column1) | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-02-01T00:00:00 | ||
| 2022-02-01T00:00:00 | ||
| 2022-03-01T00:00:00 | ||
|
|
||
| # 2-month interval in date_bin with default start time | ||
| query P | ||
| select date_bin('2 month', column1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-03-01T00:00:00 | ||
|
|
||
|
|
||
| # month interval with start date end of the month plus some minutes | ||
| # | ||
| # The the return of `date_bin` is the start of the bin. The bin width is one year. | ||
| # The source data must be inside the bin. | ||
| # Since the origin is '1970-12-31T00:15:00Z', the start of the bins are | ||
| # '1970-12-31T00:15:00Z', | ||
| # '1971-12-31T00:15:00Z', | ||
| # ..., | ||
| # '2021-12-31T00:15:00Z', | ||
| # '2022-12-31T00:15:00Z', | ||
| # ... | ||
| # | ||
| # Note the datetime '2022-03-31 00:00:00'. Its bin is NOT '2022-03-31 00:15:00' which is after its time | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
| # Its bin is '2022-02-28T00:15:00' | ||
| # | ||
| query P | ||
| select date_bin('1 month', column1, '1970-12-31T00:15:00Z') | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2022-01-31T00:15:00 | ||
| 2022-01-31T00:15:00 | ||
| 2022-02-28T00:15:00 | ||
|
|
||
| # month interval with start date is end of the month plus some minutes | ||
| query P | ||
| select date_bin('2 months', column1, '1970-12-31T00:15:00Z') | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2022-02-28T00:15:00 | ||
|
|
||
| # year interval in date_bin with default start time | ||
| query P | ||
| select date_bin('1 year', column1) | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00'), | ||
| (timestamp '2023-10-28 01:33:00') | ||
| ) as sq | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2022-01-01T00:00:00 | ||
| 2023-01-01T00:00:00 | ||
|
|
||
| # year interval with start date is end of the month plus some minutes | ||
| query P | ||
| select date_bin('1 year', column1, '1970-12-31T00:15:00Z') | ||
| from (values | ||
| (timestamp '2022-01-01 00:00:00'), | ||
| (timestamp '2022-01-01 01:00:00'), | ||
| (timestamp '2022-01-02 00:00:00'), | ||
| (timestamp '2022-02-02 00:00:00'), | ||
| (timestamp '2022-02-15 00:00:00'), | ||
| (timestamp '2022-03-31 00:00:00'), | ||
| (timestamp '2023-03-31 00:00:00') | ||
| ) as sq | ||
| ---- | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand why the timestamp would be binned into the prior year (2021)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return data is the start of the bin. The bin width is one year. The source data must be inside the bin. Since the origin is '1970-12-31T00:15:00Z', the start of the bins are '1970-12-31T00:15:00Z', '1971-12-31T00:15:00Z', ..., '2021-12-31T00:15:00Z', '2022-12-31T00:15:00Z', ... '2022-01-02 00:00:00' is between '2021-12-31T00:15:00Z' and '2022-12-31T00:15:00Z' so it must be in the bin that starts with '2021-12-31T00:15:00Z' |
||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2021-12-31T00:15:00 | ||
| 2022-12-31T00:15:00 | ||
|
|
||
| # month interval on constant | ||
NGA-TRAN marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| query P | ||
| SELECT DATE_BIN('1 month', '2022-01-01 00:00:00Z'); | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
|
|
||
| # five months interval on constant | ||
| query P | ||
| SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z'); | ||
| ---- | ||
| 2021-09-01T00:00:00 | ||
|
|
||
| # month interval with default start time | ||
| query P | ||
| SELECT DATE_BIN('1 month', '2022-01-01 00:00:00Z'); | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
|
|
||
| # origin on the first date but not at midnight | ||
| query P | ||
| SELECT DATE_BIN('1 month', '2022-04-01T00:00:00Z', '2021-05-01T00:04:00Z'); | ||
| ---- | ||
| 2022-03-01T00:04:00 | ||
|
|
||
| # origin is May 31 (last date of the month) to produce bin on Feb 28 | ||
| query P | ||
| SELECT DATE_BIN('3 month', '2022-04-01T00:00:00Z', '2021-05-31T00:04:00Z'); | ||
| ---- | ||
| 2022-02-28T00:04:00 | ||
|
|
||
| # origin is on Feb 29 and interval is one month. The bins will be: | ||
| # '2000-02-29T00:00:00' | ||
| # '2000-01-29T00:00:00' | ||
| # '1999-12-29T00:00:00' | ||
| # .... | ||
| # Reason: Even though 29 (or 28 for non-leap year) is the last date of Feb but it | ||
| # is not last date of other month. Months' chrono consider a month before or after that | ||
| # will land on the same 29th date. | ||
| query P | ||
| select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-02-29T00:00:00'); | ||
| ---- | ||
| 2000-01-29T00:00:00 | ||
|
|
||
| # similar for the origin March 29 | ||
| query P | ||
| select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-03-29T00:00:00'); | ||
| ---- | ||
| 2000-01-29T00:00:00 | ||
|
|
||
| # any value of origin | ||
| query P | ||
| SELECT DATE_BIN('3 month', '2022-01-01T00:00:00Z', '2021-05-05T17:56:21Z'); | ||
| ---- | ||
| 2021-11-05T17:56:21 | ||
|
|
||
| # origin is later than source | ||
| query P | ||
| SELECT DATE_BIN('3 month', '2022-01-01T00:00:00Z', '2022-05-05T17:56:21Z'); | ||
| ---- | ||
| 2021-11-05T17:56:21 | ||
|
|
||
| # year interval on constant | ||
| query P | ||
| SELECT DATE_BIN('1 year', '2022-01-01 00:00:00Z'); | ||
| ---- | ||
| 2022-01-01T00:00:00 | ||
|
|
||
| # 3-year interval on constant | ||
| query P | ||
| SELECT DATE_BIN('3 year', '2022-01-01 00:00:00Z'); | ||
| ---- | ||
| 2021-01-01T00:00:00 | ||
|
|
||
| # 3 year 1 months = 37 months | ||
| query P | ||
| SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); | ||
| ---- | ||
| 2022-06-01T00:00:00 | ||
|
|
||
| ### | ||
| ## test date_trunc function | ||
| ### | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great test coverage and comments, thanks @NGA-TRAN 💯