Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 91 additions & 50 deletions arrow-arith/src/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,11 +464,15 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalDayTimeType> {
DatePart::Week => Ok(self.unary_opt(|d| Some(d.days / 7))),
DatePart::Day => Ok(self.unary_opt(|d| Some(d.days))),
DatePart::Hour => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 60 * 1_000)))),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

D SELECT datepart('hour', interval '25 hour');
┌───────────────────────────────────────────────┐
│ datepart('hour', CAST('25 hour' AS INTERVAL)) │
│                     int64                     │
├───────────────────────────────────────────────┤
│                      25                       │
└───────────────────────────────────────────────┘
D SELECT datepart('day', interval '25 hour');
┌──────────────────────────────────────────────┐
│ datepart('day', CAST('25 hour' AS INTERVAL)) │
│                    int64                     │
├──────────────────────────────────────────────┤
│                      0                       │
└──────────────────────────────────────────────┘

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, the current arrow behavior is the same it seems:

> SELECT datepart('hour', interval '25 hour');
+--------------------------------------------------------------------------------------------------------------------------+
| date_part(Utf8("hour"),IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 90000000000000 }")) |
+--------------------------------------------------------------------------------------------------------------------------+
| 25                                                                                                                       |
+--------------------------------------------------------------------------------------------------------------------------+
1 row(s) fetched.
Elapsed 0.030 seconds.

> SELECT datepart('day', interval '25 hour');
+-------------------------------------------------------------------------------------------------------------------------+
| date_part(Utf8("day"),IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 90000000000000 }")) |
+-------------------------------------------------------------------------------------------------------------------------+
| 0                                                                                                                       |
+-------------------------------------------------------------------------------------------------------------------------+

DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000)))),
DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000))),
DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds))),
DatePart::Microsecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000))),
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000_000))),
DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000) % 60))),
DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000 % 60))),
DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds % (60 * 1_000)))),
DatePart::Microsecond => {
Ok(self.unary_opt(|d| (d.milliseconds % (60 * 1_000)).checked_mul(1_000)))
}
DatePart::Nanosecond => {
Ok(self.unary_opt(|d| (d.milliseconds % (60 * 1_000)).checked_mul(1_000_000)))
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

D SELECT datepart('m', interval '1m 61s 33ms 44us');
┌─────────────────────────────────────────────────────┐
│ datepart('m', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│                        int64                        │
├─────────────────────────────────────────────────────┤
│                          2                          │
└─────────────────────────────────────────────────────┘
D SELECT datepart('s', interval '1m 61s 33ms 44us');
┌─────────────────────────────────────────────────────┐
│ datepart('s', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│                        int64                        │
├─────────────────────────────────────────────────────┤
│                          1                          │
└─────────────────────────────────────────────────────┘
D SELECT datepart('ms', interval '1m 61s 33ms 44us');
┌──────────────────────────────────────────────────────┐
│ datepart('ms', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│                        int64                         │
├──────────────────────────────────────────────────────┤
│                         1033                         │
└──────────────────────────────────────────────────────┘
D SELECT datepart('us', interval '1m 61s 33ms 44us');
┌──────────────────────────────────────────────────────┐
│ datepart('us', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│                        int64                         │
├──────────────────────────────────────────────────────┤
│                       1033044                        │
│                    (1.03 million)                    │
└──────────────────────────────────────────────────────┘

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a behavior change for arrow-rs.

duckdb says 1

SELECT datepart('s', interval '1m 61s 33ms 44us'); ┌─────────────────────────────────────────────────────┐ │ datepart('s', CAST('1m 61s 33ms 44us' AS INTERVAL)) │ │ int64 │ ├─────────────────────────────────────────────────────┤ │ 1 │ └─────────────────────────────────────────────────────┘

postgres says almost 1:

postgres=# SELECT extract (seconds from  interval '1m 61s 33ms 44us');
 extract
----------
 1.033044
(1 row)

Arrow says 121 (before this PR)

> SELECT datepart('s', interval '1m 61s 33ms 44us');
+---------------------------------------------------------------------------------------------------------------------+
| date_part(Utf8("s"),IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 121033044000 }")) |
+---------------------------------------------------------------------------------------------------------------------+
| 121                                                                                                                 |
+---------------------------------------------------------------------------------------------------------------------+
1 row(s) fetched.
Elapsed 0.004 seconds.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same thing for millisecond and microseconds


DatePart::Quarter
| DatePart::Year
Expand All @@ -488,25 +492,31 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalMonthDayNanoType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
match part {
DatePart::Year => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months / 12))),
DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months))),
DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months % 12))),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

D SELECT datepart('year', interval '12 month');
┌────────────────────────────────────────────────┐
│ datepart('year', CAST('12 month' AS INTERVAL)) │
│                     int64                      │
├────────────────────────────────────────────────┤
│                       1                        │
└────────────────────────────────────────────────┘
D SELECT datepart('month', interval '12 month');
┌─────────────────────────────────────────────────┐
│ datepart('month', CAST('12 month' AS INTERVAL)) │
│                      int64                      │
├─────────────────────────────────────────────────┤
│                        0                        │
└─────────────────────────────────────────────────┘

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Postgres agrees

postgres=# SELECT extract (month from interval '12 month');
 extract
---------
       0
(1 row)

Before this PR arrow also currently says 12.

> SELECT datepart('month', interval '12 month');
+---------------------------------------------------------------------------------------------------------------+
| date_part(Utf8("month"),IntervalMonthDayNano("IntervalMonthDayNano { months: 12, days: 0, nanoseconds: 0 }")) |
+---------------------------------------------------------------------------------------------------------------+
| 12                                                                                                            |
+---------------------------------------------------------------------------------------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds.

I think this change makes sense

DatePart::Week => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days / 7))),
DatePart::Day => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days))),
DatePart::Hour => {
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 60 * 1_000_000_000)).try_into().ok()))
}
DatePart::Minute => {
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000)).try_into().ok()))
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000) % 60).try_into().ok()))
}
DatePart::Second => {
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000_000).try_into().ok()))
Ok(self.unary_opt(|d| ((d.nanoseconds / 1_000_000_000) % 60).try_into().ok()))
}
DatePart::Millisecond => {
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000).try_into().ok()))
}
DatePart::Microsecond => {
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000).try_into().ok()))
DatePart::Millisecond => Ok(self.unary_opt(|d| {
(d.nanoseconds % (60 * 1_000_000_000) / 1_000_000)
.try_into()
.ok()
})),
DatePart::Microsecond => Ok(self.unary_opt(|d| {
(d.nanoseconds % (60 * 1_000_000_000) / 1_000)
.try_into()
.ok()
})),
DatePart::Nanosecond => {
Ok(self.unary_opt(|d| (d.nanoseconds % (60 * 1_000_000_000)).try_into().ok()))
}
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.nanoseconds.try_into().ok())),

DatePart::Quarter
| DatePart::WeekISO
Expand Down Expand Up @@ -1764,9 +1774,14 @@ mod tests {
fn test_interval_day_time_array() {
let input: IntervalDayTimeArray = vec![
IntervalDayTime::ZERO,
IntervalDayTime::new(10, 42),
IntervalDayTime::new(10, 1042),
IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1),
IntervalDayTime::new(10, 42), // 10d, 42ms
IntervalDayTime::new(10, 1042), // 10d, 1s, 42ms
IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1), // 10d, 24h, 1ms
IntervalDayTime::new(
6,
(MILLISECONDS * 60 * 60 * 4 + MILLISECONDS * 60 * 22 + MILLISECONDS * 11 + 3)
as i32,
), // 6d, 4h, 22m, 11s, 3ms
]
.into();

Expand All @@ -1777,58 +1792,65 @@ mod tests {
assert_eq!(10, actual.value(1));
assert_eq!(10, actual.value(2));
assert_eq!(10, actual.value(3));
assert_eq!(6, actual.value(4));

let actual = date_part(&input, DatePart::Week).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(1, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(1, actual.value(3));
assert_eq!(0, actual.value(4));

// Days doesn't affect time.
let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = date_part(&input, DatePart::Hour).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42_000_000, actual.value(1));
assert_eq!(1_042_000_000, actual.value(2));
// Overflow returns zero.
assert_eq!(0, actual.value(1));
assert_eq!(0, actual.value(2));
assert_eq!(24, actual.value(3));
assert_eq!(4, actual.value(4));

let actual = date_part(&input, DatePart::Minute).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(0, actual.value(2));
assert_eq!(0, actual.value(3));
assert_eq!(22, actual.value(4));

let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42_000, actual.value(1));
assert_eq!(1_042_000, actual.value(2));
// Overflow returns zero.
assert_eq!(0, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(0, actual.value(3));
assert_eq!(11, actual.value(4));

let actual = date_part(&input, DatePart::Millisecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42, actual.value(1));
assert_eq!(1042, actual.value(2));
assert_eq!(MILLISECONDS_IN_DAY as i32 + 1, actual.value(3));

let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(24 * 60 * 60, actual.value(3));
assert_eq!(1, actual.value(3));
assert_eq!(11003, actual.value(4));

let actual = date_part(&input, DatePart::Minute).unwrap();
let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(0, actual.value(2));
assert_eq!(24 * 60, actual.value(3));
assert_eq!(42_000, actual.value(1));
assert_eq!(1_042_000, actual.value(2));
assert_eq!(1_000, actual.value(3));
assert_eq!(11_003_000, actual.value(4));

let actual = date_part(&input, DatePart::Hour).unwrap();
let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(0, actual.value(2));
assert_eq!(24, actual.value(3));
assert_eq!(42_000_000, actual.value(1));
assert_eq!(1_042_000_000, actual.value(2));
assert_eq!(1_000_000, actual.value(3));
// Overflow returns zero.
assert_eq!(0, actual.value(4));

// Month and year are not valid (since days in month varies).
assert!(date_part(&input, DatePart::Month).is_err());
Expand All @@ -1841,8 +1863,18 @@ mod tests {
fn test_interval_month_day_nano_array() {
let input: IntervalMonthDayNanoArray = vec![
IntervalMonthDayNano::ZERO,
IntervalMonthDayNano::new(5, 10, 42),
IntervalMonthDayNano::new(16, 35, MILLISECONDS_IN_DAY * 1_000_000 + 1),
IntervalMonthDayNano::new(5, 10, 42), // 5m, 1w, 3d, 42ns
IntervalMonthDayNano::new(16, 35, NANOSECONDS_IN_DAY + 1), // 1y, 4m, 5w, 24h, 1ns
IntervalMonthDayNano::new(
0,
0,
NANOSECONDS * 60 * 60 * 4
+ NANOSECONDS * 60 * 22
+ NANOSECONDS * 11
+ 1_000_000 * 33
+ 1_000 * 44
+ 5,
), // 4hr, 22m, 11s, 33ms, 44us, 5ns
]
.into();

Expand All @@ -1852,64 +1884,73 @@ mod tests {
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(0, actual.value(3));

let actual = date_part(&input, DatePart::Month).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(5, actual.value(1));
assert_eq!(16, actual.value(2));
assert_eq!(4, actual.value(2));
assert_eq!(0, actual.value(3));

// Week and day follow from day, but are not affected by months or nanos.
let actual = date_part(&input, DatePart::Week).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(1, actual.value(1));
assert_eq!(5, actual.value(2));
assert_eq!(0, actual.value(3));

let actual = date_part(&input, DatePart::Day).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(10, actual.value(1));
assert_eq!(35, actual.value(2));
assert_eq!(0, actual.value(3));

// Times follow from nanos, but are not affected by months or days.
let actual = date_part(&input, DatePart::Hour).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24, actual.value(2));
assert_eq!(4, actual.value(3));

let actual = date_part(&input, DatePart::Minute).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24 * 60, actual.value(2));
assert_eq!(0, actual.value(2));
assert_eq!(22, actual.value(3));

let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24 * 60 * 60, actual.value(2));
assert_eq!(0, actual.value(2));
assert_eq!(11, actual.value(3));

let actual = date_part(&input, DatePart::Millisecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24 * 60 * 60 * 1_000, actual.value(2));
assert_eq!(0, actual.value(2));
assert_eq!(11_033, actual.value(3));

let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
// Overflow gives zero.
assert_eq!(0, actual.value(2));
assert_eq!(11_033_044, actual.value(3));

let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42, actual.value(1));
// Overflow gives zero.
assert_eq!(0, actual.value(2));
assert_eq!(1, actual.value(2));
// Overflow returns zero.
assert_eq!(0, actual.value(3));
}

#[test]
Expand Down
Loading