@@ -41,7 +41,10 @@ use std::sync::Arc;
4141use crate :: buffer:: MutableBuffer ;
4242use crate :: compute:: kernels:: arithmetic:: { divide, multiply} ;
4343use crate :: compute:: kernels:: arity:: unary;
44- use crate :: compute:: kernels:: cast_utils:: string_to_timestamp_nanos;
44+ use crate :: compute:: kernels:: cast_utils:: {
45+ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month,
46+ string_to_timestamp_nanos,
47+ } ;
4548use crate :: datatypes:: * ;
4649use crate :: error:: { ArrowError , Result } ;
4750use crate :: { array:: * , compute:: take} ;
@@ -176,9 +179,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
176179
177180 ( Utf8 , LargeUtf8 ) => true ,
178181 ( LargeUtf8 , Utf8 ) => true ,
179- ( Utf8 , Date32 | Date64 | Timestamp ( TimeUnit :: Nanosecond , None ) ) => true ,
182+ ( Utf8 , Date32 | Date64 | Timestamp ( TimeUnit :: Nanosecond , None ) | Interval ( _ ) ) => true ,
180183 ( Utf8 , _) => DataType :: is_numeric ( to_type) ,
181- ( LargeUtf8 , Date32 | Date64 | Timestamp ( TimeUnit :: Nanosecond , None ) ) => true ,
184+ ( LargeUtf8 , Date32 | Date64 | Timestamp ( TimeUnit :: Nanosecond , None ) | Interval ( _ ) ) => true ,
182185 ( LargeUtf8 , _) => DataType :: is_numeric ( to_type) ,
183186 ( Timestamp ( _, _) , Utf8 ) | ( Timestamp ( _, _) , LargeUtf8 ) => true ,
184187 ( Date32 , Utf8 ) | ( Date32 , LargeUtf8 ) => true ,
@@ -764,6 +767,15 @@ pub fn cast_with_options(
764767 Timestamp ( TimeUnit :: Nanosecond , None ) => {
765768 cast_string_to_timestamp_ns :: < i32 > ( & * * array, cast_options)
766769 }
770+ Interval ( IntervalUnit :: YearMonth ) => {
771+ cast_string_to_year_month_interval :: < i32 > ( & * * array, cast_options)
772+ }
773+ Interval ( IntervalUnit :: DayTime ) => {
774+ cast_string_to_day_time_interval :: < i32 > ( & * * array, cast_options)
775+ }
776+ Interval ( IntervalUnit :: MonthDayNano ) => {
777+ cast_string_to_month_day_nano_interval :: < i32 > ( & * * array, cast_options)
778+ }
767779 _ => Err ( ArrowError :: CastError ( format ! (
768780 "Casting from {:?} to {:?} not supported" ,
769781 from_type, to_type,
@@ -898,6 +910,15 @@ pub fn cast_with_options(
898910 Timestamp ( TimeUnit :: Nanosecond , None ) => {
899911 cast_string_to_timestamp_ns :: < i64 > ( & * * array, cast_options)
900912 }
913+ Interval ( IntervalUnit :: YearMonth ) => {
914+ cast_string_to_year_month_interval :: < i64 > ( & * * array, cast_options)
915+ }
916+ Interval ( IntervalUnit :: DayTime ) => {
917+ cast_string_to_day_time_interval :: < i64 > ( & * * array, cast_options)
918+ }
919+ Interval ( IntervalUnit :: MonthDayNano ) => {
920+ cast_string_to_month_day_nano_interval :: < i64 > ( & * * array, cast_options)
921+ }
901922 _ => Err ( ArrowError :: CastError ( format ! (
902923 "Casting from {:?} to {:?} not supported" ,
903924 from_type, to_type,
@@ -1757,6 +1778,105 @@ fn cast_string_to_timestamp_ns<Offset: StringOffsetSizeTrait>(
17571778 Ok ( Arc :: new ( array) as ArrayRef )
17581779}
17591780
1781+ fn cast_string_to_year_month_interval < Offset : StringOffsetSizeTrait > (
1782+ array : & dyn Array ,
1783+ cast_options : & CastOptions ,
1784+ ) -> Result < ArrayRef > {
1785+ let string_array = array
1786+ . as_any ( )
1787+ . downcast_ref :: < GenericStringArray < Offset > > ( )
1788+ . unwrap ( ) ;
1789+ let interval_array = if cast_options. safe {
1790+ let iter = string_array
1791+ . iter ( )
1792+ . map ( |v| v. and_then ( |v| parse_interval_year_month ( v) . ok ( ) ) ) ;
1793+
1794+ // Benefit:
1795+ // 20% performance improvement
1796+ // Soundness:
1797+ // The iterator is trustedLen because it comes from an `StringArray`.
1798+ unsafe { IntervalYearMonthArray :: from_trusted_len_iter ( iter) }
1799+ } else {
1800+ let vec = string_array
1801+ . iter ( )
1802+ . map ( |v| v. map ( parse_interval_year_month) . transpose ( ) )
1803+ . collect :: < Result < Vec < _ > > > ( ) ?;
1804+
1805+ // Benefit:
1806+ // 20% performance improvement
1807+ // Soundness:
1808+ // The iterator is trustedLen because it comes from an `StringArray`.
1809+ unsafe { IntervalYearMonthArray :: from_trusted_len_iter ( vec) }
1810+ } ;
1811+ Ok ( Arc :: new ( interval_array) as ArrayRef )
1812+ }
1813+
1814+ fn cast_string_to_day_time_interval < Offset : StringOffsetSizeTrait > (
1815+ array : & dyn Array ,
1816+ cast_options : & CastOptions ,
1817+ ) -> Result < ArrayRef > {
1818+ let string_array = array
1819+ . as_any ( )
1820+ . downcast_ref :: < GenericStringArray < Offset > > ( )
1821+ . unwrap ( ) ;
1822+ let interval_array = if cast_options. safe {
1823+ let iter = string_array
1824+ . iter ( )
1825+ . map ( |v| v. and_then ( |v| parse_interval_day_time ( v) . ok ( ) ) ) ;
1826+
1827+ // Benefit:
1828+ // 20% performance improvement
1829+ // Soundness:
1830+ // The iterator is trustedLen because it comes from an `StringArray`.
1831+ unsafe { IntervalDayTimeArray :: from_trusted_len_iter ( iter) }
1832+ } else {
1833+ let vec = string_array
1834+ . iter ( )
1835+ . map ( |v| v. map ( parse_interval_day_time) . transpose ( ) )
1836+ . collect :: < Result < Vec < _ > > > ( ) ?;
1837+
1838+ // Benefit:
1839+ // 20% performance improvement
1840+ // Soundness:
1841+ // The iterator is trustedLen because it comes from an `StringArray`.
1842+ unsafe { IntervalDayTimeArray :: from_trusted_len_iter ( vec) }
1843+ } ;
1844+ Ok ( Arc :: new ( interval_array) as ArrayRef )
1845+ }
1846+
1847+ fn cast_string_to_month_day_nano_interval < Offset : StringOffsetSizeTrait > (
1848+ array : & dyn Array ,
1849+ cast_options : & CastOptions ,
1850+ ) -> Result < ArrayRef > {
1851+ let string_array = array
1852+ . as_any ( )
1853+ . downcast_ref :: < GenericStringArray < Offset > > ( )
1854+ . unwrap ( ) ;
1855+ let interval_array = if cast_options. safe {
1856+ let iter = string_array
1857+ . iter ( )
1858+ . map ( |v| v. and_then ( |v| parse_interval_month_day_nano ( v) . ok ( ) ) ) ;
1859+
1860+ // Benefit:
1861+ // 20% performance improvement
1862+ // Soundness:
1863+ // The iterator is trustedLen because it comes from an `StringArray`.
1864+ unsafe { IntervalMonthDayNanoArray :: from_trusted_len_iter ( iter) }
1865+ } else {
1866+ let vec = string_array
1867+ . iter ( )
1868+ . map ( |v| v. map ( parse_interval_month_day_nano) . transpose ( ) )
1869+ . collect :: < Result < Vec < _ > > > ( ) ?;
1870+
1871+ // Benefit:
1872+ // 20% performance improvement
1873+ // Soundness:
1874+ // The iterator is trustedLen because it comes from an `StringArray`.
1875+ unsafe { IntervalMonthDayNanoArray :: from_trusted_len_iter ( vec) }
1876+ } ;
1877+ Ok ( Arc :: new ( interval_array) as ArrayRef )
1878+ }
1879+
17601880/// Casts Utf8 to Boolean
17611881fn cast_utf8_to_boolean ( from : & ArrayRef , cast_options : & CastOptions ) -> Result < ArrayRef > {
17621882 let array = as_string_array ( from) ;
@@ -3005,6 +3125,157 @@ mod tests {
30053125 }
30063126 }
30073127
/// Casts `$data_vec` (optional strings) to `Interval($interval_unit)` with
/// `safe: true` and compares the string rendering of every output row with
/// the matching entry of `$expect_vec`. A `None` expectation is compared as
/// the empty string. `$array_ty` is accepted but not used by the expansion.
macro_rules! test_safe_string_to_interval {
    ($data_vec:expr, $interval_unit:expr, $array_ty:ty, $expect_vec:expr) => {
        let safe_options = CastOptions { safe: true };
        let target_type = DataType::Interval($interval_unit);
        let input_array = Arc::new(StringArray::from($data_vec.clone())) as ArrayRef;

        let casted = cast_with_options(&input_array, &target_type, &safe_options).unwrap();

        // Borrow the expectations once instead of re-evaluating the expression per row.
        let expected_rows = &$expect_vec;
        for row in 0..casted.len() {
            let rendered = array_value_to_string(&casted, row).unwrap_or_else(|_| String::new());
            assert_eq!(expected_rows[row].unwrap_or(""), rendered);
        }
    };
}
3151+
/// Safe cast of strings to `Interval(YearMonth)`: inputs made only of year and
/// month fields are expected to convert; the day/second inputs and the
/// unparsable string are expected to come back as null.
#[test]
fn test_cast_string_to_interval_year_month() {
    let inputs = vec![
        Some("1 year 1 month"),
        Some("1.5 years 13 month"),
        Some("30 days"),
        Some("31 days"),
        Some("2 months 31 days"),
        Some("2 months 31 days 1 second"),
        Some("foobar"),
    ];
    let expected = vec![
        Some("1 years 1 mons 0 days 0 hours 0 mins 0.000 secs"),
        Some("2 years 7 mons 0 days 0 hours 0 mins 0.000 secs"),
        None,
        None,
        None,
        None,
        None,
    ];

    test_safe_string_to_interval!(
        inputs,
        IntervalUnit::YearMonth,
        IntervalYearMonthArray,
        expected
    );
}
3177+
/// Safe cast of strings to `Interval(DayTime)`: the fractional-millisecond
/// input and the unparsable string are expected to come back as null; the
/// remaining inputs are expected to render as day/time intervals.
#[test]
fn test_cast_string_to_interval_day_time() {
    let inputs = vec![
        Some("1 year 1 month"),
        Some("1.5 years 13 month"),
        Some("30 days"),
        Some("1 day 2 second 3 milliseconds"),
        Some("1 day 2 second 3.5 milliseconds"),
        Some("foobar"),
    ];
    let expected = vec![
        Some("0 years 0 mons 390 days 0 hours 0 mins 0.000 secs"),
        Some("0 years 0 mons 930 days 0 hours 0 mins 0.000 secs"),
        Some("0 years 0 mons 30 days 0 hours 0 mins 0.000 secs"),
        Some("0 years 0 mons 1 days 0 hours 0 mins 2.003 secs"),
        None,
        None,
    ];

    test_safe_string_to_interval!(
        inputs,
        IntervalUnit::DayTime,
        IntervalDayTimeArray,
        expected
    );
}
3201+
/// Safe cast of strings to `Interval(MonthDayNano)`: null inputs and the
/// unparsable string are expected to come back as null; every other input is
/// expected to render with nanosecond precision.
#[test]
fn test_cast_string_to_interval_month_day_nano() {
    let inputs = vec![
        Some("1 year 1 month 1 day"),
        None,
        Some("1.5 years 13 month 35 days 1.4 milliseconds"),
        Some("3 days"),
        Some("8 seconds"),
        None,
        Some("1 day 29800 milliseconds"),
        Some("3 months 1 second"),
        Some("6 minutes 120 second"),
        Some("2 years 39 months 9 days 19 hours 1 minute 83 seconds 399222 milliseconds"),
        Some("foobar"),
    ];
    let expected = vec![
        Some("0 years 13 mons 1 days 0 hours 0 mins 0.000000000 secs"),
        None,
        Some("0 years 31 mons 35 days 0 hours 0 mins 0.001400000 secs"),
        Some("0 years 0 mons 3 days 0 hours 0 mins 0.000000000 secs"),
        Some("0 years 0 mons 0 days 0 hours 0 mins 8.000000000 secs"),
        None,
        Some("0 years 0 mons 1 days 0 hours 0 mins 29.800000000 secs"),
        Some("0 years 3 mons 0 days 0 hours 0 mins 1.000000000 secs"),
        Some("0 years 0 mons 0 days 0 hours 8 mins 0.000000000 secs"),
        Some("0 years 63 mons 9 days 19 hours 9 mins 2.222000000 secs"),
        None,
    ];

    test_safe_string_to_interval!(
        inputs,
        IntervalUnit::MonthDayNano,
        IntervalMonthDayNanoArray,
        expected
    );
}
3235+
/// Casts `$data_vec` (optional strings) to `Interval($interval_unit)` with
/// `safe: false`, requires the cast to fail, and compares the error's
/// `to_string()` output with `$error_msg`.
macro_rules! test_unsafe_string_to_interval_err {
    ($data_vec:expr, $interval_unit:expr, $error_msg:expr) => {
        let strict_options = CastOptions { safe: false };
        let target_type = DataType::Interval($interval_unit);
        let input_array = Arc::new(StringArray::from($data_vec.clone())) as ArrayRef;

        let cast_result = cast_with_options(&input_array, &target_type, &strict_options);
        let arrow_err = cast_result.unwrap_err();
        assert_eq!($error_msg, arrow_err.to_string());
    };
}
3249+
/// Strict (`safe: false`) casts of bad strings must fail with the exact error
/// text: an unparsable string for each interval unit, a year-month cast that
/// carries day/second fields, and a day-time cast with a fractional
/// millisecond.
#[test]
fn test_cast_string_to_interval_err() {
    // All three units are expected to report the same message for an unparsable string.
    let unsupported =
        r#"Not yet implemented: Unsupported Interval Expression with value "foobar""#;

    test_unsafe_string_to_interval_err!(
        vec![Some("foobar")],
        IntervalUnit::YearMonth,
        unsupported
    );
    test_unsafe_string_to_interval_err!(
        vec![Some("foobar")],
        IntervalUnit::DayTime,
        unsupported
    );
    test_unsafe_string_to_interval_err!(
        vec![Some("foobar")],
        IntervalUnit::MonthDayNano,
        unsupported
    );

    let year_month_only = r#"Cast error: Cannot cast 2 months 31 days 1 second to IntervalYearMonth. Only year and month fields are allowed."#;
    test_unsafe_string_to_interval_err!(
        vec![Some("2 months 31 days 1 second")],
        IntervalUnit::YearMonth,
        year_month_only
    );

    let not_whole_millis = r#"Cast error: Cannot cast 1 day 1.5 milliseconds to IntervalDayTime because the nanos part isn't multiple of milliseconds"#;
    test_unsafe_string_to_interval_err!(
        vec![Some("1 day 1.5 milliseconds")],
        IntervalUnit::DayTime,
        not_whole_millis
    );
}
3278+
30083279 #[ test]
30093280 fn test_cast_date32_to_int32 ( ) {
30103281 let a = Date32Array :: from ( vec ! [ 10000 , 17890 ] ) ;
0 commit comments