@@ -3246,6 +3246,8 @@ impl ScalarValue {
32463246
32473247 /// Retrieve ScalarValue for each row in `array`
32483248 ///
3249+ /// Elements in `array` may be NULL, in which case the corresponding element in the returned vector is `None`.
3250+ ///
32493251 /// Example 1: Array (ScalarValue::Int32)
32503252 /// ```
32513253 /// use datafusion_common::ScalarValue;
@@ -3262,15 +3264,15 @@ impl ScalarValue {
32623264 /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
32633265 ///
32643266 /// let expected = vec![
3265- /// vec![
3266- /// ScalarValue::Int32(Some(1)),
3267- /// ScalarValue::Int32(Some(2)),
3268- /// ScalarValue::Int32(Some(3)),
3269- /// ] ,
3270- /// vec![
3271- /// ScalarValue::Int32(Some(4)),
3272- /// ScalarValue::Int32(Some(5)),
3273- /// ] ,
3267+ /// Some( vec![
3268+ /// ScalarValue::Int32(Some(1)),
3269+ /// ScalarValue::Int32(Some(2)),
3270+ /// ScalarValue::Int32(Some(3)),
3271+ /// ]) ,
3272+ /// Some( vec![
3273+ /// ScalarValue::Int32(Some(4)),
3274+ /// ScalarValue::Int32(Some(5)),
3275+ /// ]) ,
32743276 /// ];
32753277 ///
32763278 /// assert_eq!(scalar_vec, expected);
@@ -3303,28 +3305,62 @@ impl ScalarValue {
33033305 /// ]);
33043306 ///
33053307 /// let expected = vec![
3306- /// vec![
3308+ /// Some( vec![
33073309 /// ScalarValue::List(Arc::new(l1)),
33083310 /// ScalarValue::List(Arc::new(l2)),
3309- /// ],
3311+ /// ]),
3312+ /// ];
3313+ ///
3314+ /// assert_eq!(scalar_vec, expected);
3315+ /// ```
3316+ ///
3317+ /// Example 3: Nullable array
3318+ /// ```
3319+ /// use datafusion_common::ScalarValue;
3320+ /// use arrow::array::ListArray;
3321+ /// use arrow::datatypes::{DataType, Int32Type};
3322+ ///
3323+ /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3324+ /// Some(vec![Some(1), Some(2), Some(3)]),
3325+ /// None,
3326+ /// Some(vec![Some(4), Some(5)])
3327+ /// ]);
3328+ ///
3329+ /// // Convert the array into Scalar Values for each row
3330+ /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3331+ ///
3332+ /// let expected = vec![
3333+ /// Some(vec![
3334+ /// ScalarValue::Int32(Some(1)),
3335+ /// ScalarValue::Int32(Some(2)),
3336+ /// ScalarValue::Int32(Some(3)),
3337+ /// ]),
3338+ /// None,
3339+ /// Some(vec![
3340+ /// ScalarValue::Int32(Some(4)),
3341+ /// ScalarValue::Int32(Some(5)),
3342+ /// ]),
33103343 /// ];
33113344 ///
33123345 /// assert_eq!(scalar_vec, expected);
33133346 /// ```
3314- pub fn convert_array_to_scalar_vec ( array : & dyn Array ) -> Result < Vec < Vec < Self > > > {
3347+ pub fn convert_array_to_scalar_vec (
3348+ array : & dyn Array ,
3349+ ) -> Result < Vec < Option < Vec < Self > > > > {
33153350 fn generic_collect < OffsetSize : OffsetSizeTrait > (
33163351 array : & dyn Array ,
3317- ) -> Result < Vec < Vec < ScalarValue > > > {
3352+ ) -> Result < Vec < Option < Vec < ScalarValue > > > > {
33183353 array
33193354 . as_list :: < OffsetSize > ( )
33203355 . iter ( )
3321- . map ( |nested_array| match nested_array {
3322- Some ( nested_array) => ( 0 ..nested_array. len ( ) )
3323- . map ( |i| ScalarValue :: try_from_array ( & nested_array, i) )
3324- . collect :: < Result < Vec < _ > > > ( ) ,
3325- // TODO: what can we put for null?
3326- // https://github.com/apache/datafusion/issues/17749
3327- None => Ok ( vec ! [ ] ) ,
3356+ . map ( |nested_array| {
3357+ nested_array
3358+ . map ( |array| {
3359+ ( 0 ..array. len ( ) )
3360+ . map ( |i| ScalarValue :: try_from_array ( & array, i) )
3361+ . collect :: < Result < Vec < _ > > > ( )
3362+ } )
3363+ . transpose ( )
33283364 } )
33293365 . collect ( )
33303366 }
@@ -9021,7 +9057,7 @@ mod tests {
90219057
90229058 #[ test]
90239059 fn test_convert_array_to_scalar_vec ( ) {
9024- // Regular ListArray
9060+ // 1: Regular ListArray
90259061 let list = ListArray :: from_iter_primitive :: < Int64Type , _ , _ > ( vec ! [
90269062 Some ( vec![ Some ( 1 ) , Some ( 2 ) ] ) ,
90279063 None ,
@@ -9031,17 +9067,20 @@ mod tests {
90319067 assert_eq ! (
90329068 converted,
90339069 vec![
9034- vec![ ScalarValue :: Int64 ( Some ( 1 ) ) , ScalarValue :: Int64 ( Some ( 2 ) ) ] ,
9035- vec![ ] ,
9036- vec![
9070+ Some ( vec![
9071+ ScalarValue :: Int64 ( Some ( 1 ) ) ,
9072+ ScalarValue :: Int64 ( Some ( 2 ) )
9073+ ] ) ,
9074+ None ,
9075+ Some ( vec![
90379076 ScalarValue :: Int64 ( Some ( 3 ) ) ,
90389077 ScalarValue :: Int64 ( None ) ,
90399078 ScalarValue :: Int64 ( Some ( 4 ) )
9040- ] ,
9079+ ] ) ,
90419080 ]
90429081 ) ;
90439082
9044- // Regular LargeListArray
9083+ // 2: Regular LargeListArray
90459084 let large_list = LargeListArray :: from_iter_primitive :: < Int64Type , _ , _ > ( vec ! [
90469085 Some ( vec![ Some ( 1 ) , Some ( 2 ) ] ) ,
90479086 None ,
@@ -9051,17 +9090,20 @@ mod tests {
90519090 assert_eq ! (
90529091 converted,
90539092 vec![
9054- vec![ ScalarValue :: Int64 ( Some ( 1 ) ) , ScalarValue :: Int64 ( Some ( 2 ) ) ] ,
9055- vec![ ] ,
9056- vec![
9093+ Some ( vec![
9094+ ScalarValue :: Int64 ( Some ( 1 ) ) ,
9095+ ScalarValue :: Int64 ( Some ( 2 ) )
9096+ ] ) ,
9097+ None ,
9098+ Some ( vec![
90579099 ScalarValue :: Int64 ( Some ( 3 ) ) ,
90589100 ScalarValue :: Int64 ( None ) ,
90599101 ScalarValue :: Int64 ( Some ( 4 ) )
9060- ] ,
9102+ ] ) ,
90619103 ]
90629104 ) ;
90639105
9064- // Funky (null slot has non-zero list offsets)
9106+ // 3: Funky (null slot has non-zero list offsets)
90659107 // Offsets + Values looks like this: [[1, 2], [3, 4], [5]]
90669108 // But with NullBuffer it's like this: [[1, 2], NULL, [5]]
90679109 let funky = ListArray :: new (
@@ -9074,9 +9116,63 @@ mod tests {
90749116 assert_eq ! (
90759117 converted,
90769118 vec![
9077- vec![ ScalarValue :: Int64 ( Some ( 1 ) ) , ScalarValue :: Int64 ( Some ( 2 ) ) ] ,
9078- vec![ ] ,
9079- vec![ ScalarValue :: Int64 ( Some ( 5 ) ) ] ,
9119+ Some ( vec![
9120+ ScalarValue :: Int64 ( Some ( 1 ) ) ,
9121+ ScalarValue :: Int64 ( Some ( 2 ) )
9122+ ] ) ,
9123+ None ,
9124+ Some ( vec![ ScalarValue :: Int64 ( Some ( 5 ) ) ] ) ,
9125+ ]
9126+ ) ;
9127+
9128+ // 4: Offsets + Values looks like this: [[1, 2], [], [3, 4, 5]]
9129+ // But with NullBuffer it's like this: [[1, 2], NULL, [3, 4, 5]]
9130+ // The converted result is: [[1, 2], None, [3, 4, 5]]
9131+ let array4 = ListArray :: new (
9132+ Field :: new_list_field ( DataType :: Int64 , true ) . into ( ) ,
9133+ OffsetBuffer :: new ( vec ! [ 0 , 2 , 2 , 5 ] . into ( ) ) ,
9134+ Arc :: new ( Int64Array :: from ( vec ! [ 1 , 2 , 3 , 4 , 5 , 6 ] ) ) ,
9135+ Some ( NullBuffer :: from ( vec ! [ true , false , true ] ) ) ,
9136+ ) ;
9137+ let converted = ScalarValue :: convert_array_to_scalar_vec ( & array4) . unwrap ( ) ;
9138+ assert_eq ! (
9139+ converted,
9140+ vec![
9141+ Some ( vec![
9142+ ScalarValue :: Int64 ( Some ( 1 ) ) ,
9143+ ScalarValue :: Int64 ( Some ( 2 ) )
9144+ ] ) ,
9145+ None ,
9146+ Some ( vec![
9147+ ScalarValue :: Int64 ( Some ( 3 ) ) ,
9148+ ScalarValue :: Int64 ( Some ( 4 ) ) ,
9149+ ScalarValue :: Int64 ( Some ( 5 ) ) ,
9150+ ] ) ,
9151+ ]
9152+ ) ;
9153+
9154+ // 5: Offsets + Values looks like this: [[1, 2], [], [3, 4, 5]]
9155+ // Same as 4, but the middle list is valid (not null), so it converts to an empty vector instead of None.
9156+ let array5 = ListArray :: new (
9157+ Field :: new_list_field ( DataType :: Int64 , true ) . into ( ) ,
9158+ OffsetBuffer :: new ( vec ! [ 0 , 2 , 2 , 5 ] . into ( ) ) ,
9159+ Arc :: new ( Int64Array :: from ( vec ! [ 1 , 2 , 3 , 4 , 5 , 6 ] ) ) ,
9160+ Some ( NullBuffer :: from ( vec ! [ true , true , true ] ) ) ,
9161+ ) ;
9162+ let converted = ScalarValue :: convert_array_to_scalar_vec ( & array5) . unwrap ( ) ;
9163+ assert_eq ! (
9164+ converted,
9165+ vec![
9166+ Some ( vec![
9167+ ScalarValue :: Int64 ( Some ( 1 ) ) ,
9168+ ScalarValue :: Int64 ( Some ( 2 ) )
9169+ ] ) ,
9170+ Some ( vec![ ] ) ,
9171+ Some ( vec![
9172+ ScalarValue :: Int64 ( Some ( 3 ) ) ,
9173+ ScalarValue :: Int64 ( Some ( 4 ) ) ,
9174+ ScalarValue :: Int64 ( Some ( 5 ) ) ,
9175+ ] ) ,
90809176 ]
90819177 ) ;
90829178 }
0 commit comments