@@ -30,7 +30,8 @@ use arrow_buffer::NullBuffer;
3030
3131use arrow_schema:: FieldRef ;
3232use datafusion_common:: cast:: {
33- as_generic_string_array, as_int64_array, as_list_array, as_string_array,
33+ as_generic_string_array, as_int64_array, as_large_list_array, as_list_array,
34+ as_string_array,
3435} ;
3536use datafusion_common:: utils:: array_into_list_array;
3637use datafusion_common:: {
@@ -1991,38 +1992,27 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result<ArrayRef> {
19911992 }
19921993}
19931994
1994- /// array_distinct SQL function
1995- /// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4]
1996- pub fn array_distinct ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1997- assert_eq ! ( args. len( ) , 1 ) ;
1998-
1999- // handle null
2000- if args[ 0 ] . data_type ( ) == & DataType :: Null {
2001- return Ok ( args[ 0 ] . clone ( ) ) ;
2002- }
2003-
2004- let array = as_list_array ( & args[ 0 ] ) ?;
1995+ pub fn general_array_distinct < OffsetSize : OffsetSizeTrait > (
1996+ array : & GenericListArray < OffsetSize > ,
1997+ field : & FieldRef ,
1998+ ) -> Result < ArrayRef > {
20051999 let dt = array. value_type ( ) ;
2006-
2007- let mut offsets = vec ! [ 0 ] ;
2000+ let mut offsets = vec ! [ OffsetSize :: usize_as( 0 ) ] ;
20082001 let mut new_arrays = vec ! [ ] ;
2009-
20102002 let converter = RowConverter :: new ( vec ! [ SortField :: new( dt. clone( ) ) ] ) ?;
20112003 // distinct for each list in ListArray
20122004 for arr in array. iter ( ) . flatten ( ) {
20132005 let values = converter. convert_columns ( & [ arr] ) ?;
2014-
20152006 let mut rows = Vec :: with_capacity ( values. num_rows ( ) ) ;
20162007 // sort elements in list and remove duplicates
20172008 for val in values. iter ( ) . sorted ( ) . dedup ( ) {
20182009 rows. push ( val) ;
20192010 }
2020-
2021- let last_offset: i32 = match offsets. last ( ) . copied ( ) {
2011+ let last_offset: OffsetSize = match offsets. last ( ) . copied ( ) {
20222012 Some ( offset) => offset,
20232013 None => return internal_err ! ( "offsets should not be empty" ) ,
20242014 } ;
2025- offsets. push ( last_offset + rows. len ( ) as i32 ) ;
2015+ offsets. push ( last_offset + OffsetSize :: usize_as ( rows. len ( ) ) ) ;
20262016 let arrays = converter. convert_rows ( rows) ?;
20272017 let array = match arrays. get ( 0 ) {
20282018 Some ( array) => array. clone ( ) ,
@@ -2032,13 +2022,39 @@ pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> {
20322022 } ;
20332023 new_arrays. push ( array) ;
20342024 }
2035-
2036- let field = Arc :: new ( Field :: new ( "item" , dt, true ) ) ;
20372025 let offsets = OffsetBuffer :: new ( offsets. into ( ) ) ;
20382026 let new_arrays_ref = new_arrays. iter ( ) . map ( |v| v. as_ref ( ) ) . collect :: < Vec < _ > > ( ) ;
20392027 let values = compute:: concat ( & new_arrays_ref) ?;
2040- let arr = Arc :: new ( ListArray :: try_new ( field, offsets, values, None ) ?) ;
2041- Ok ( arr)
2028+ Ok ( Arc :: new ( GenericListArray :: < OffsetSize > :: try_new (
2029+ field. clone ( ) ,
2030+ offsets,
2031+ values,
2032+ None ,
2033+ ) ?) )
2034+ }
2035+
2036+ /// array_distinct SQL function
2037+ /// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4]
2038+ pub fn array_distinct ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
2039+ assert_eq ! ( args. len( ) , 1 ) ;
2040+
2041+ // handle null
2042+ if args[ 0 ] . data_type ( ) == & DataType :: Null {
2043+ return Ok ( args[ 0 ] . clone ( ) ) ;
2044+ }
2045+
2046+ // handle for list & largelist
2047+ match args[ 0 ] . data_type ( ) {
2048+ DataType :: List ( field) => {
2049+ let array = as_list_array ( & args[ 0 ] ) ?;
2050+ general_array_distinct ( array, field)
2051+ }
2052+ DataType :: LargeList ( field) => {
2053+ let array = as_large_list_array ( & args[ 0 ] ) ?;
2054+ general_array_distinct ( array, field)
2055+ }
2056+ _ => internal_err ! ( "array_distinct only support list array" ) ,
2057+ }
20422058}
20432059
20442060#[ cfg( test) ]
0 commit comments