@@ -35,8 +35,7 @@ use datafusion_common::cast::{
3535} ;
3636use datafusion_common:: utils:: { array_into_list_array, list_ndims} ;
3737use datafusion_common:: {
38- exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err,
39- DataFusionError , Result ,
38+ exec_err, internal_err, not_impl_err, plan_err, DataFusionError , Result ,
4039} ;
4140
4241use itertools:: Itertools ;
@@ -1320,84 +1319,76 @@ fn general_replace(
13201319) -> Result < ArrayRef > {
13211320 // Build up the offsets for the final output array
13221321 let mut offsets: Vec < i32 > = vec ! [ 0 ] ;
1323- let data_type = list_array. value_type ( ) ;
1324- let mut new_values = vec ! [ ] ;
1322+ let values = list_array. values ( ) ;
1323+ let original_data = values. to_data ( ) ;
1324+ let to_data = to_array. to_data ( ) ;
1325+ let capacity = Capacities :: Array ( original_data. len ( ) ) ;
13251326
1326- // n is the number of elements to replace in this row
1327- for ( row_index, ( list_array_row, n) ) in
1328- list_array. iter ( ) . zip ( arr_n. iter ( ) ) . enumerate ( )
1329- {
1330- let last_offset: i32 = offsets
1331- . last ( )
1332- . copied ( )
1333- . ok_or_else ( || internal_datafusion_err ! ( "offsets should not be empty" ) ) ?;
1327+ // First array is the original array, second array is the element to replace with.
1328+ let mut mutable = MutableArrayData :: with_capacities (
1329+ vec ! [ & original_data, & to_data] ,
1330+ false ,
1331+ capacity,
1332+ ) ;
13341333
1335- match list_array_row {
1336- Some ( list_array_row) => {
1337- // Compute all positions in list_row_array (that is itself an
1338- // array) that are equal to `from_array_row`
1339- let eq_array = compare_element_to_list (
1340- & list_array_row,
1341- & from_array,
1342- row_index,
1343- true ,
1344- ) ?;
1334+ let mut valid = BooleanBufferBuilder :: new ( list_array. len ( ) ) ;
13451335
1346- // Use MutableArrayData to build the replaced array
1347- let original_data = list_array_row. to_data ( ) ;
1348- let to_data = to_array. to_data ( ) ;
1349- let capacity = Capacities :: Array ( original_data. len ( ) + to_data. len ( ) ) ;
1336+ for ( row_index, offset_window) in list_array. offsets ( ) . windows ( 2 ) . enumerate ( ) {
1337+ if list_array. is_null ( row_index) {
1338+ offsets. push ( offsets[ row_index] ) ;
1339+ valid. append ( false ) ;
1340+ continue ;
1341+ }
13501342
1351- // First array is the original array, second array is the element to replace with.
1352- let mut mutable = MutableArrayData :: with_capacities (
1353- vec ! [ & original_data, & to_data] ,
1354- false ,
1355- capacity,
1356- ) ;
1357- let original_idx = 0 ;
1358- let replace_idx = 1 ;
1359-
1360- let mut counter = 0 ;
1361- for ( i, to_replace) in eq_array. iter ( ) . enumerate ( ) {
1362- if let Some ( true ) = to_replace {
1363- mutable. extend ( replace_idx, row_index, row_index + 1 ) ;
1364- counter += 1 ;
1365- if counter == * n {
1366- // copy original data for any matches past n
1367- mutable. extend ( original_idx, i + 1 , eq_array. len ( ) ) ;
1368- break ;
1369- }
1370- } else {
1371- // copy original data for false / null matches
1372- mutable. extend ( original_idx, i, i + 1 ) ;
1373- }
1374- }
1343+ let start = offset_window[ 0 ] as usize ;
1344+ let end = offset_window[ 1 ] as usize ;
13751345
1376- let data = mutable. freeze ( ) ;
1377- let replaced_array = arrow_array:: make_array ( data) ;
1346+ let list_array_row = list_array. value ( row_index) ;
13781347
1379- offsets. push ( last_offset + replaced_array. len ( ) as i32 ) ;
1380- new_values. push ( replaced_array) ;
1381- }
1382- None => {
1383- // Null element results in a null row (no new offsets)
1384- offsets. push ( last_offset) ;
1348+ // Compute all positions in list_row_array (that is itself an
1349+ // array) that are equal to `from_array_row`
1350+ let eq_array =
1351+ compare_element_to_list ( & list_array_row, & from_array, row_index, true ) ?;
1352+
1353+ let original_idx = 0 ;
1354+ let replace_idx = 1 ;
1355+ let n = arr_n[ row_index] ;
1356+ let mut counter = 0 ;
1357+
1358+ // All elements are false, no need to replace, just copy original data
1359+ if eq_array. false_count ( ) == eq_array. len ( ) {
1360+ mutable. extend ( original_idx, start, end) ;
1361+ offsets. push ( offsets[ row_index] + ( end - start) as i32 ) ;
1362+ valid. append ( true ) ;
1363+ continue ;
1364+ }
1365+
1366+ for ( i, to_replace) in eq_array. iter ( ) . enumerate ( ) {
1367+ if let Some ( true ) = to_replace {
1368+ mutable. extend ( replace_idx, row_index, row_index + 1 ) ;
1369+ counter += 1 ;
1370+ if counter == n {
1371+ // copy original data for any matches past n
1372+ mutable. extend ( original_idx, start + i + 1 , end) ;
1373+ break ;
1374+ }
1375+ } else {
1376+ // copy original data for false / null matches
1377+ mutable. extend ( original_idx, start + i, start + i + 1 ) ;
13851378 }
13861379 }
1380+
1381+ offsets. push ( offsets[ row_index] + ( end - start) as i32 ) ;
1382+ valid. append ( true ) ;
13871383 }
13881384
1389- let values = if new_values. is_empty ( ) {
1390- new_empty_array ( & data_type)
1391- } else {
1392- let new_values: Vec < _ > = new_values. iter ( ) . map ( |a| a. as_ref ( ) ) . collect ( ) ;
1393- arrow:: compute:: concat ( & new_values) ?
1394- } ;
1385+ let data = mutable. freeze ( ) ;
13951386
13961387 Ok ( Arc :: new ( ListArray :: try_new (
1397- Arc :: new ( Field :: new ( "item" , data_type , true ) ) ,
1388+ Arc :: new ( Field :: new ( "item" , list_array . value_type ( ) , true ) ) ,
13981389 OffsetBuffer :: new ( offsets. into ( ) ) ,
1399- values ,
1400- list_array . nulls ( ) . cloned ( ) ,
1390+ arrow_array :: make_array ( data ) ,
1391+ Some ( NullBuffer :: new ( valid . finish ( ) ) ) ,
14011392 ) ?) )
14021393}
14031394
0 commit comments