Skip to content

Commit ebdb5c2

Browse files
committed
Wip wop
1 parent f434f3f commit ebdb5c2

File tree

2 files changed

+41
-41
lines changed

2 files changed

+41
-41
lines changed

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,6 @@ scalar_expr!(
706706
);
707707
nary_scalar_expr!(ArrayUnion, array_union, "returns an array of the elements in the union of array1 and array2 without duplicates.");
708708

709-
710709
scalar_expr!(
711710
Cardinality,
712711
cardinality,

datafusion/physical-expr/src/array_expressions.rs

Lines changed: 41 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,19 @@
1818
//! Array expressions
1919
2020
use arrow::array::*;
21-
use arrow_array::types::Int64Type;
2221
use arrow::buffer::OffsetBuffer;
2322
use arrow::compute;
2423
use arrow::datatypes::{DataType, Field, UInt64Type};
25-
use arrow::row::{RowConverter, SortField, Row};
24+
use arrow::row::{RowConverter, SortField};
2625
use arrow_buffer::NullBuffer;
2726
use core::any::type_name;
2827
use datafusion_common::cast::{as_generic_string_array, as_int64_array, as_list_array};
2928
use datafusion_common::{exec_err, internal_err, not_impl_err, plan_err, ScalarValue};
3029
use datafusion_common::{DataFusionError, Result};
3130
use datafusion_expr::ColumnarValue;
3231
use itertools::Itertools;
33-
use std::sync::Arc;
3432
use std::collections::HashSet;
33+
use std::sync::Arc;
3534

3635
macro_rules! downcast_arg {
3736
($ARG:expr, $ARRAY_TYPE:ident) => {{
@@ -1488,7 +1487,7 @@ macro_rules! to_string {
14881487
// let arr = list_arr.value(i);
14891488
// let i64arr = as_primitive_array::<Int64Type>(&arr);
14901489
// for v in i64arr.iter() {
1491-
1490+
14921491
// // v is Option<i64>
14931492
// }
14941493
// }
@@ -1504,15 +1503,15 @@ macro_rules! to_string {
15041503
// converted.iter().filter(|row| dedup.insert(*row)).for_each(|row| distinct_rows.push(row));
15051504
// let dedup = row_converter.convert_rows(&distinct_rows)?;
15061505
// let res = make_array(dedup.as_slice())?;
1507-
1506+
15081507
// Ok(res)
15091508
// }
15101509

1511-
fn union_generic_lists<OffsetSize: OffsetSizeTrait>(l: &GenericListArray<OffsetSize>, r: &GenericListArray<OffsetSize>) -> Result<GenericListArray<OffsetSize>, DataFusionError>{
1512-
let converter = RowConverter::new(vec![
1513-
SortField::new(
1514-
l.data_type().clone()
1515-
)])?;
1510+
fn union_generic_lists<OffsetSize: OffsetSizeTrait>(
1511+
l: &GenericListArray<OffsetSize>,
1512+
r: &GenericListArray<OffsetSize>,
1513+
) -> Result<GenericListArray<OffsetSize>, DataFusionError> {
1514+
let converter = RowConverter::new(vec![SortField::new(l.data_type().clone())])?;
15161515
let mut dedup = HashSet::new();
15171516
let nulls = NullBuffer::union(l.nulls(), r.nulls());
15181517
let field = Arc::new(Field::new("union", l.data_type().to_owned(), false));
@@ -1529,13 +1528,13 @@ fn union_generic_lists<OffsetSize: OffsetSizeTrait>(l: &GenericListArray<OffsetS
15291528
for (l_w, r_w) in l.offsets().windows(2).zip(r.offsets().windows(2)) {
15301529
let l_slice = l_w[0].as_usize()..l_w[1].as_usize();
15311530
let r_slice = r_w[0].as_usize()..r_w[1].as_usize();
1532-
for i in l_slice{
1531+
for i in l_slice {
15331532
dedup.insert(l_values.row(i));
15341533
}
1535-
for i in r_slice{
1534+
for i in r_slice {
15361535
dedup.insert(r_values.row(i));
15371536
}
1538-
1537+
15391538
rows.extend(dedup.iter());
15401539
offsets.push(OffsetSize::usize_as(rows.len()));
15411540
dedup.clear();
@@ -1544,45 +1543,48 @@ fn union_generic_lists<OffsetSize: OffsetSizeTrait>(l: &GenericListArray<OffsetS
15441543
let values = converter.convert_rows(rows)?;
15451544
let offsets = OffsetBuffer::new(offsets.into());
15461545
let result = values[0].clone();
1547-
Ok(GenericListArray::<OffsetSize>::new(field, offsets, result, nulls))
1546+
Ok(GenericListArray::<OffsetSize>::new(
1547+
field, offsets, result, nulls,
1548+
))
15481549
}
15491550

1550-
15511551
/// Array_union SQL function
15521552
pub fn array_union(args: &[ArrayRef]) -> Result<ArrayRef> {
15531553
if args.len() != 2 {
1554-
return exec_err!("array_union needs two arguments")
1554+
return exec_err!("array_union needs two arguments");
15551555
}
15561556
let array1 = &args[0];
1557-
let array2= &args[1];
1557+
let array2 = &args[1];
15581558
check_datatypes("array_union", &[&array1, &array2])?;
1559-
match (array1.data_type(), array2.data_type()){
1560-
(DataType::Null, _) => {
1561-
Ok(array2.clone())
1562-
},
1563-
(_, DataType::Null) => {
1564-
Ok(array1.clone())
1559+
match (array1.data_type(), array2.data_type()) {
1560+
(DataType::Null, _) => Ok(array2.clone()),
1561+
(_, DataType::Null) => Ok(array1.clone()),
1562+
(DataType::List(field), _) => {
1563+
if field.data_type().equals_datatype(&DataType::Int32) {
1564+
let list1 = array1.as_list::<i32>();
1565+
let list2 = array2.as_list::<i32>();
1566+
let result = union_generic_lists::<i32>(list1, list2)?;
1567+
Ok(result.values().clone())
1568+
} else if field.data_type().equals_datatype(&DataType::Int64) {
1569+
eprintln!("{:?}", array1);
1570+
let list1 = array1.as_list::<i64>();
1571+
let list2 = array2.as_list::<i64>();
1572+
let result = union_generic_lists::<i64>(list1, list2)?;
1573+
Ok(result.values().clone())
1574+
} else {
1575+
return internal_err!(
1576+
"array_union only support list with offsets of type int32 and int64"
1577+
);
1578+
}
15651579
}
1566-
(DataType::Int32, DataType::Int32) => {
1567-
let list1 = array1.as_list();
1568-
let list2 = array2.as_list();
1569-
let result = union_generic_lists::<i32>(list1, list2)?;
1570-
Ok(result.values().clone())
1571-
},
1572-
(DataType::Int64, DataType::Int64) => {
1573-
let list1 = array1.as_list();
1574-
let list2 = array2.as_list();
1575-
let result = union_generic_lists::<i64>(list1, list2)?;
1576-
Ok(result.values().clone())
1577-
},
15781580
_ => {
1579-
return internal_err!("array_union only support list with offsets of type int32 and int64");
1580-
},
1581-
1581+
return internal_err!(
1582+
"array_union only support list with offsets of type int32 and int64"
1583+
);
1584+
}
15821585
}
15831586
}
15841587

1585-
15861588
/// Array_to_string SQL function
15871589
pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
15881590
let arr = &args[0];
@@ -1694,7 +1696,6 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
16941696
Ok(Arc::new(StringArray::from(res)))
16951697
}
16961698

1697-
16981699
/// Cardinality SQL function
16991700
pub fn cardinality(args: &[ArrayRef]) -> Result<ArrayRef> {
17001701
let list_array = as_list_array(&args[0])?.clone();

0 commit comments

Comments
 (0)