Skip to content

Commit 8251e56

Browse files
committed
Merge branch 'main' into distinct-on-impl
2 parents 9b41907 + 5634cce commit 8251e56

File tree

29 files changed

+366
-170
lines changed

29 files changed

+366
-170
lines changed

Cargo.toml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,41 @@ arrow-array = { version = "48.0.0", default-features = false, features = ["chron
5454
arrow-buffer = { version = "48.0.0", default-features = false }
5555
arrow-flight = { version = "48.0.0", features = ["flight-sql-experimental"] }
5656
arrow-schema = { version = "48.0.0", default-features = false }
57+
async-trait = "0.1.73"
58+
bigdecimal = "0.4.1"
59+
bytes = "1.4"
60+
ctor = "0.2.0"
61+
datafusion = { path = "datafusion/core" }
62+
datafusion-common = { path = "datafusion/common" }
63+
datafusion-expr = { path = "datafusion/expr" }
64+
datafusion-sql = { path = "datafusion/sql" }
65+
datafusion-optimizer = { path = "datafusion/optimizer" }
66+
datafusion-physical-expr = { path = "datafusion/physical-expr" }
67+
datafusion-physical-plan = { path = "datafusion/physical-plan" }
68+
datafusion-execution = { path = "datafusion/execution" }
69+
datafusion-proto = { path = "datafusion/proto" }
70+
datafusion-sqllogictest = { path = "datafusion/sqllogictest" }
71+
datafusion-substrait = { path = "datafusion/substrait" }
72+
dashmap = "5.4.0"
73+
doc-comment = "0.3"
74+
env_logger = "0.10"
75+
futures = "0.3"
76+
half = "2.2.1"
77+
indexmap = "2.0.0"
78+
itertools = "0.11"
79+
log = "^0.4"
80+
num_cpus = "1.13.0"
81+
object_store = "0.7.0"
82+
parking_lot = "0.12"
5783
parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] }
84+
rand = "0.8"
85+
rstest = "0.18.0"
86+
serde_json = "1"
5887
sqlparser = { version = "0.39.0", features = ["visitor"] }
88+
tempfile = "3"
89+
thiserror = "1.0.44"
5990
chrono = { version = "0.4.31", default-features = false }
91+
url = "2.2"
6092

6193
[profile.release]
6294
codegen-units = 1

benchmarks/Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ snmalloc = ["snmalloc-rs"]
3636
arrow = { workspace = true }
3737
datafusion = { path = "../datafusion/core", version = "32.0.0" }
3838
datafusion-common = { path = "../datafusion/common", version = "32.0.0" }
39-
env_logger = "0.10"
40-
futures = "0.3"
41-
log = "^0.4"
39+
env_logger = { workspace = true }
40+
futures = { workspace = true }
41+
log = { workspace = true }
4242
mimalloc = { version = "0.1", optional = true, default-features = false }
43-
num_cpus = "1.13.0"
43+
num_cpus = { workspace = true }
4444
parquet = { workspace = true }
4545
serde = { version = "1.0.136", features = ["derive"] }
46-
serde_json = "1.0.78"
46+
serde_json = { workspace = true }
4747
snmalloc-rs = { version = "0.3", optional = true }
4848
structopt = { version = "0.3", default-features = false }
4949
test-utils = { path = "../test-utils/", version = "0.1.0" }

datafusion-examples/Cargo.toml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,26 +33,26 @@ rust-version = { workspace = true }
3333
arrow = { workspace = true }
3434
arrow-flight = { workspace = true }
3535
arrow-schema = { workspace = true }
36-
async-trait = "0.1.41"
37-
bytes = "1.4"
38-
dashmap = "5.4"
36+
async-trait = { workspace = true }
37+
bytes = { workspace = true }
38+
dashmap = { workspace = true }
3939
datafusion = { path = "../datafusion/core", features = ["avro"] }
4040
datafusion-common = { path = "../datafusion/common" }
4141
datafusion-expr = { path = "../datafusion/expr" }
4242
datafusion-optimizer = { path = "../datafusion/optimizer" }
4343
datafusion-sql = { path = "../datafusion/sql" }
44-
env_logger = "0.10"
45-
futures = "0.3"
46-
log = "0.4"
44+
env_logger = { workspace = true }
45+
futures = { workspace = true }
46+
log = { workspace = true }
4747
mimalloc = { version = "0.1", default-features = false }
48-
num_cpus = "1.13.0"
48+
num_cpus = { workspace = true }
4949
object_store = { version = "0.7.0", features = ["aws", "http"] }
5050
prost = { version = "0.12", default-features = false }
5151
prost-derive = { version = "0.11", default-features = false }
5252
serde = { version = "1.0.136", features = ["derive"] }
53-
serde_json = "1.0.82"
54-
tempfile = "3"
53+
serde_json = { workspace = true }
54+
tempfile = { workspace = true }
5555
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
5656
tonic = "0.10"
57-
url = "2.2"
57+
url = { workspace = true }
5858
uuid = "1.2"

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ arrow-buffer = { workspace = true }
4646
arrow-schema = { workspace = true }
4747
chrono = { workspace = true }
4848
half = { version = "2.1", default-features = false }
49-
num_cpus = "1.13.0"
49+
num_cpus = { workspace = true }
5050
object_store = { version = "0.7.0", default-features = false, optional = true }
5151
parquet = { workspace = true, optional = true }
5252
pyo3 = { version = "0.20.0", optional = true }

datafusion/common/src/scalar.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::cast::{
3030
};
3131
use crate::error::{DataFusionError, Result, _internal_err, _not_impl_err};
3232
use crate::hash_utils::create_hashes;
33-
use crate::utils::wrap_into_list_array;
33+
use crate::utils::array_into_list_array;
3434
use arrow::buffer::{NullBuffer, OffsetBuffer};
3535
use arrow::compute::kernels::numeric::*;
3636
use arrow::datatypes::{i256, FieldRef, Fields, SchemaBuilder};
@@ -1667,7 +1667,7 @@ impl ScalarValue {
16671667
} else {
16681668
Self::iter_to_array(values.iter().cloned()).unwrap()
16691669
};
1670-
Arc::new(wrap_into_list_array(values))
1670+
Arc::new(array_into_list_array(values))
16711671
}
16721672

16731673
/// Converts a scalar value into an array of `size` rows.
@@ -2058,7 +2058,7 @@ impl ScalarValue {
20582058
let list_array = as_list_array(array);
20592059
let nested_array = list_array.value(index);
20602060
// Produces a single element `ListArray` with the value at `index`.
2061-
let arr = Arc::new(wrap_into_list_array(nested_array));
2061+
let arr = Arc::new(array_into_list_array(nested_array));
20622062

20632063
ScalarValue::List(arr)
20642064
}
@@ -2067,7 +2067,7 @@ impl ScalarValue {
20672067
let list_array = as_fixed_size_list_array(array)?;
20682068
let nested_array = list_array.value(index);
20692069
// Produces a single element `ListArray` with the value at `index`.
2070-
let arr = Arc::new(wrap_into_list_array(nested_array));
2070+
let arr = Arc::new(array_into_list_array(nested_array));
20712071

20722072
ScalarValue::List(arr)
20732073
}
@@ -3052,7 +3052,7 @@ mod tests {
30523052

30533053
let array = ScalarValue::new_list(scalars.as_slice(), &DataType::Utf8);
30543054

3055-
let expected = wrap_into_list_array(Arc::new(StringArray::from(vec![
3055+
let expected = array_into_list_array(Arc::new(StringArray::from(vec![
30563056
"rust",
30573057
"arrow",
30583058
"data-fusion",
@@ -3091,9 +3091,9 @@ mod tests {
30913091
#[test]
30923092
fn iter_to_array_string_test() {
30933093
let arr1 =
3094-
wrap_into_list_array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"])));
3094+
array_into_list_array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"])));
30953095
let arr2 =
3096-
wrap_into_list_array(Arc::new(StringArray::from(vec!["rust", "world"])));
3096+
array_into_list_array(Arc::new(StringArray::from(vec!["rust", "world"])));
30973097

30983098
let scalars = vec![
30993099
ScalarValue::List(Arc::new(arr1)),
@@ -4335,13 +4335,13 @@ mod tests {
43354335
// Define list-of-structs scalars
43364336

43374337
let nl0_array = ScalarValue::iter_to_array(vec![s0.clone(), s1.clone()]).unwrap();
4338-
let nl0 = ScalarValue::List(Arc::new(wrap_into_list_array(nl0_array)));
4338+
let nl0 = ScalarValue::List(Arc::new(array_into_list_array(nl0_array)));
43394339

43404340
let nl1_array = ScalarValue::iter_to_array(vec![s2.clone()]).unwrap();
4341-
let nl1 = ScalarValue::List(Arc::new(wrap_into_list_array(nl1_array)));
4341+
let nl1 = ScalarValue::List(Arc::new(array_into_list_array(nl1_array)));
43424342

43434343
let nl2_array = ScalarValue::iter_to_array(vec![s1.clone()]).unwrap();
4344-
let nl2 = ScalarValue::List(Arc::new(wrap_into_list_array(nl2_array)));
4344+
let nl2 = ScalarValue::List(Arc::new(array_into_list_array(nl2_array)));
43454345

43464346
// iter_to_array for list-of-struct
43474347
let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();

datafusion/common/src/utils.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717

1818
//! This module provides the bisect function, which implements binary search.
1919
20+
use crate::error::_internal_err;
2021
use crate::{DataFusionError, Result, ScalarValue};
2122
use arrow::array::{ArrayRef, PrimitiveArray};
2223
use arrow::buffer::OffsetBuffer;
2324
use arrow::compute;
2425
use arrow::compute::{partition, SortColumn, SortOptions};
2526
use arrow::datatypes::{Field, SchemaRef, UInt32Type};
2627
use arrow::record_batch::RecordBatch;
27-
use arrow_array::ListArray;
28+
use arrow_array::{Array, ListArray};
2829
use sqlparser::ast::Ident;
2930
use sqlparser::dialect::GenericDialect;
3031
use sqlparser::parser::Parser;
@@ -338,7 +339,7 @@ pub fn longest_consecutive_prefix<T: Borrow<usize>>(
338339

339340
/// Wrap an array into a single element `ListArray`.
340341
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
341-
pub fn wrap_into_list_array(arr: ArrayRef) -> ListArray {
342+
pub fn array_into_list_array(arr: ArrayRef) -> ListArray {
342343
let offsets = OffsetBuffer::from_lengths([arr.len()]);
343344
ListArray::new(
344345
Arc::new(Field::new("item", arr.data_type().to_owned(), true)),
@@ -348,6 +349,47 @@ pub fn wrap_into_list_array(arr: ArrayRef) -> ListArray {
348349
)
349350
}
350351

352+
/// Wrap arrays into a single element `ListArray`.
353+
///
354+
/// Example:
355+
/// ```
356+
/// use arrow::array::{Int32Array, ListArray, ArrayRef};
357+
/// use arrow::datatypes::{Int32Type, Field};
358+
/// use std::sync::Arc;
359+
///
360+
/// let arr1 = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
361+
/// let arr2 = Arc::new(Int32Array::from(vec![4, 5, 6])) as ArrayRef;
362+
///
363+
/// let list_arr = datafusion_common::utils::arrays_into_list_array([arr1, arr2]).unwrap();
364+
///
365+
/// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
366+
/// vec![
367+
/// Some(vec![Some(1), Some(2), Some(3)]),
368+
/// Some(vec![Some(4), Some(5), Some(6)]),
369+
/// ]
370+
/// );
371+
///
372+
/// assert_eq!(list_arr, expected);
373+
pub fn arrays_into_list_array(
374+
arr: impl IntoIterator<Item = ArrayRef>,
375+
) -> Result<ListArray> {
376+
let arr = arr.into_iter().collect::<Vec<_>>();
377+
if arr.is_empty() {
378+
return _internal_err!("Cannot wrap empty array into list array");
379+
}
380+
381+
let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
382+
// All input arrays are assumed to share the data type of the first one
383+
let data_type = arr[0].data_type().to_owned();
384+
let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
385+
Ok(ListArray::new(
386+
Arc::new(Field::new("item", data_type, true)),
387+
OffsetBuffer::from_lengths(lens),
388+
arrow::compute::concat(values.as_slice())?,
389+
None,
390+
))
391+
}
392+
351393
/// An extension trait for smart pointers. Provides an interface to get a
352394
/// raw pointer to the data (with metadata stripped away).
353395
///

datafusion/core/Cargo.toml

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -57,61 +57,61 @@ arrow = { workspace = true }
5757
arrow-array = { workspace = true }
5858
arrow-schema = { workspace = true }
5959
async-compression = { version = "0.4.0", features = ["bzip2", "gzip", "xz", "zstd", "futures-io", "tokio"], optional = true }
60-
async-trait = "0.1.73"
61-
bytes = "1.4"
60+
async-trait = { workspace = true }
61+
bytes = { workspace = true }
6262
bzip2 = { version = "0.4.3", optional = true }
6363
chrono = { workspace = true }
64-
dashmap = "5.4.0"
64+
dashmap = { workspace = true }
6565
datafusion-common = { path = "../common", version = "32.0.0", features = ["object_store"], default-features = false }
66-
datafusion-execution = { path = "../execution", version = "32.0.0" }
67-
datafusion-expr = { path = "../expr", version = "32.0.0" }
66+
datafusion-execution = { workspace = true }
67+
datafusion-expr = { workspace = true }
6868
datafusion-optimizer = { path = "../optimizer", version = "32.0.0", default-features = false }
6969
datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0", default-features = false }
70-
datafusion-physical-plan = { path = "../physical-plan", version = "32.0.0", default-features = false }
71-
datafusion-sql = { path = "../sql", version = "32.0.0" }
70+
datafusion-physical-plan = { workspace = true }
71+
datafusion-sql = { workspace = true }
7272
flate2 = { version = "1.0.24", optional = true }
73-
futures = "0.3"
73+
futures = { workspace = true }
7474
glob = "0.3.0"
7575
half = { version = "2.1", default-features = false }
7676
hashbrown = { version = "0.14", features = ["raw"] }
77-
indexmap = "2.0.0"
78-
itertools = "0.11"
79-
log = "^0.4"
77+
indexmap = { workspace = true }
78+
itertools = { workspace = true }
79+
log = { workspace = true }
8080
num-traits = { version = "0.2", optional = true }
81-
num_cpus = "1.13.0"
82-
object_store = "0.7.0"
83-
parking_lot = "0.12"
81+
num_cpus = { workspace = true }
82+
object_store = { workspace = true }
83+
parking_lot = { workspace = true }
8484
parquet = { workspace = true, optional = true }
8585
pin-project-lite = "^0.2.7"
86-
rand = "0.8"
86+
rand = { workspace = true }
8787
sqlparser = { workspace = true }
88-
tempfile = "3"
88+
tempfile = { workspace = true }
8989
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
9090
tokio-util = { version = "0.7.4", features = ["io"] }
91-
url = "2.2"
91+
url = { workspace = true }
9292
uuid = { version = "1.0", features = ["v4"] }
9393
xz2 = { version = "0.1", optional = true }
9494
zstd = { version = "0.13", optional = true, default-features = false }
9595

9696
[dev-dependencies]
97-
async-trait = "0.1.53"
98-
bigdecimal = "0.4.1"
97+
async-trait = { workspace = true }
98+
bigdecimal = { workspace = true }
9999
criterion = { version = "0.5", features = ["async_tokio"] }
100100
csv = "1.1.6"
101-
ctor = "0.2.0"
102-
doc-comment = "0.3"
103-
env_logger = "0.10"
104-
half = "2.2.1"
101+
ctor = { workspace = true }
102+
doc-comment = { workspace = true }
103+
env_logger = { workspace = true }
104+
half = { workspace = true }
105105
postgres-protocol = "0.6.4"
106106
postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
107107
rand = { version = "0.8", features = ["small_rng"] }
108108
rand_distr = "0.4.3"
109109
regex = "1.5.4"
110-
rstest = "0.18.0"
110+
rstest = { workspace = true }
111111
rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
112-
serde_json = "1"
112+
serde_json = { workspace = true }
113113
test-utils = { path = "../../test-utils" }
114-
thiserror = "1.0.37"
114+
thiserror = { workspace = true }
115115
tokio-postgres = "0.7.7"
116116
[target.'cfg(not(target_os = "windows"))'.dev-dependencies]
117117
nix = { version = "0.27.1", features = ["fs"] }

datafusion/execution/Cargo.toml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ path = "src/lib.rs"
3535
[dependencies]
3636
arrow = { workspace = true }
3737
chrono = { version = "0.4", default-features = false }
38-
dashmap = "5.4.0"
39-
datafusion-common = { path = "../common", version = "32.0.0" }
40-
datafusion-expr = { path = "../expr", version = "32.0.0" }
41-
futures = "0.3"
38+
dashmap = { workspace = true }
39+
datafusion-common = { workspace = true }
40+
datafusion-expr = { workspace = true }
41+
futures = { workspace = true }
4242
hashbrown = { version = "0.14", features = ["raw"] }
43-
log = "^0.4"
44-
object_store = "0.7.0"
45-
parking_lot = "0.12"
46-
rand = "0.8"
47-
tempfile = "3"
48-
url = "2.2"
43+
log = { workspace = true }
44+
object_store = { workspace = true }
45+
parking_lot = { workspace = true }
46+
rand = { workspace = true }
47+
tempfile = { workspace = true }
48+
url = { workspace = true }

datafusion/expr/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ path = "src/lib.rs"
3838
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
3939
arrow = { workspace = true }
4040
arrow-array = { workspace = true }
41-
datafusion-common = { path = "../common", version = "32.0.0", default-features = false }
41+
datafusion-common = { workspace = true }
4242
sqlparser = { workspace = true }
4343
strum = { version = "0.25.0", features = ["derive"] }
4444
strum_macros = "0.25.0"
4545

4646
[dev-dependencies]
47-
ctor = "0.2.0"
48-
env_logger = "0.10"
47+
ctor = { workspace = true }
48+
env_logger = { workspace = true }

0 commit comments

Comments
 (0)