From dadb1bcdc0696c801ce8c9510dec62a2153146d4 Mon Sep 17 00:00:00 2001 From: Eren Avsarogullari Date: Thu, 7 Mar 2024 23:38:19 -0800 Subject: [PATCH 1/3] Issue-9497 - Port StringToArray to function-arrays --- .../tests/dataframe/dataframe_functions.rs | 19 ++++ datafusion/expr/src/built_in_function.rs | 18 --- datafusion/expr/src/expr_fn.rs | 2 - datafusion/functions-array/src/kernels.rs | 103 +++++++++++++++++- datafusion/functions-array/src/lib.rs | 2 + datafusion/functions-array/src/udf.rs | 76 +++++++++++++ .../physical-expr/src/array_expressions.rs | 92 +--------------- datafusion/physical-expr/src/functions.rs | 15 --- datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/prost.rs | 4 +- .../proto/src/logical_plan/from_proto.rs | 12 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - .../tests/cases/roundtrip_logical_plan.rs | 1 + docs/source/user-guide/expressions.md | 73 +++++++------ .../source/user-guide/sql/scalar_functions.md | 3 +- 15 files changed, 240 insertions(+), 183 deletions(-) diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index cea701492910..c052c3b97e2f 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -870,3 +870,22 @@ async fn test_fn_array_to_string() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn test_fn_string_to_array() -> Result<()> { + let expr = string_to_array(lit("abc##def##ghi"), lit("##"), lit("!")); + + let expected = [ + "+-------------------------------------------------------------+", + "| string_to_array(Utf8(\"abc##def##ghi\"),Utf8(\"##\"),Utf8(\"!\")) |", + "+-------------------------------------------------------------+", + "| [abc, def, ghi] |", + "| [abc, def, ghi] |", + "| [abc, def, ghi] |", + "| [abc, def, ghi] |", + "+-------------------------------------------------------------+", + ]; + assert_fn_batches!(expr, expected); + + Ok(()) +} diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 6351e877df00..9c2b6683e872 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -210,8 +210,6 @@ pub enum BuiltinScalarFunction { SHA512, /// split_part SplitPart, - /// string_to_array - StringToArray, /// starts_with StartsWith, /// strpos @@ -383,7 +381,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SHA512 => Volatility::Immutable, BuiltinScalarFunction::Digest => Volatility::Immutable, BuiltinScalarFunction::SplitPart => Volatility::Immutable, - BuiltinScalarFunction::StringToArray => Volatility::Immutable, BuiltinScalarFunction::StartsWith => Volatility::Immutable, BuiltinScalarFunction::Strpos => Volatility::Immutable, BuiltinScalarFunction::Substr => Volatility::Immutable, @@ -556,11 +553,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SplitPart => { utf8_to_str_type(&input_expr_types[0], "split_part") } - BuiltinScalarFunction::StringToArray => Ok(List(Arc::new(Field::new( - "item", - input_expr_types[0].clone(), - true, - )))), BuiltinScalarFunction::StartsWith => Ok(Boolean), BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { @@ -833,13 +825,6 @@ impl BuiltinScalarFunction { ], self.volatility(), ), - BuiltinScalarFunction::StringToArray => Signature::one_of( - vec![ - TypeSignature::Uniform(2, vec![Utf8, LargeUtf8]), - TypeSignature::Uniform(3, vec![Utf8, LargeUtf8]), - ], - self.volatility(), - ), BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos @@ -1087,9 +1072,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Rpad => &["rpad"], BuiltinScalarFunction::Rtrim => &["rtrim"], BuiltinScalarFunction::SplitPart => &["split_part"], - BuiltinScalarFunction::StringToArray => { - &["string_to_array", "string_to_list"] - } BuiltinScalarFunction::StartsWith => &["starts_with"], BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index d1ae06d68f13..5239c67b52bc 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -754,7 +754,6 @@ scalar_expr!(SHA256, sha256, string, "SHA-256 hash"); scalar_expr!(SHA384, sha384, string, "SHA-384 hash"); scalar_expr!(SHA512, sha512, string, "SHA-512 hash"); scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index."); -scalar_expr!(StringToArray, string_to_array, string delimiter null_string, "splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`"); scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`"); scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`"); scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); @@ -1275,7 +1274,6 @@ mod test { test_scalar_expr!(SHA384, sha384, string); test_scalar_expr!(SHA512, sha512, string); test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); - test_scalar_expr!(StringToArray, string_to_array, expr, delimiter, null_value); test_scalar_expr!(StartsWith, starts_with, string, characters); test_scalar_expr!(EndsWith, ends_with, string, characters); test_scalar_expr!(Strpos, strpos, string, substring); diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index bb5c4ef53e43..af24ee47c1a8 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -19,20 +19,19 @@ use arrow::array::{ Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, - GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, - OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, + GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeListArray, + LargeStringArray, ListArray, ListBuilder, OffsetSizeTrait, StringArray, UInt16Array, + UInt32Array, UInt64Array, UInt8Array, StringBuilder }; -use arrow::array::{LargeListArray, ListArray}; use arrow::buffer::OffsetBuffer; use arrow::datatypes::Field; use arrow::datatypes::UInt64Type; use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType}; use datafusion_common::cast::{ - as_date32_array, as_generic_list_array, as_int64_array, as_interval_mdn_array, + as_date32_array, as_generic_string_array, as_generic_list_array, as_int64_array, as_interval_mdn_array, as_large_list_array, as_list_array, as_null_array, as_string_array, }; -use datafusion_common::DataFusionError; -use datafusion_common::{exec_err, not_impl_datafusion_err, Result}; +use datafusion_common::{DataFusionError, exec_err, not_impl_datafusion_err, Result}; use std::any::type_name; use std::sync::Arc; @@ -261,6 +260,98 @@ pub(super) fn array_to_string(args: &[ArrayRef]) -> Result { Ok(Arc::new(string_arr)) } +/// Splits string at occurrences of delimiter and returns an array of parts +/// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]' +pub fn string_to_array(args: &[ArrayRef]) -> Result { + if args.len() < 2 || args.len() > 3 { + return exec_err!("string_to_array expects two or three arguments"); + } + let string_array = as_generic_string_array::(&args[0])?; + let delimiter_array = as_generic_string_array::(&args[1])?; + + let mut list_builder = ListBuilder::new(StringBuilder::with_capacity( + string_array.len(), + string_array.get_buffer_memory_size(), + )); + + match args.len() { + 2 => { + string_array.iter().zip(delimiter_array.iter()).for_each( + |(string, delimiter)| { + match (string, delimiter) { + (Some(string), Some("")) => { + list_builder.values().append_value(string); + list_builder.append(true); + } + (Some(string), Some(delimiter)) => { + string.split(delimiter).for_each(|s| { + list_builder.values().append_value(s); + }); + list_builder.append(true); + } + (Some(string), None) => { + string.chars().map(|c| c.to_string()).for_each(|c| { + list_builder.values().append_value(c); + }); + list_builder.append(true); + } + _ => list_builder.append(false), // null value + } + }, + ); + } + + 3 => { + let null_value_array = as_generic_string_array::(&args[2])?; + string_array + .iter() + .zip(delimiter_array.iter()) + .zip(null_value_array.iter()) + .for_each(|((string, delimiter), null_value)| { + match (string, delimiter) { + (Some(string), Some("")) => { + if Some(string) == null_value { + list_builder.values().append_null(); + } else { + list_builder.values().append_value(string); + } + list_builder.append(true); + } + (Some(string), Some(delimiter)) => { + string.split(delimiter).for_each(|s| { + if Some(s) == null_value { + list_builder.values().append_null(); + } else { + list_builder.values().append_value(s); + } + }); + list_builder.append(true); + } + (Some(string), None) => { + string.chars().map(|c| c.to_string()).for_each(|c| { + if Some(c.as_str()) == null_value { + list_builder.values().append_null(); + } else { + list_builder.values().append_value(c); + } + }); + list_builder.append(true); + } + _ => list_builder.append(false), // null value + } + }); + } + _ => { + return exec_err!( + "Expect string_to_array function to take two or three parameters" + ) + } + } + + let list_array = list_builder.finish(); + Ok(Arc::new(list_array) as ArrayRef) +} + /// Generates an array of integers from start to stop with a given step. /// /// This function takes 1 to 3 ArrayRefs as arguments, representing start, stop, and step values. diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index cf1e35d60841..cdcaa9e16288 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -55,6 +55,7 @@ pub mod expr_fn { pub use super::udf::array_length; pub use super::udf::array_ndims; pub use super::udf::array_to_string; + pub use super::udf::string_to_array; pub use super::udf::cardinality; pub use super::udf::flatten; pub use super::udf::gen_series; @@ -65,6 +66,7 @@ pub mod expr_fn { pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { let functions: Vec> = vec![ udf::array_to_string_udf(), + udf::string_to_array_udf(), udf::range_udf(), udf::gen_series_udf(), udf::array_dims_udf(), diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 854535c237b9..fbff3b9d0f84 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -29,6 +29,7 @@ use datafusion_expr::TypeSignature; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; +use arrow::array::{NullArray, StringArray}; // Create static instances of ScalarUDFs for each function make_udf_function!(ArrayToString, @@ -89,6 +90,81 @@ impl ScalarUDFImpl for ArrayToString { } } +make_udf_function!(StringToArray, + string_to_array, + string delimiter null_string, // arg name + "splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`", // doc + string_to_array_udf // internal function name +); +#[derive(Debug)] +pub(super) struct StringToArray { + signature: Signature, + aliases: Vec, +} + +impl StringToArray { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: vec![ + String::from("string_to_array"), + String::from("string_to_list"), + ], + } + } +} + +impl ScalarUDFImpl for StringToArray { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "string_to_array" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + Utf8 | LargeUtf8 => List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + ))), + _ => { + return plan_err!("The string_to_array function can only accept Utf8 or LargeUtf8."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let mut args = ColumnarValue::values_to_arrays(args)?; + // Case: delimiter is NULL, needs to be handled as well. + if args[1].as_any().is::() { + args[1] = Arc::new(StringArray::new_null(args[1].len())); + }; + + match args[0].data_type() { + arrow::datatypes::DataType::Utf8 => { + crate::kernels::string_to_array::(&args).map(ColumnarValue::Array) + } + arrow::datatypes::DataType::LargeUtf8 => { + crate::kernels::string_to_array::(&args).map(ColumnarValue::Array) + } + other => { + exec_err!("unsupported type for string_to_array function as {other}") + } + } + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + make_udf_function!( Range, range, diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 3f7ea57df28e..ff030845aaba 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -30,8 +30,7 @@ use arrow_buffer::{ArrowNativeType, NullBuffer}; use arrow_schema::{FieldRef, SortOptions}; use datafusion_common::cast::{ - as_generic_list_array, as_generic_string_array, as_int64_array, as_large_list_array, - as_list_array, as_string_array, + as_generic_list_array, as_int64_array, as_large_list_array, as_list_array, as_string_array, }; use datafusion_common::utils::array_into_list_array; use datafusion_common::{ @@ -1587,95 +1586,6 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result { general_set_op(array1, array2, SetOp::Intersect) } -/// Splits string at occurrences of delimiter and returns an array of parts -/// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]' -pub fn string_to_array(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let delimiter_array = as_generic_string_array::(&args[1])?; - - let mut list_builder = ListBuilder::new(StringBuilder::with_capacity( - string_array.len(), - string_array.get_buffer_memory_size(), - )); - - match args.len() { - 2 => { - string_array.iter().zip(delimiter_array.iter()).for_each( - |(string, delimiter)| { - match (string, delimiter) { - (Some(string), Some("")) => { - list_builder.values().append_value(string); - list_builder.append(true); - } - (Some(string), Some(delimiter)) => { - string.split(delimiter).for_each(|s| { - list_builder.values().append_value(s); - }); - list_builder.append(true); - } - (Some(string), None) => { - string.chars().map(|c| c.to_string()).for_each(|c| { - list_builder.values().append_value(c); - }); - list_builder.append(true); - } - _ => list_builder.append(false), // null value - } - }, - ); - } - - 3 => { - let null_value_array = as_generic_string_array::(&args[2])?; - string_array - .iter() - .zip(delimiter_array.iter()) - .zip(null_value_array.iter()) - .for_each(|((string, delimiter), null_value)| { - match (string, delimiter) { - (Some(string), Some("")) => { - if Some(string) == null_value { - list_builder.values().append_null(); - } else { - list_builder.values().append_value(string); - } - list_builder.append(true); - } - (Some(string), Some(delimiter)) => { - string.split(delimiter).for_each(|s| { - if Some(s) == null_value { - list_builder.values().append_null(); - } else { - list_builder.values().append_value(s); - } - }); - list_builder.append(true); - } - (Some(string), None) => { - string.chars().map(|c| c.to_string()).for_each(|c| { - if Some(c.as_str()) == null_value { - list_builder.values().append_null(); - } else { - list_builder.values().append_value(c); - } - }); - list_builder.append(true); - } - _ => list_builder.append(false), // null value - } - }); - } - _ => { - return exec_err!( - "Expect string_to_array function to take two or three parameters" - ) - } - } - - let list_array = list_builder.finish(); - Ok(Arc::new(list_array) as ArrayRef) -} - pub fn general_array_distinct( array: &GenericListArray, field: &FieldRef, diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index e9ac9bd2d6a2..db38e358db96 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -602,21 +602,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function split_part") } }), - BuiltinScalarFunction::StringToArray => { - Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => make_scalar_function_inner( - array_expressions::string_to_array::, - )(args), - DataType::LargeUtf8 => make_scalar_function_inner( - array_expressions::string_to_array::, - )(args), - other => { - exec_err!( - "Unsupported data type {other:?} for function string_to_array" - ) - } - }) - } BuiltinScalarFunction::StartsWith => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::starts_with::)(args) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index c2a36af2e72d..8d8ae3691a81 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -664,7 +664,7 @@ enum ScalarFunction { Iszero = 114; // 115 was ArrayEmpty ArrayPopBack = 116; - StringToArray = 117; + // 117 was StringToArray // 118 was ToTimestampNanos ArrayIntersect = 119; ArrayUnion = 120; diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 9b34b084c95d..5ee6aa46084f 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2736,7 +2736,7 @@ pub enum ScalarFunction { Iszero = 114, /// 115 was ArrayEmpty ArrayPopBack = 116, - StringToArray = 117, + // 117 was StringToArray /// 118 was ToTimestampNanos ArrayIntersect = 119, ArrayUnion = 120, @@ -2855,7 +2855,6 @@ impl ScalarFunction { ScalarFunction::Nanvl => "Nanvl", ScalarFunction::Iszero => "Iszero", ScalarFunction::ArrayPopBack => "ArrayPopBack", - ScalarFunction::StringToArray => "StringToArray", ScalarFunction::ArrayIntersect => "ArrayIntersect", ScalarFunction::ArrayUnion => "ArrayUnion", ScalarFunction::OverLay => "OverLay", @@ -2964,7 +2963,6 @@ impl ScalarFunction { "Nanvl" => Some(Self::Nanvl), "Iszero" => Some(Self::Iszero), "ArrayPopBack" => Some(Self::ArrayPopBack), - "StringToArray" => Some(Self::StringToArray), "ArrayIntersect" => Some(Self::ArrayIntersect), "ArrayUnion" => Some(Self::ArrayUnion), "OverLay" => Some(Self::OverLay), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 8dba553b4801..8dc8658631c9 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -60,9 +60,9 @@ use datafusion_expr::{ logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power, radians, random, repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, - sha512, signum, sin, sinh, split_part, sqrt, starts_with, string_to_array, strpos, - struct_fun, substr, substr_index, substring, tan, tanh, to_hex, translate, trim, - trunc, upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, + sha512, signum, sin, sinh, split_part, sqrt, starts_with, strpos, struct_fun, substr, + substr_index, substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid, + AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, @@ -521,7 +521,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Right => Self::Right, ScalarFunction::Rpad => Self::Rpad, ScalarFunction::SplitPart => Self::SplitPart, - ScalarFunction::StringToArray => Self::StringToArray, ScalarFunction::StartsWith => Self::StartsWith, ScalarFunction::Strpos => Self::Strpos, ScalarFunction::Substr => Self::Substr, @@ -1742,11 +1741,6 @@ pub fn parse_expr( ScalarFunction::ArrowTypeof => { Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::StringToArray => Ok(string_to_array( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - parse_expr(&args[2], registry, codec)?, - )), ScalarFunction::OverLay => Ok(overlay( args.to_owned() .iter() diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 393cc7826771..a9867b8f03bb 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1502,7 +1502,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Right => Self::Right, BuiltinScalarFunction::Rpad => Self::Rpad, BuiltinScalarFunction::SplitPart => Self::SplitPart, - BuiltinScalarFunction::StringToArray => Self::StringToArray, BuiltinScalarFunction::StartsWith => Self::StartsWith, BuiltinScalarFunction::Strpos => Self::Strpos, BuiltinScalarFunction::Substr => Self::Substr, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 76402604acf0..3899f64a37f8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -586,6 +586,7 @@ async fn roundtrip_expr_api() -> Result<()> { array_dims(make_array(vec![lit(1), lit(2), lit(3)])), array_ndims(make_array(vec![lit(1), lit(2), lit(3)])), cardinality(make_array(vec![lit(1), lit(2), lit(3)])), + string_to_array(lit("abc#def#ghl"), lit("#"), lit(",")), range(lit(1), lit(10), lit(2)), gen_series(lit(1), lit(10), lit(2)), array_append(make_array(vec![lit(1), lit(2), lit(3)]), lit(4)), diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index dcb599b9b3b2..0c08e00703ae 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -207,42 +207,43 @@ select log(-1), log(0), sqrt(-1); ## Array Expressions -| Syntax | Description | -| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | -| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | -| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | -| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | -| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | -| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | -| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | -| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | -| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | -| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | -| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | -| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | -| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | -| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | -| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | -| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | -| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | -| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | -| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | -| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | -| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | -| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | -| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | -| array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | -| array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | -| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | -| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | -| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 4, 5, 6]` | -| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | -| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | -| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | -| trim_array(array, n) | Deprecated | +| Syntax | Description | +|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | +| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | +| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | +| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | +| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | +| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | +| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | +| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | +| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | +| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | +| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | +| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | +| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | +| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | +| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | +| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | +| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | +| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | +| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | +| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | +| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | +| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | +| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | +| array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | +| array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | +| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | +| string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. `string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | +| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | +| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | +| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | +| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 4, 5, 6]` | +| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | +| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | +| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | +| trim_array(array, n) | Deprecated | ## Regular Expressions diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index b0385b492365..5f30ae7db8b8 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3112,7 +3112,7 @@ _Alias of [make_array](#make_array)._ ### `string_to_array` -Splits a string in to an array of substrings based on a delimiter. Any substrings matching the optional `null_str` argument are replaced with NULL. +Splits a string in to an array of substrings based on a delimiter. Any substrings matching the optional `null_str` argument are replaced with NULL. `SELECT string_to_array('abc##def', '##')` or `SELECT string_to_array('abc def', ' ', 'def')` ``` starts_with(str, delimiter[, null_str]) @@ -3124,6 +3124,7 @@ starts_with(str, delimiter[, null_str]) - **delimiter**: Delimiter string to split on. - **null_str**: Substring values to be replaced with `NULL` + #### Aliases - string_to_list From f27df27f0c156e6debb2bd15dac831c36215d4e9 Mon Sep 17 00:00:00 2001 From: Eren Avsarogullari Date: Sun, 10 Mar 2024 12:23:53 -0700 Subject: [PATCH 2/3] Issue-9497 - Fix formatting issues --- datafusion/functions-array/src/kernels.rs | 11 ++++++----- datafusion/functions-array/src/lib.rs | 2 +- datafusion/functions-array/src/udf.rs | 14 +++++++------- datafusion/physical-expr/src/array_expressions.rs | 3 ++- datafusion/proto/src/generated/pbjson.rs | 3 --- datafusion/proto/src/logical_plan/from_proto.rs | 5 ++--- docs/source/user-guide/expressions.md | 2 +- docs/source/user-guide/sql/scalar_functions.md | 4 ++-- 8 files changed, 21 insertions(+), 23 deletions(-) diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index af24ee47c1a8..6d843aa4bb05 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -20,18 +20,19 @@ use arrow::array::{ Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeListArray, - LargeStringArray, ListArray, ListBuilder, OffsetSizeTrait, StringArray, UInt16Array, - UInt32Array, UInt64Array, UInt8Array, StringBuilder + LargeStringArray, ListArray, ListBuilder, OffsetSizeTrait, StringArray, + StringBuilder, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; use arrow::buffer::OffsetBuffer; use arrow::datatypes::Field; use arrow::datatypes::UInt64Type; use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType}; use datafusion_common::cast::{ - as_date32_array, as_generic_string_array, as_generic_list_array, as_int64_array, as_interval_mdn_array, - as_large_list_array, as_list_array, as_null_array, as_string_array, + as_date32_array, as_generic_list_array, as_generic_string_array, as_int64_array, + as_interval_mdn_array, as_large_list_array, as_list_array, as_null_array, + as_string_array, }; -use datafusion_common::{DataFusionError, exec_err, not_impl_datafusion_err, Result}; +use datafusion_common::{exec_err, not_impl_datafusion_err, DataFusionError, Result}; use std::any::type_name; use std::sync::Arc; diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index cdcaa9e16288..0f395f227027 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -55,11 +55,11 @@ pub mod expr_fn { pub use super::udf::array_length; pub use super::udf::array_ndims; pub use super::udf::array_to_string; - pub use super::udf::string_to_array; pub use super::udf::cardinality; pub use super::udf::flatten; pub use super::udf::gen_series; pub use super::udf::range; + pub use super::udf::string_to_array; } /// Registers all enabled packages with a [`FunctionRegistry`] diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index fbff3b9d0f84..fc1cc281bcc3 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -17,6 +17,7 @@ //! [`ScalarUDFImpl`] definitions for array functions. +use arrow::array::{NullArray, StringArray}; use arrow::datatypes::DataType; use arrow::datatypes::Field; use arrow::datatypes::IntervalUnit::MonthDayNano; @@ -29,7 +30,6 @@ use datafusion_expr::TypeSignature; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; -use arrow::array::{NullArray, StringArray}; // Create static instances of ScalarUDFs for each function make_udf_function!(ArrayToString, @@ -129,13 +129,13 @@ impl ScalarUDFImpl for StringToArray { fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { use DataType::*; Ok(match arg_types[0] { - Utf8 | LargeUtf8 => List(Arc::new(Field::new( - "item", - arg_types[0].clone(), - true, - ))), + Utf8 | LargeUtf8 => { + List(Arc::new(Field::new("item", arg_types[0].clone(), true))) + } _ => { - return plan_err!("The string_to_array function can only accept Utf8 or LargeUtf8."); + return plan_err!( + "The string_to_array function can only accept Utf8 or LargeUtf8." + ); } }) } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index ff030845aaba..ed656660abba 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -30,7 +30,8 @@ use arrow_buffer::{ArrowNativeType, NullBuffer}; use arrow_schema::{FieldRef, SortOptions}; use datafusion_common::cast::{ - as_generic_list_array, as_int64_array, as_large_list_array, as_list_array, as_string_array, + as_generic_list_array, as_int64_array, as_large_list_array, as_list_array, + as_string_array, }; use datafusion_common::utils::array_into_list_array; use datafusion_common::{ diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 0ec6de8f4072..961a626463cd 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22185,7 +22185,6 @@ impl serde::Serialize for ScalarFunction { Self::Nanvl => "Nanvl", Self::Iszero => "Iszero", Self::ArrayPopBack => "ArrayPopBack", - Self::StringToArray => "StringToArray", Self::ArrayIntersect => "ArrayIntersect", Self::ArrayUnion => "ArrayUnion", Self::OverLay => "OverLay", @@ -22300,7 +22299,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Nanvl", "Iszero", "ArrayPopBack", - "StringToArray", "ArrayIntersect", "ArrayUnion", "OverLay", @@ -22444,7 +22442,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Nanvl" => Ok(ScalarFunction::Nanvl), "Iszero" => Ok(ScalarFunction::Iszero), "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack), - "StringToArray" => Ok(ScalarFunction::StringToArray), "ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect), "ArrayUnion" => Ok(ScalarFunction::ArrayUnion), "OverLay" => Ok(ScalarFunction::OverLay), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 8dc8658631c9..739503a94219 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -62,9 +62,8 @@ use datafusion_expr::{ random, repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt, starts_with, strpos, struct_fun, substr, substr_index, substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid, - AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, - BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, - GroupingSet, + AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, + Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index 0c08e00703ae..62931301987c 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -235,7 +235,6 @@ select log(-1), log(0), sqrt(-1); | array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | | array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | | array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | -| string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. `string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | | array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | | array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | | array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | @@ -243,6 +242,7 @@ select log(-1), log(0), sqrt(-1); | cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | | make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | | range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | +| string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. `string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | | trim_array(array, n) | Deprecated | ## Regular Expressions diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 5f30ae7db8b8..7496039116a5 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3112,7 +3112,8 @@ _Alias of [make_array](#make_array)._ ### `string_to_array` -Splits a string in to an array of substrings based on a delimiter. Any substrings matching the optional `null_str` argument are replaced with NULL. `SELECT string_to_array('abc##def', '##')` or `SELECT string_to_array('abc def', ' ', 'def')` +Splits a string in to an array of substrings based on a delimiter. Any substrings matching the optional `null_str` argument are replaced with NULL. +`SELECT string_to_array('abc##def', '##')` or `SELECT string_to_array('abc def', ' ', 'def')` ``` starts_with(str, delimiter[, null_str]) @@ -3124,7 +3125,6 @@ starts_with(str, delimiter[, null_str]) - **delimiter**: Delimiter string to split on. - **null_str**: Substring values to be replaced with `NULL` - #### Aliases - string_to_list From 424862c8e945dfc78364e8c65dcd8c3d3271b586 Mon Sep 17 00:00:00 2001 From: Eren Avsarogullari Date: Sun, 10 Mar 2024 18:03:57 -0700 Subject: [PATCH 3/3] Issue-9497 - Format expressions.md documentation --- .../tests/dataframe/dataframe_functions.rs | 19 ------------------- datafusion/proto/src/generated/prost.rs | 2 +- docs/source/user-guide/expressions.md | 2 +- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index c052c3b97e2f..cea701492910 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -870,22 +870,3 @@ async fn test_fn_array_to_string() -> Result<()> { Ok(()) } - -#[tokio::test] -async fn test_fn_string_to_array() -> Result<()> { - let expr = string_to_array(lit("abc##def##ghi"), lit("##"), lit("!")); - - let expected = [ - "+-------------------------------------------------------------+", - "| string_to_array(Utf8(\"abc##def##ghi\"),Utf8(\"##\"),Utf8(\"!\")) |", - "+-------------------------------------------------------------+", - "| [abc, def, ghi] |", - "| [abc, def, ghi] |", - "| [abc, def, ghi] |", - "| [abc, def, ghi] |", - "+-------------------------------------------------------------+", - ]; - assert_fn_batches!(expr, expected); - - Ok(()) -} diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 5ee6aa46084f..deaa977faaa7 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2736,7 +2736,7 @@ pub enum ScalarFunction { Iszero = 114, /// 115 was ArrayEmpty ArrayPopBack = 116, - // 117 was StringToArray + /// 117 was StringToArray /// 118 was ToTimestampNanos ArrayIntersect = 119, ArrayUnion = 120, diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index 62931301987c..17da8c3fc266 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -208,7 +208,7 @@ select log(-1), log(0), sqrt(-1); ## Array Expressions | Syntax | Description | -|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | | array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | | array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` |