From c15262e615c641370dae552d2383d36e0f8cc87a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 06:46:47 -0400 Subject: [PATCH 1/3] Improve documentation on `StringArrayType` trait --- datafusion/functions/src/string/common.rs | 58 +++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 54aebb039046..9d62cb5e9b5b 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Common utilities for implementing string functions + use std::fmt::{Display, Formatter}; use std::sync::Arc; @@ -252,7 +254,63 @@ impl<'a> ColumnarValueRef<'a> { } } +/// Abstracts iteration over different types of string arrays. +/// +/// This trait helps write generic code for string functions that can work with +/// different types of string arrays. +/// +/// Currently three types are supported: +/// - [`StringArray`] +/// - [`LargeStringArray`] +/// - [`StringViewArray`] +/// +/// It is inspired / copied from arrow-rs: +/// +/// +/// # Examples +/// Generic function that works for both [`StringArray`] and [`StringViewArray`]: +/// ``` +/// # use arrow::array::{StringArray, LargeStringArray, StringViewArray}; +/// # use datafusion_functions::string::common::StringArrayType; +/// +/// /// Combines string values for any StringArrayType type. 
It can be invoked on +/// /// any combination of `StringArray`, `LargeStringArray` or `StringViewArray` +/// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec +/// where S1: StringArrayType<'a>, S2: StringArrayType<'a> +/// { +/// array1 +/// .iter() +/// .zip(array2.iter()) +/// .map(|(s1, s2)| { +/// if let (Some(s1), Some(s2)) = (s1, s2) { +/// format!("{s1}{s2}") +/// } else { +/// "None".to_string() +/// } +/// }) +/// .collect() +/// } +/// +/// let string_array = StringArray::from(vec!["foo", "bar"]); +/// let large_string_array = LargeStringArray::from(vec!["foo2", "bar2"]); +/// let string_view_array = StringViewArray::from(vec!["foo3", "bar3"]); +/// +/// // can invoke this function with a string array and large string array +/// assert_eq!( +/// combine_values(&string_array, &large_string_array), +/// vec![String::from("foofoo2"), String::from("barbar2")] +/// ); +/// +/// // Can call the same function with string array and string view array +/// assert_eq!( +/// combine_values(&string_array, &string_view_array), +/// vec![String::from("foofoo3"), String::from("barbar3")] +/// ); +/// ``` pub trait StringArrayType<'a>: ArrayAccessor + Sized { + /// Return an [`ArrayIter`] over the values of the array. + /// + /// This iterator returns `Option<&str>` for each item in the array. 
fn iter(&self) -> ArrayIter; } From 05d4f416a17d4e4cdc4ffce88792619d90d6b4ef Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 06:49:05 -0400 Subject: [PATCH 2/3] tweaks --- datafusion/functions/src/string/common.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 9d62cb5e9b5b..4b9ad50c306b 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -264,11 +264,13 @@ impl<'a> ColumnarValueRef<'a> { /// - [`LargeStringArray`] /// - [`StringViewArray`] /// -/// It is inspired / copied from arrow-rs: -/// +/// It is inspired / copied from [arrow-rs]. +/// +/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/bf0ea9129e617e4a3cf915a900b747cc5485315f/arrow-string/src/like.rs#L151-L157 /// /// # Examples -/// Generic function that works for both [`StringArray`] and [`StringViewArray`]: +/// Generic function that works for [`StringArray`], [`LargeStringArray`] +/// and [`StringViewArray`]: /// ``` /// # use arrow::array::{StringArray, LargeStringArray, StringViewArray}; /// # use datafusion_functions::string::common::StringArrayType; @@ -278,10 +280,12 @@ impl<'a> ColumnarValueRef<'a> { /// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec /// where S1: StringArrayType<'a>, S2: StringArrayType<'a> /// { +/// // iterate over the elements of the 2 arrays in parallel /// array1 /// .iter() /// .zip(array2.iter()) /// .map(|(s1, s2)| { +/// // if both values are non null, combine them /// if let (Some(s1), Some(s2)) = (s1, s2) { /// format!("{s1}{s2}") /// } else { @@ -307,6 +311,8 @@ impl<'a> ColumnarValueRef<'a> { /// vec![String::from("foofoo3"), String::from("barbar3")] /// ); /// ``` +/// +/// [`LargeStringArray`]: arrow::array::LargeStringArray pub trait StringArrayType<'a>: ArrayAccessor + Sized { /// Return an [`ArrayIter`] over the values of the array. 
/// From da1899bf56c7237a6df3b781454f07b43d31ce93 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 21 Aug 2024 14:58:18 -0400 Subject: [PATCH 3/3] Update datafusion/functions/src/string/common.rs Co-authored-by: Oleks V --- datafusion/functions/src/string/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 4b9ad50c306b..6f23a5ddd236 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -256,7 +256,7 @@ impl<'a> ColumnarValueRef<'a> { /// Abstracts iteration over different types of string arrays. /// -/// This trait helps write generic code for string functions that can work with +/// The [`StringArrayType`] trait helps write generic code for string functions that can work with /// different types of string arrays. /// /// Currently three types are supported: