| 
 | 1 | +// Licensed to the Apache Software Foundation (ASF) under one  | 
 | 2 | +// or more contributor license agreements.  See the NOTICE file  | 
 | 3 | +// distributed with this work for additional information  | 
 | 4 | +// regarding copyright ownership.  The ASF licenses this file  | 
 | 5 | +// to you under the Apache License, Version 2.0 (the  | 
 | 6 | +// "License"); you may not use this file except in compliance  | 
 | 7 | +// with the License.  You may obtain a copy of the License at  | 
 | 8 | +//  | 
 | 9 | +//   http://www.apache.org/licenses/LICENSE-2.0  | 
 | 10 | +//  | 
 | 11 | +// Unless required by applicable law or agreed to in writing,  | 
 | 12 | +// software distributed under the License is distributed on an  | 
 | 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY  | 
 | 14 | +// KIND, either express or implied.  See the License for the  | 
 | 15 | +// specific language governing permissions and limitations  | 
 | 16 | +// under the License.  | 
 | 17 | + | 
 | 18 | +//! [`StringAgg`] and [`StringAggAccumulator`] accumulator for the `string_agg` function  | 
 | 19 | +
  | 
 | 20 | +use arrow::array::ArrayRef;  | 
 | 21 | +use arrow_schema::DataType;  | 
 | 22 | +use datafusion_common::cast::as_generic_string_array;  | 
 | 23 | +use datafusion_common::Result;  | 
 | 24 | +use datafusion_common::{not_impl_err, ScalarValue};  | 
 | 25 | +use datafusion_expr::function::AccumulatorArgs;  | 
 | 26 | +use datafusion_expr::{  | 
 | 27 | +    Accumulator, AggregateUDFImpl, Expr, Signature, TypeSignature, Volatility,  | 
 | 28 | +};  | 
 | 29 | +use std::any::Any;  | 
 | 30 | + | 
 | 31 | +make_udaf_expr_and_func!(  | 
 | 32 | +    StringAgg,  | 
 | 33 | +    string_agg,  | 
 | 34 | +    expr delimiter,  | 
 | 35 | +    "Concatenates the values of string expressions and places separator values between them",  | 
 | 36 | +    string_agg_udaf  | 
 | 37 | +);  | 
 | 38 | + | 
 | 39 | +/// STRING_AGG aggregate expression  | 
 | 40 | +#[derive(Debug)]  | 
 | 41 | +pub struct StringAgg {  | 
 | 42 | +    signature: Signature,  | 
 | 43 | +}  | 
 | 44 | + | 
 | 45 | +impl StringAgg {  | 
 | 46 | +    /// Create a new StringAgg aggregate function  | 
 | 47 | +    pub fn new() -> Self {  | 
 | 48 | +        Self {  | 
 | 49 | +            signature: Signature::one_of(  | 
 | 50 | +                vec![  | 
 | 51 | +                    TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]),  | 
 | 52 | +                    TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),  | 
 | 53 | +                    TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Null]),  | 
 | 54 | +                ],  | 
 | 55 | +                Volatility::Immutable,  | 
 | 56 | +            ),  | 
 | 57 | +        }  | 
 | 58 | +    }  | 
 | 59 | +}  | 
 | 60 | + | 
 | 61 | +impl Default for StringAgg {  | 
 | 62 | +    fn default() -> Self {  | 
 | 63 | +        Self::new()  | 
 | 64 | +    }  | 
 | 65 | +}  | 
 | 66 | + | 
 | 67 | +impl AggregateUDFImpl for StringAgg {  | 
 | 68 | +    fn as_any(&self) -> &dyn Any {  | 
 | 69 | +        self  | 
 | 70 | +    }  | 
 | 71 | + | 
 | 72 | +    fn name(&self) -> &str {  | 
 | 73 | +        "string_agg"  | 
 | 74 | +    }  | 
 | 75 | + | 
 | 76 | +    fn signature(&self) -> &Signature {  | 
 | 77 | +        &self.signature  | 
 | 78 | +    }  | 
 | 79 | + | 
 | 80 | +    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {  | 
 | 81 | +        Ok(DataType::LargeUtf8)  | 
 | 82 | +    }  | 
 | 83 | + | 
 | 84 | +    fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {  | 
 | 85 | +        match &acc_args.input_exprs[1] {  | 
 | 86 | +            Expr::Literal(ScalarValue::Utf8(Some(delimiter)))  | 
 | 87 | +            | Expr::Literal(ScalarValue::LargeUtf8(Some(delimiter))) => {  | 
 | 88 | +                Ok(Box::new(StringAggAccumulator::new(delimiter)))  | 
 | 89 | +            }  | 
 | 90 | +            Expr::Literal(ScalarValue::Utf8(None))  | 
 | 91 | +            | Expr::Literal(ScalarValue::LargeUtf8(None))  | 
 | 92 | +            | Expr::Literal(ScalarValue::Null) => {  | 
 | 93 | +                Ok(Box::new(StringAggAccumulator::new("")))  | 
 | 94 | +            }  | 
 | 95 | +            _ => not_impl_err!(  | 
 | 96 | +                "StringAgg not supported for delimiter {}",  | 
 | 97 | +                &acc_args.input_exprs[1]  | 
 | 98 | +            ),  | 
 | 99 | +        }  | 
 | 100 | +    }  | 
 | 101 | +}  | 
 | 102 | + | 
 | 103 | +#[derive(Debug)]  | 
 | 104 | +pub(crate) struct StringAggAccumulator {  | 
 | 105 | +    values: Option<String>,  | 
 | 106 | +    delimiter: String,  | 
 | 107 | +}  | 
 | 108 | + | 
 | 109 | +impl StringAggAccumulator {  | 
 | 110 | +    pub fn new(delimiter: &str) -> Self {  | 
 | 111 | +        Self {  | 
 | 112 | +            values: None,  | 
 | 113 | +            delimiter: delimiter.to_string(),  | 
 | 114 | +        }  | 
 | 115 | +    }  | 
 | 116 | +}  | 
 | 117 | + | 
 | 118 | +impl Accumulator for StringAggAccumulator {  | 
 | 119 | +    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {  | 
 | 120 | +        let string_array: Vec<_> = as_generic_string_array::<i64>(&values[0])?  | 
 | 121 | +            .iter()  | 
 | 122 | +            .filter_map(|v| v.as_ref().map(ToString::to_string))  | 
 | 123 | +            .collect();  | 
 | 124 | +        if !string_array.is_empty() {  | 
 | 125 | +            let s = string_array.join(self.delimiter.as_str());  | 
 | 126 | +            let v = self.values.get_or_insert("".to_string());  | 
 | 127 | +            if !v.is_empty() {  | 
 | 128 | +                v.push_str(self.delimiter.as_str());  | 
 | 129 | +            }  | 
 | 130 | +            v.push_str(s.as_str());  | 
 | 131 | +        }  | 
 | 132 | +        Ok(())  | 
 | 133 | +    }  | 
 | 134 | + | 
 | 135 | +    fn merge_batch(&mut self, values: &[ArrayRef]) -> Result<()> {  | 
 | 136 | +        self.update_batch(values)?;  | 
 | 137 | +        Ok(())  | 
 | 138 | +    }  | 
 | 139 | + | 
 | 140 | +    fn state(&mut self) -> Result<Vec<ScalarValue>> {  | 
 | 141 | +        Ok(vec![self.evaluate()?])  | 
 | 142 | +    }  | 
 | 143 | + | 
 | 144 | +    fn evaluate(&mut self) -> Result<ScalarValue> {  | 
 | 145 | +        Ok(ScalarValue::LargeUtf8(self.values.clone()))  | 
 | 146 | +    }  | 
 | 147 | + | 
 | 148 | +    fn size(&self) -> usize {  | 
 | 149 | +        std::mem::size_of_val(self)  | 
 | 150 | +            + self.values.as_ref().map(|v| v.capacity()).unwrap_or(0)  | 
 | 151 | +            + self.delimiter.capacity()  | 
 | 152 | +    }  | 
 | 153 | +}  | 
0 commit comments