Skip to content

Commit 0809f45

Browse files
alamb and 2010YOUY01 authored
Add documentation about type signatures, and export TIMEZONE_WILDCARD (#7726)
* Add documentation and export `TIMEZONE_WILDCARD` * improve example * Apply suggestions from code review Co-authored-by: Yongting You <[email protected]> --------- Co-authored-by: Yongting You <[email protected]>
1 parent c0409a7 commit 0809f45

File tree

4 files changed

+95
-62
lines changed

4 files changed

+95
-62
lines changed

datafusion/expr/src/built_in_function.rs

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
//! Built-in functions module contains all the built-in functions definitions.
1919
2020
use crate::nullif::SUPPORTED_NULLIF_TYPES;
21-
use crate::type_coercion::functions::{data_types, TIMEZONE_PLACEHOLDER};
21+
use crate::signature::TIMEZONE_WILDCARD;
22+
use crate::type_coercion::functions::data_types;
2223
use crate::{
2324
conditional_expressions, struct_expressions, utils, Signature, TypeSignature,
2425
Volatility,
@@ -1029,22 +1030,22 @@ impl BuiltinScalarFunction {
10291030
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
10301031
Exact(vec![
10311032
Utf8,
1032-
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
1033+
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
10331034
]),
10341035
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
10351036
Exact(vec![
10361037
Utf8,
1037-
Timestamp(Microsecond, Some(TIMEZONE_PLACEHOLDER.into())),
1038+
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
10381039
]),
10391040
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
10401041
Exact(vec![
10411042
Utf8,
1042-
Timestamp(Millisecond, Some(TIMEZONE_PLACEHOLDER.into())),
1043+
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
10431044
]),
10441045
Exact(vec![Utf8, Timestamp(Second, None)]),
10451046
Exact(vec![
10461047
Utf8,
1047-
Timestamp(Second, Some(TIMEZONE_PLACEHOLDER.into())),
1048+
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
10481049
]),
10491050
],
10501051
self.volatility(),
@@ -1059,11 +1060,8 @@ impl BuiltinScalarFunction {
10591060
]),
10601061
Exact(vec![
10611062
Interval(MonthDayNano),
1062-
Timestamp(
1063-
array_type.clone(),
1064-
Some(TIMEZONE_PLACEHOLDER.into()),
1065-
),
1066-
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
1063+
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
1064+
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
10671065
]),
10681066
Exact(vec![
10691067
Interval(DayTime),
@@ -1072,30 +1070,24 @@ impl BuiltinScalarFunction {
10721070
]),
10731071
Exact(vec![
10741072
Interval(DayTime),
1075-
Timestamp(
1076-
array_type.clone(),
1077-
Some(TIMEZONE_PLACEHOLDER.into()),
1078-
),
1079-
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
1073+
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
1074+
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
10801075
]),
10811076
Exact(vec![
10821077
Interval(MonthDayNano),
10831078
Timestamp(array_type.clone(), None),
10841079
]),
10851080
Exact(vec![
10861081
Interval(MonthDayNano),
1087-
Timestamp(
1088-
array_type.clone(),
1089-
Some(TIMEZONE_PLACEHOLDER.into()),
1090-
),
1082+
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
10911083
]),
10921084
Exact(vec![
10931085
Interval(DayTime),
10941086
Timestamp(array_type.clone(), None),
10951087
]),
10961088
Exact(vec![
10971089
Interval(DayTime),
1098-
Timestamp(array_type, Some(TIMEZONE_PLACEHOLDER.into())),
1090+
Timestamp(array_type, Some(TIMEZONE_WILDCARD.into())),
10991091
]),
11001092
]
11011093
};
@@ -1115,22 +1107,22 @@ impl BuiltinScalarFunction {
11151107
Exact(vec![Utf8, Timestamp(Second, None)]),
11161108
Exact(vec![
11171109
Utf8,
1118-
Timestamp(Second, Some(TIMEZONE_PLACEHOLDER.into())),
1110+
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
11191111
]),
11201112
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
11211113
Exact(vec![
11221114
Utf8,
1123-
Timestamp(Microsecond, Some(TIMEZONE_PLACEHOLDER.into())),
1115+
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
11241116
]),
11251117
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
11261118
Exact(vec![
11271119
Utf8,
1128-
Timestamp(Millisecond, Some(TIMEZONE_PLACEHOLDER.into())),
1120+
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
11291121
]),
11301122
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
11311123
Exact(vec![
11321124
Utf8,
1133-
Timestamp(Nanosecond, Some(TIMEZONE_PLACEHOLDER.into())),
1125+
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
11341126
]),
11351127
],
11361128
self.volatility(),

datafusion/expr/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ pub use logical_plan::*;
7474
pub use nullif::SUPPORTED_NULLIF_TYPES;
7575
pub use operator::Operator;
7676
pub use partition_evaluator::PartitionEvaluator;
77-
pub use signature::{Signature, TypeSignature, Volatility};
77+
pub use signature::{Signature, TypeSignature, Volatility, TIMEZONE_WILDCARD};
7878
pub use table_source::{TableProviderFilterPushDown, TableSource, TableType};
7979
pub use udaf::AggregateUDF;
8080
pub use udf::ScalarUDF;

datafusion/expr/src/signature.rs

Lines changed: 76 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,35 +20,82 @@
2020
2121
use arrow::datatypes::DataType;
2222

23+
/// Constant that is used as a placeholder for any valid timezone.
24+
/// This is used where a function can accept a timestamp type with any
25+
/// valid timezone, it exists to avoid the need to enumerate all possible
26+
/// timezones. See [`TypeSignature`] for more details.
27+
///
28+
/// Type coercion always ensures that functions will be executed using
29+
/// timestamp arrays that have a valid time zone. Functions must never
30+
/// return results with this timezone.
31+
pub const TIMEZONE_WILDCARD: &str = "+TZ";
32+
2333
/// A function's volatility, which defines the function's eligibility for certain optimizations
2434
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
2535
pub enum Volatility {
26-
/// Immutable - An immutable function will always return the same output when given the same
27-
/// input. An example of this is [super::BuiltinScalarFunction::Cos].
36+
/// An immutable function will always return the same output when given the same
37+
/// input. An example of this is [super::BuiltinScalarFunction::Cos]. DataFusion
38+
/// will attempt to inline immutable functions during planning.
2839
Immutable,
29-
/// Stable - A stable function may return different values given the same input across different
40+
/// A stable function may return different values given the same input across different
3041
/// queries but must return the same value for a given input within a query. An example of
31-
/// this is [super::BuiltinScalarFunction::Now].
42+
/// this is [super::BuiltinScalarFunction::Now]. DataFusion
43+
/// will attempt to inline `Stable` functions during planning, when possible.
44+
/// For query `select col1, now() from t1`, it might take a while to execute but
45+
/// `now()` column will be the same for each output row, which is evaluated
46+
/// during planning.
3247
Stable,
33-
/// Volatile - A volatile function may change the return value from evaluation to evaluation.
48+
/// A volatile function may change the return value from evaluation to evaluation.
3449
/// Multiple invocations of a volatile function may return different results when used in the
35-
/// same query. An example of this is [super::BuiltinScalarFunction::Random].
50+
/// same query. An example of this is [super::BuiltinScalarFunction::Random]. DataFusion
51+
/// can not evaluate such functions during planning.
52+
/// In the query `select col1, random() from t1`, `random()` function will be evaluated
53+
/// for each output row, resulting in a unique random value for each row.
3654
Volatile,
3755
}
3856

39-
/// A function's type signature, which defines the function's supported argument types.
57+
/// A function's type signature defines the types of arguments the function supports.
58+
///
59+
/// Functions typically support only a few different types of arguments compared to the
60+
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
61+
/// automatically coerces (adds casts to) function arguments so they match the type signature.
62+
///
63+
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
64+
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
65+
/// adds a cast such as `cos(CAST(int_column AS DOUBLE))` during planning.
66+
///
67+
/// # Data Types
68+
/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable
69+
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
70+
/// the [`TIMEZONE_WILDCARD`]. For example:
71+
///
72+
/// ```
73+
/// # use arrow::datatypes::{DataType, TimeUnit};
74+
/// # use datafusion_expr::{TIMEZONE_WILDCARD, TypeSignature};
75+
/// let type_signature = TypeSignature::Exact(vec![
76+
/// // A nanosecond precision timestamp with ANY timezone
77+
/// // matches Timestamp(Nanosecond, Some("+0:00"))
78+
/// // matches Timestamp(Nanosecond, Some("+5:00"))
79+
/// // does not match Timestamp(Nanosecond, None)
80+
/// DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
81+
/// ]);
82+
/// ```
4083
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
4184
pub enum TypeSignature {
42-
/// arbitrary number of arguments of an common type out of a list of valid types
43-
// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
85+
/// arbitrary number of arguments of a common type out of a list of valid types.
86+
///
87+
/// # Examples
88+
/// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
4489
Variadic(Vec<DataType>),
45-
/// arbitrary number of arguments of an arbitrary but equal type
46-
// A function such as `array` is `VariadicEqual`
47-
// The first argument decides the type used for coercion
90+
/// arbitrary number of arguments of an arbitrary but equal type.
91+
/// DataFusion attempts to coerce all argument types to match the first argument's type
92+
///
93+
/// # Examples
94+
/// A function such as `array` is `VariadicEqual`
4895
VariadicEqual,
4996
/// arbitrary number of arguments with arbitrary types
5097
VariadicAny,
51-
/// fixed number of arguments of an arbitrary but equal type out of a list of valid types
98+
/// fixed number of arguments of an arbitrary but equal type out of a list of valid types.
5299
///
53100
/// # Examples
54101
/// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
@@ -58,7 +105,8 @@ pub enum TypeSignature {
58105
Exact(Vec<DataType>),
59106
/// fixed number of arguments of arbitrary types
60107
Any(usize),
61-
/// One of a list of signatures
108+
/// Matches exactly one of a list of [`TypeSignature`]s. Coercion is attempted to match
109+
/// the signatures in order, and stops after the first success, if any.
62110
OneOf(Vec<TypeSignature>),
63111
}
64112

@@ -104,46 +152,48 @@ impl TypeSignature {
104152
}
105153
}
106154

107-
/// The signature of a function defines the supported argument types
108-
/// and its volatility.
155+
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
156+
///
157+
/// DataFusion will automatically coerce (cast) argument types to one of the supported
158+
/// function signatures, if possible.
109159
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
110160
pub struct Signature {
111-
/// type_signature - The types that the function accepts. See [TypeSignature] for more information.
161+
/// The data types that the function accepts. See [TypeSignature] for more information.
112162
pub type_signature: TypeSignature,
113-
/// volatility - The volatility of the function. See [Volatility] for more information.
163+
/// The volatility of the function. See [Volatility] for more information.
114164
pub volatility: Volatility,
115165
}
116166

117167
impl Signature {
118-
/// new - Creates a new Signature from any type signature and the volatility.
168+
/// Creates a new Signature from a given type signature and volatility.
119169
pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
120170
Signature {
121171
type_signature,
122172
volatility,
123173
}
124174
}
125-
/// variadic - Creates a variadic signature that represents an arbitrary number of arguments all from a type in common_types.
175+
/// An arbitrary number of arguments with the same type, from those listed in `common_types`.
126176
pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
127177
Self {
128178
type_signature: TypeSignature::Variadic(common_types),
129179
volatility,
130180
}
131181
}
132-
/// variadic_equal - Creates a variadic signature that represents an arbitrary number of arguments of the same type.
182+
/// An arbitrary number of arguments of the same type.
133183
pub fn variadic_equal(volatility: Volatility) -> Self {
134184
Self {
135185
type_signature: TypeSignature::VariadicEqual,
136186
volatility,
137187
}
138188
}
139-
/// variadic_any - Creates a variadic signature that represents an arbitrary number of arguments of any type.
189+
/// An arbitrary number of arguments of any type.
140190
pub fn variadic_any(volatility: Volatility) -> Self {
141191
Self {
142192
type_signature: TypeSignature::VariadicAny,
143193
volatility,
144194
}
145195
}
146-
/// uniform - Creates a function with a fixed number of arguments of the same type, which must be from valid_types.
196+
/// A fixed number of arguments of the same type, from those listed in `valid_types`.
147197
pub fn uniform(
148198
arg_count: usize,
149199
valid_types: Vec<DataType>,
@@ -154,21 +204,21 @@ impl Signature {
154204
volatility,
155205
}
156206
}
157-
/// exact - Creates a signature which must match the types in exact_types in order.
207+
/// Exactly matches the types in `exact_types`, in order.
158208
pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
159209
Signature {
160210
type_signature: TypeSignature::Exact(exact_types),
161211
volatility,
162212
}
163213
}
164-
/// any - Creates a signature which can a be made of any type but of a specified number
214+
/// A specified number of arguments of any type
165215
pub fn any(arg_count: usize, volatility: Volatility) -> Self {
166216
Signature {
167217
type_signature: TypeSignature::Any(arg_count),
168218
volatility,
169219
}
170220
}
171-
/// one_of Creates a signature which can match any of the [TypeSignature]s which are passed in.
221+
/// Any one of a list of [TypeSignature]s.
172222
pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
173223
Signature {
174224
type_signature: TypeSignature::OneOf(type_signatures),

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use crate::signature::TIMEZONE_WILDCARD;
1819
use crate::{Signature, TypeSignature};
1920
use arrow::{
2021
compute::can_cast_types,
2122
datatypes::{DataType, TimeUnit},
2223
};
2324
use datafusion_common::{plan_err, DataFusionError, Result};
2425

25-
/// Constant that is used as a placeholder for any valid timezone.
26-
/// This is used where a function can accept a timestamp type with any
27-
/// valid timezone, it exists to avoid the need to enumerate all possible
28-
/// timezones.
29-
///
30-
/// Type coercion always ensures that functions will be executed using
31-
/// timestamp arrays that have a valid time zone. Functions must never
32-
/// return results with this timezone.
33-
pub(crate) const TIMEZONE_PLACEHOLDER: &str = "+TZ";
34-
3526
/// Performs type coercion for function arguments.
3627
///
3728
/// Returns the data types to which each argument must be coerced to
@@ -232,7 +223,7 @@ fn coerced_from<'a>(
232223
Utf8 | LargeUtf8 => Some(type_into.clone()),
233224
Null if can_cast_types(type_from, type_into) => Some(type_into.clone()),
234225

235-
Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_PLACEHOLDER => {
226+
Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => {
236227
match type_from {
237228
Timestamp(_, Some(from_tz)) => {
238229
Some(Timestamp(unit.clone(), Some(from_tz.clone())))

0 commit comments

Comments
 (0)