Skip to content

Commit 34f58f9

Browse files
authored
Add validation for Decimal256 (#2113)
* Add validation for decimal256 * Add validation in array data
1 parent c3e019f commit 34f58f9

File tree

4 files changed

+225
-5
lines changed

4 files changed

+225
-5
lines changed

arrow/src/array/array_decimal.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ impl From<ArrayData> for Decimal256Array {
352352
assert_eq!(
353353
data.buffers().len(),
354354
1,
355-
"Decimal128Array data should contain 1 buffer only (values)"
355+
"Decimal256Array data should contain 1 buffer only (values)"
356356
);
357357
let values = data.buffers()[0].as_ptr();
358358
let (precision, scale) = match data.data_type() {

arrow/src/array/builder/decimal_builder.rs

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use num::BigInt;
1819
use std::any::Any;
1920
use std::sync::Arc;
2021

@@ -25,7 +26,7 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder};
2526

2627
use crate::error::{ArrowError, Result};
2728

28-
use crate::datatypes::validate_decimal_precision;
29+
use crate::datatypes::{validate_decimal256_precision, validate_decimal_precision};
2930
use crate::util::decimal::{BasicDecimal, Decimal256};
3031

3132
/// Array Builder for [`Decimal128Array`]
@@ -51,6 +52,10 @@ pub struct Decimal256Builder {
5152
builder: FixedSizeBinaryBuilder,
5253
precision: usize,
5354
scale: usize,
55+
56+
/// Should decimal values be validated for compatibility with scale and precision?
57+
/// defaults to true
58+
value_validation: bool,
5459
}
5560

5661
impl Decimal128Builder {
@@ -163,14 +168,35 @@ impl Decimal256Builder {
163168
builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH),
164169
precision,
165170
scale,
171+
value_validation: true,
166172
}
167173
}
168174

175+
/// Disable validation
176+
///
177+
/// # Safety
178+
///
179+
/// After disabling validation, caller must ensure that appended values are compatible
180+
/// for the specified precision and scale.
181+
pub unsafe fn disable_value_validation(&mut self) {
182+
self.value_validation = false;
183+
}
184+
169185
/// Appends a [`Decimal256`] number into the builder.
170186
///
171187
/// Returns an error if `value` has different precision, scale or length in bytes than this builder
172188
#[inline]
173189
pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
190+
let value = if self.value_validation {
191+
let raw_bytes = value.raw_value();
192+
let integer = BigInt::from_signed_bytes_le(raw_bytes);
193+
let value_str = integer.to_string();
194+
validate_decimal256_precision(&value_str, self.precision)?;
195+
value
196+
} else {
197+
value
198+
};
199+
174200
if self.precision != value.precision() || self.scale != value.scale() {
175201
return Err(ArrowError::InvalidArgumentError(
176202
"Decimal value does not have the same precision or scale as Decimal256Builder".to_string()
@@ -206,9 +232,10 @@ impl Decimal256Builder {
206232
#[cfg(test)]
207233
mod tests {
208234
use super::*;
235+
use num::Num;
209236

210237
use crate::array::array_decimal::{BasicDecimalArray, Decimal128Array};
211-
use crate::array::Array;
238+
use crate::array::{array_decimal, Array};
212239
use crate::datatypes::DataType;
213240
use crate::util::decimal::Decimal128;
214241

@@ -298,4 +325,40 @@ mod tests {
298325
let value = Decimal256::try_new_from_bytes(40, 6, bytes.as_slice()).unwrap();
299326
builder.append_value(&value).unwrap();
300327
}
328+
329+
#[test]
330+
#[should_panic(
331+
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 76. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
332+
)]
333+
fn test_decimal256_builder_out_of_range_precision_scale() {
334+
let mut builder = Decimal256Builder::new(30, 76, 6);
335+
336+
let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
337+
let bytes = big_value.to_signed_bytes_le();
338+
let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
339+
builder.append_value(&value).unwrap();
340+
}
341+
342+
#[test]
343+
#[should_panic(
344+
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 76. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
345+
)]
346+
fn test_decimal256_data_validation() {
347+
let mut builder = Decimal256Builder::new(30, 76, 6);
348+
// Disable validation at builder
349+
unsafe {
350+
builder.disable_value_validation();
351+
}
352+
353+
let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
354+
let bytes = big_value.to_signed_bytes_le();
355+
let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
356+
builder
357+
.append_value(&value)
358+
.expect("should not validate invalid value at builder");
359+
360+
let array = builder.finish();
361+
let array_data = array_decimal::BasicDecimalArray::data(&array);
362+
array_data.validate_values().unwrap();
363+
}
301364
}

arrow/src/array/data.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,18 @@
1818
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
1919
//! common attributes and operations for Arrow array.
2020
21-
use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode};
21+
use crate::datatypes::{
22+
validate_decimal256_precision, validate_decimal_precision, DataType, IntervalUnit,
23+
UnionMode,
24+
};
2225
use crate::error::{ArrowError, Result};
2326
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
2427
use crate::{
2528
buffer::{Buffer, MutableBuffer},
2629
util::bit_util,
2730
};
2831
use half::f16;
32+
use num::BigInt;
2933
use std::convert::TryInto;
3034
use std::mem;
3135
use std::ops::Range;
@@ -1018,6 +1022,17 @@ impl ArrayData {
10181022
}
10191023
Ok(())
10201024
}
1025+
DataType::Decimal256(p, _) => {
1026+
let values = self.buffers()[0].as_slice();
1027+
for pos in 0..self.len() {
1028+
let offset = pos * 32;
1029+
let raw_bytes = &values[offset..offset + 32];
1030+
let integer = BigInt::from_signed_bytes_le(raw_bytes);
1031+
let value_str = integer.to_string();
1032+
validate_decimal256_precision(&value_str, *p)?;
1033+
}
1034+
Ok(())
1035+
}
10211036
DataType::Utf8 => self.validate_utf8::<i32>(),
10221037
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
10231038
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),

arrow/src/datatypes/datatype.rs

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use num::{BigInt, Num, ToPrimitive};
1819
use std::fmt;
1920

2021
use serde_derive::{Deserialize, Serialize};
@@ -300,6 +301,49 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
300301
99999999999999999999999999999999999999,
301302
];
302303

304+
/// `MAX_DECIMAL_FOR_LARGER_PRECISION[p]` holds the maximum integer value
305+
/// that can be stored in [DataType::Decimal256] value of precision `p` > 38
306+
pub const MAX_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
307+
"99999999999999999999999999999999999999",
308+
"999999999999999999999999999999999999999",
309+
"9999999999999999999999999999999999999999",
310+
"99999999999999999999999999999999999999999",
311+
"999999999999999999999999999999999999999999",
312+
"9999999999999999999999999999999999999999999",
313+
"99999999999999999999999999999999999999999999",
314+
"999999999999999999999999999999999999999999999",
315+
"9999999999999999999999999999999999999999999999",
316+
"99999999999999999999999999999999999999999999999",
317+
"999999999999999999999999999999999999999999999999",
318+
"9999999999999999999999999999999999999999999999999",
319+
"99999999999999999999999999999999999999999999999999",
320+
"999999999999999999999999999999999999999999999999999",
321+
"9999999999999999999999999999999999999999999999999999",
322+
"99999999999999999999999999999999999999999999999999999",
323+
"999999999999999999999999999999999999999999999999999999",
324+
"9999999999999999999999999999999999999999999999999999999",
325+
"99999999999999999999999999999999999999999999999999999999",
326+
"999999999999999999999999999999999999999999999999999999999",
327+
"9999999999999999999999999999999999999999999999999999999999",
328+
"99999999999999999999999999999999999999999999999999999999999",
329+
"999999999999999999999999999999999999999999999999999999999999",
330+
"9999999999999999999999999999999999999999999999999999999999999",
331+
"99999999999999999999999999999999999999999999999999999999999999",
332+
"999999999999999999999999999999999999999999999999999999999999999",
333+
"9999999999999999999999999999999999999999999999999999999999999999",
334+
"99999999999999999999999999999999999999999999999999999999999999999",
335+
"999999999999999999999999999999999999999999999999999999999999999999",
336+
"9999999999999999999999999999999999999999999999999999999999999999999",
337+
"99999999999999999999999999999999999999999999999999999999999999999999",
338+
"999999999999999999999999999999999999999999999999999999999999999999999",
339+
"9999999999999999999999999999999999999999999999999999999999999999999999",
340+
"99999999999999999999999999999999999999999999999999999999999999999999999",
341+
"999999999999999999999999999999999999999999999999999999999999999999999999",
342+
"9999999999999999999999999999999999999999999999999999999999999999999999999",
343+
"99999999999999999999999999999999999999999999999999999999999999999999999999",
344+
"999999999999999999999999999999999999999999999999999999999999999999999999999",
345+
];
346+
303347
/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value
304348
/// that can be stored in a [DataType::Decimal] value of precision `p`
305349
pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
@@ -343,13 +387,62 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
343387
-99999999999999999999999999999999999999,
344388
];
345389

390+
/// `MIN_DECIMAL_FOR_LARGER_PRECISION[p]` holds the minimum integer value
391+
/// that can be stored in a [DataType::Decimal256] value of precision `p` > 38
392+
pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
393+
"-99999999999999999999999999999999999999",
394+
"-999999999999999999999999999999999999999",
395+
"-9999999999999999999999999999999999999999",
396+
"-99999999999999999999999999999999999999999",
397+
"-999999999999999999999999999999999999999999",
398+
"-9999999999999999999999999999999999999999999",
399+
"-99999999999999999999999999999999999999999999",
400+
"-999999999999999999999999999999999999999999999",
401+
"-9999999999999999999999999999999999999999999999",
402+
"-99999999999999999999999999999999999999999999999",
403+
"-999999999999999999999999999999999999999999999999",
404+
"-9999999999999999999999999999999999999999999999999",
405+
"-99999999999999999999999999999999999999999999999999",
406+
"-999999999999999999999999999999999999999999999999999",
407+
"-9999999999999999999999999999999999999999999999999999",
408+
"-99999999999999999999999999999999999999999999999999999",
409+
"-999999999999999999999999999999999999999999999999999999",
410+
"-9999999999999999999999999999999999999999999999999999999",
411+
"-99999999999999999999999999999999999999999999999999999999",
412+
"-999999999999999999999999999999999999999999999999999999999",
413+
"-9999999999999999999999999999999999999999999999999999999999",
414+
"-99999999999999999999999999999999999999999999999999999999999",
415+
"-999999999999999999999999999999999999999999999999999999999999",
416+
"-9999999999999999999999999999999999999999999999999999999999999",
417+
"-99999999999999999999999999999999999999999999999999999999999999",
418+
"-999999999999999999999999999999999999999999999999999999999999999",
419+
"-9999999999999999999999999999999999999999999999999999999999999999",
420+
"-99999999999999999999999999999999999999999999999999999999999999999",
421+
"-999999999999999999999999999999999999999999999999999999999999999999",
422+
"-9999999999999999999999999999999999999999999999999999999999999999999",
423+
"-99999999999999999999999999999999999999999999999999999999999999999999",
424+
"-999999999999999999999999999999999999999999999999999999999999999999999",
425+
"-9999999999999999999999999999999999999999999999999999999999999999999999",
426+
"-99999999999999999999999999999999999999999999999999999999999999999999999",
427+
"-999999999999999999999999999999999999999999999999999999999999999999999999",
428+
"-9999999999999999999999999999999999999999999999999999999999999999999999999",
429+
"-99999999999999999999999999999999999999999999999999999999999999999999999999",
430+
"-999999999999999999999999999999999999999999999999999999999999999999999999999",
431+
];
432+
346433
/// The maximum precision for [DataType::Decimal] values
347434
pub const DECIMAL_MAX_PRECISION: usize = 38;
348435

349436
/// The maximum scale for [DataType::Decimal] values
350437
pub const DECIMAL_MAX_SCALE: usize = 38;
351438

352-
/// The default scale for [DataType::Decimal] values
439+
/// The maximum precision for [DataType::Decimal256] values
440+
pub const DECIMAL256_MAX_PRECISION: usize = 76;
441+
442+
/// The maximum scale for [DataType::Decimal256] values
443+
pub const DECIMAL256_MAX_SCALE: usize = 76;
444+
445+
/// The default scale for [DataType::Decimal] and [DataType::Decimal256] values
353446
pub const DECIMAL_DEFAULT_SCALE: usize = 10;
354447

355448
/// Validates that the specified `i128` value can be properly
@@ -379,6 +472,55 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul
379472
}
380473
}
381474

475+
/// Validates that the specified string value can be properly
476+
/// interpreted as a Decimal256 number with precision `precision`
477+
#[inline]
478+
pub(crate) fn validate_decimal256_precision(
479+
value: &str,
480+
precision: usize,
481+
) -> Result<BigInt> {
482+
if precision > 38 {
483+
let max_str = MAX_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
484+
let min_str = MIN_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
485+
486+
let max = BigInt::from_str_radix(max_str, 10).unwrap();
487+
let min = BigInt::from_str_radix(min_str, 10).unwrap();
488+
489+
let value = BigInt::from_str_radix(value, 10).unwrap();
490+
if value > max {
491+
Err(ArrowError::InvalidArgumentError(format!(
492+
"{} is too large to store in a Decimal256 of precision {}. Max is {}",
493+
value, precision, max
494+
)))
495+
} else if value < min {
496+
Err(ArrowError::InvalidArgumentError(format!(
497+
"{} is too small to store in a Decimal256 of precision {}. Min is {}",
498+
value, precision, min
499+
)))
500+
} else {
501+
Ok(value)
502+
}
503+
} else {
504+
let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
505+
let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
506+
let value = BigInt::from_str_radix(value, 10).unwrap();
507+
508+
if value.to_i128().unwrap() > max {
509+
Err(ArrowError::InvalidArgumentError(format!(
510+
"{} is too large to store in a Decimal256 of precision {}. Max is {}",
511+
value, precision, max
512+
)))
513+
} else if value.to_i128().unwrap() < min {
514+
Err(ArrowError::InvalidArgumentError(format!(
515+
"{} is too small to store in a Decimal256 of precision {}. Min is {}",
516+
value, precision, min
517+
)))
518+
} else {
519+
Ok(value)
520+
}
521+
}
522+
}
523+
382524
impl DataType {
383525
/// Parse a data type from a JSON representation.
384526
pub(crate) fn from(json: &Value) -> Result<DataType> {

0 commit comments

Comments
 (0)