Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3527,9 +3527,36 @@ impl ScalarValue {
}
}

pub fn copy_array_data(data: &ArrayData) -> ArrayData {
let mut copy = MutableArrayData::new(vec![&data], true, data.len());
copy.extend(0, 0, data.len());
/// Copies the contents of an `ArrayData` into a freshly built, compact
/// `ArrayData`.
///
/// Use this to shrink the memory held on to by an `ArrayData`. For instance,
/// the result of [`Array::slice`] keeps referencing the data buffers of the
/// array it was sliced from, and those buffers may hold far more values than
/// the slice itself. Passing the sliced data to `copy_array_data` yields a
/// new, smaller `ArrayData` containing only the values of the slice.
///
/// # Example
/// ```
/// # use arrow::array::{make_array, Array, Int32Array};
/// use datafusion_common::scalar::copy_array_data;
/// let array = Int32Array::from_iter_values(0..8192);
/// // Take only the first 2 elements
/// let sliced_array = array.slice(0, 2);
/// // The memory footprint of `sliced_array` is close to 8192 * 4 bytes
/// assert_eq!(32864, sliced_array.get_array_memory_size());
/// // however, we can copy the data to a new `ArrayData`
/// let new_array = make_array(copy_array_data(&sliced_array.into_data()));
/// // The memory footprint of `new_array` is now only 2 * 4 bytes
/// // and overhead:
/// assert_eq!(160, new_array.get_array_memory_size());
/// ```
///
/// See also [`ScalarValue::compact`], which applies the same compaction to
/// `ScalarValue` instances as necessary.
pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
    let len = src_data.len();
    // One source array; `true` is the `use_nulls` flag; `len` is the capacity hint.
    let mut builder = MutableArrayData::new(vec![src_data], true, len);
    // Append elements `0..len` of source array 0, then finalize the copy.
    builder.extend(0, 0, len);
    builder.freeze()
}

Expand Down