2020use crate :: VariantArray ;
2121use arrow:: array:: { ArrayRef , BinaryViewArray , BinaryViewBuilder , NullBufferBuilder , StructArray } ;
2222use arrow_schema:: { DataType , Field , Fields } ;
23- use parquet_variant:: { Variant , VariantBuilder } ;
23+ use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilder , VariantBuilderExt } ;
2424use std:: sync:: Arc ;
2525
2626/// A builder for [`VariantArray`]
@@ -37,13 +37,13 @@ use std::sync::Arc;
3737/// ## Example:
3838/// ```
3939/// # use arrow::array::Array;
40- /// # use parquet_variant::{Variant, VariantBuilder};
40+ /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt };
4141/// # use parquet_variant_compute::VariantArrayBuilder;
4242/// // Create a new VariantArrayBuilder with a capacity of 100 rows
4343/// let mut builder = VariantArrayBuilder::new(100);
4444/// // append variant values
4545/// builder.append_variant(Variant::from(42));
46- /// // append a null row
46+ /// // append a null row (note not a Variant::Null)
4747/// builder.append_null();
4848/// // append a pre-constructed metadata and value buffers
4949/// let (metadata, value) = {
@@ -55,9 +55,14 @@ use std::sync::Arc;
5555/// };
5656/// builder.append_variant_buffers(&metadata, &value);
5757///
58+ /// // Use `variant_builder` method to write values directly to the output array
59+ /// let mut vb = builder.variant_builder();
60+ /// vb.append_value("Hello, World!");
61+ /// vb.finish(); // Note: call finish to write the variant to the buffers
62+ ///
5863/// // create the final VariantArray
5964/// let variant_array = builder.build();
60- /// assert_eq!(variant_array.len(), 3 );
65+ /// assert_eq!(variant_array.len(), 4 );
6166/// // // Access the values
6267/// // row 1 is not null and is an integer
6368/// assert!(!variant_array.is_null(0));
@@ -67,6 +72,9 @@ use std::sync::Arc;
6772/// // row 2 is not null and is an object
6873/// assert!(!variant_array.is_null(2));
6974/// assert!(variant_array.value(2).as_object().is_some());
75+ /// // row 3 is a string
76+ /// assert!(!variant_array.is_null(3));
77+ /// assert_eq!(variant_array.value(3), Variant::from("Hello, World!"));
7078/// ```
7179#[ derive( Debug ) ]
7280pub struct VariantArrayBuilder {
@@ -147,11 +155,9 @@ impl VariantArrayBuilder {
147155
148156 /// Append the [`Variant`] to the builder as the next row
149157 pub fn append_variant ( & mut self , variant : Variant ) {
150- // TODO make this more efficient by avoiding the intermediate buffers
151- let mut variant_builder = VariantBuilder :: new ( ) ;
152- variant_builder. append_value ( variant) ;
153- let ( metadata, value) = variant_builder. finish ( ) ;
154- self . append_variant_buffers ( & metadata, & value) ;
158+ let mut direct_builder = self . variant_builder ( ) ;
159+ direct_builder. variant_builder . append_value ( variant) ;
160+ direct_builder. finish ( )
155161 }
156162
157163 /// Append a metadata and values buffer to the builder
@@ -168,7 +174,169 @@ impl VariantArrayBuilder {
168174 self . value_buffer . extend_from_slice ( value) ;
169175 }
170176
171- // TODO: Return a Variant builder that will write to the underlying buffers (TODO)
177+ /// Return a `VariantArrayVariantBuilder` that writes directly to the
178+ /// buffers of this builder.
179+ ///
180+ /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder
181+ ///
182+ /// # Example
183+ /// ```
184+ /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
185+ /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder};
186+ /// let mut array_builder = VariantArrayBuilder::new(10);
187+ ///
188+ /// // First row has a string
189+ /// let mut variant_builder = array_builder.variant_builder();
190+ /// variant_builder.append_value("Hello, World!");
191+ /// // must call finish to write the variant to the buffers
192+ /// variant_builder.finish();
193+ ///
194+ /// // Second row is an object
195+ /// let mut variant_builder = array_builder.variant_builder();
196+ /// let mut obj = variant_builder.new_object();
197+ /// obj.insert("my_field", 42i64);
198+ /// obj.finish().unwrap();
199+ /// variant_builder.finish();
200+ ///
201+ /// // finalize the array
202+ /// let variant_array: VariantArray = array_builder.build();
203+ ///
204+ /// // verify what we wrote is still there
205+ /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!"));
206+ /// assert!(variant_array.value(1).as_object().is_some());
207+ /// ```
208+ pub fn variant_builder ( & mut self ) -> VariantArrayVariantBuilder {
209+ // append directly into the metadata and value buffers
210+ let metadata_buffer = std:: mem:: take ( & mut self . metadata_buffer ) ;
211+ let value_buffer = std:: mem:: take ( & mut self . value_buffer ) ;
212+ VariantArrayVariantBuilder :: new ( self , metadata_buffer, value_buffer)
213+ }
214+ }
215+
216+ /// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`.
217+ ///
218+ /// Note this struct implements [`VariantBuilderExt`], so it can be used
219+ /// as a drop-in replacement for [`VariantBuilder`] in most cases.
220+ ///
221+ /// If [`Self::finish`] is not called, any changes will be rolled back
222+ ///
223+ /// See [`VariantArrayBuilder::variant_builder`] for an example
224+ pub struct VariantArrayVariantBuilder < ' a > {
225+ /// was finish called?
226+ finished : bool ,
227+ /// starting offset in the variant_builder's `metadata` buffer
228+ metadata_offset : usize ,
229+ /// starting offset in the variant_builder's `value` buffer
230+ value_offset : usize ,
231+ /// Parent array builder that this variant builder writes to. Buffers
232+ /// have been moved into the variant builder, and must be returned on
233+ /// drop
234+ array_builder : & ' a mut VariantArrayBuilder ,
235+ /// Builder for the in progress variant value, temporarily owns the buffers
236+ /// from `array_builder`
237+ variant_builder : VariantBuilder ,
238+ }
239+
240+ impl < ' a > VariantBuilderExt for VariantArrayVariantBuilder < ' a > {
241+ fn append_value < ' m , ' v > ( & mut self , value : impl Into < Variant < ' m , ' v > > ) {
242+ self . variant_builder . append_value ( value) ;
243+ }
244+
245+ fn new_list ( & mut self ) -> ListBuilder {
246+ self . variant_builder . new_list ( )
247+ }
248+
249+ fn new_object ( & mut self ) -> ObjectBuilder {
250+ self . variant_builder . new_object ( )
251+ }
252+ }
253+
254+ impl < ' a > VariantArrayVariantBuilder < ' a > {
255+ /// Constructs a new VariantArrayVariantBuilder
256+ ///
257+ /// Note this is not public as this is a structure that is logically
258+ /// part of the [`VariantArrayBuilder`] and relies on its internal structure
259+ fn new (
260+ array_builder : & ' a mut VariantArrayBuilder ,
261+ metadata_buffer : Vec < u8 > ,
262+ value_buffer : Vec < u8 > ,
263+ ) -> Self {
264+ let metadata_offset = metadata_buffer. len ( ) ;
265+ let value_offset = value_buffer. len ( ) ;
266+ VariantArrayVariantBuilder {
267+ finished : false ,
268+ metadata_offset,
269+ value_offset,
270+ variant_builder : VariantBuilder :: new_with_buffers ( metadata_buffer, value_buffer) ,
271+ array_builder,
272+ }
273+ }
274+
275+ /// Return a reference to the underlying `VariantBuilder`
276+ pub fn inner ( & self ) -> & VariantBuilder {
277+ & self . variant_builder
278+ }
279+
280+ /// Return a mutable reference to the underlying `VariantBuilder`
281+ pub fn inner_mut ( & mut self ) -> & mut VariantBuilder {
282+ & mut self . variant_builder
283+ }
284+
285+ /// Called to finish the in progress variant and write it to the underlying
286+ /// buffers
287+ ///
288+ /// Note if you do not call finish, on drop any changes made to the
289+ /// underlying buffers will be rolled back.
290+ pub fn finish ( mut self ) {
291+ self . finished = true ;
292+ // Note: buffers are returned and replaced in the drop impl
293+ }
294+ }
295+
296+ impl < ' a > Drop for VariantArrayVariantBuilder < ' a > {
297+ /// If the builder was not finished, roll back any changes made to the
298+ /// underlying buffers (by truncating them)
299+ fn drop ( & mut self ) {
300+ let metadata_offset = self . metadata_offset ;
301+ let value_offset = self . value_offset ;
302+
303+ // get the buffers back from the variant builder
304+ let ( mut metadata_buffer, mut value_buffer) =
305+ std:: mem:: take ( & mut self . variant_builder ) . finish ( ) ;
306+
307+ // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
308+ let metadata_len = metadata_buffer
309+ . len ( )
310+ . checked_sub ( metadata_offset)
311+ . expect ( "metadata length decreased unexpectedly" ) ;
312+ let value_len = value_buffer
313+ . len ( )
314+ . checked_sub ( value_offset)
315+ . expect ( "value length decreased unexpectedly" ) ;
316+
317+ if self . finished {
318+ // if the object was finished, commit the changes by putting the
319+ // offsets and lengths into the parent array builder.
320+ self . array_builder
321+ . metadata_locations
322+ . push ( ( metadata_offset, metadata_len) ) ;
323+ self . array_builder
324+ . value_locations
325+ . push ( ( value_offset, value_len) ) ;
326+ self . array_builder . nulls . append_non_null ( ) ;
327+ } else {
328+ // if the object was not finished, truncate the buffers to the
329+ // original offsets to roll back any changes. Note this is fast
330+ // because truncate doesn't free any memory: it just has to drop
331+ // elements (and u8 doesn't have a destructor)
332+ metadata_buffer. truncate ( metadata_offset) ;
333+ value_buffer. truncate ( value_offset) ;
334+ }
335+
336+ // put the buffers back into the array builder
337+ self . array_builder . metadata_buffer = metadata_buffer;
338+ self . array_builder . value_buffer = value_buffer;
339+ }
172340}
173341
174342fn binary_view_array_from_buffers (
@@ -220,4 +388,82 @@ mod test {
220388 ) ;
221389 }
222390 }
391+
392+ /// Test using sub builders to append variants
393+ #[ test]
394+ fn test_variant_array_builder_variant_builder ( ) {
395+ let mut builder = VariantArrayBuilder :: new ( 10 ) ;
396+ builder. append_null ( ) ; // should not panic
397+ builder. append_variant ( Variant :: from ( 42i32 ) ) ;
398+
399+ // let's make a sub-object in the next row
400+ let mut sub_builder = builder. variant_builder ( ) ;
401+ let mut obj = sub_builder. new_object ( ) ;
402+ obj. insert ( "foo" , "bar" ) ;
403+ obj. finish ( ) . unwrap ( ) ;
404+ sub_builder. finish ( ) ; // must call finish to write the variant to the buffers
405+
406+ // append a new list
407+ let mut sub_builder = builder. variant_builder ( ) ;
408+ let mut list_builder = sub_builder. new_list ( ) ;
409+ list_builder. append_value ( Variant :: from ( 1i32 ) ) ;
410+ list_builder. append_value ( Variant :: from ( 2i32 ) ) ;
411+ list_builder. finish ( ) ;
412+ sub_builder. finish ( ) ;
413+ let variant_array = builder. build ( ) ;
414+
415+ assert_eq ! ( variant_array. len( ) , 4 ) ;
416+ assert ! ( variant_array. is_null( 0 ) ) ;
417+ assert ! ( !variant_array. is_null( 1 ) ) ;
418+ assert_eq ! ( variant_array. value( 1 ) , Variant :: from( 42i32 ) ) ;
419+ assert ! ( !variant_array. is_null( 2 ) ) ;
420+ let variant = variant_array. value ( 2 ) ;
421+ let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
422+ assert_eq ! ( variant. get( "foo" ) . unwrap( ) , Variant :: from( "bar" ) ) ;
423+ assert ! ( !variant_array. is_null( 3 ) ) ;
424+ let variant = variant_array. value ( 3 ) ;
425+ let list = variant. as_list ( ) . expect ( "variant to be a list" ) ;
426+ assert_eq ! ( list. len( ) , 2 ) ;
427+ }
428+
429+ /// Test using non-finished sub builders to append variants
430+ #[ test]
431+ fn test_variant_array_builder_variant_builder_reset ( ) {
432+ let mut builder = VariantArrayBuilder :: new ( 10 ) ;
433+
434+ // make a sub-object in the first row
435+ let mut sub_builder = builder. variant_builder ( ) ;
436+ let mut obj = sub_builder. new_object ( ) ;
437+ obj. insert ( "foo" , 1i32 ) ;
438+ obj. finish ( ) . unwrap ( ) ;
439+ sub_builder. finish ( ) ; // must call finish to write the variant to the buffers
440+
441+ // start appending an object but don't finish
442+ let mut sub_builder = builder. variant_builder ( ) ;
443+ let mut obj = sub_builder. new_object ( ) ;
444+ obj. insert ( "bar" , 2i32 ) ;
445+ obj. finish ( ) . unwrap ( ) ;
446+ drop ( sub_builder) ; // drop the sub builder without finishing it
447+
448+ // make a third sub-object (this should reset the previous unfinished object)
449+ let mut sub_builder = builder. variant_builder ( ) ;
450+ let mut obj = sub_builder. new_object ( ) ;
451+ obj. insert ( "baz" , 3i32 ) ;
452+ obj. finish ( ) . unwrap ( ) ;
453+ sub_builder. finish ( ) ; // must call finish to write the variant to the buffers
454+
455+ let variant_array = builder. build ( ) ;
456+
457+ // only the two finished objects should be present
458+ assert_eq ! ( variant_array. len( ) , 2 ) ;
459+ assert ! ( !variant_array. is_null( 0 ) ) ;
460+ let variant = variant_array. value ( 0 ) ;
461+ let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
462+ assert_eq ! ( variant. get( "foo" ) . unwrap( ) , Variant :: from( 1i32 ) ) ;
463+
464+ assert ! ( !variant_array. is_null( 1 ) ) ;
465+ let variant = variant_array. value ( 1 ) ;
466+ let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
467+ assert_eq ! ( variant. get( "baz" ) . unwrap( ) , Variant :: from( 3i32 ) ) ;
468+ }
223469}
0 commit comments