20
20
use crate :: VariantArray ;
21
21
use arrow:: array:: { ArrayRef , BinaryViewArray , BinaryViewBuilder , NullBufferBuilder , StructArray } ;
22
22
use arrow_schema:: { DataType , Field , Fields } ;
23
- use parquet_variant:: { Variant , VariantBuilder } ;
23
+ use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilder , VariantBuilderExt } ;
24
24
use std:: sync:: Arc ;
25
25
26
26
/// A builder for [`VariantArray`]
@@ -37,13 +37,13 @@ use std::sync::Arc;
37
37
/// ## Example:
38
38
/// ```
39
39
/// # use arrow::array::Array;
40
- /// # use parquet_variant::{Variant, VariantBuilder};
40
+ /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt };
41
41
/// # use parquet_variant_compute::VariantArrayBuilder;
42
42
/// // Create a new VariantArrayBuilder with a capacity of 100 rows
43
43
/// let mut builder = VariantArrayBuilder::new(100);
44
44
/// // append variant values
45
45
/// builder.append_variant(Variant::from(42));
46
- /// // append a null row
46
+ /// // append a null row (note not a Variant::Null)
47
47
/// builder.append_null();
48
48
/// // append a pre-constructed metadata and value buffers
49
49
/// let (metadata, value) = {
@@ -55,9 +55,14 @@ use std::sync::Arc;
55
55
/// };
56
56
/// builder.append_variant_buffers(&metadata, &value);
57
57
///
58
+ /// // Use `variant_builder` method to write values directly to the output array
59
+ /// let mut vb = builder.variant_builder();
60
+ /// vb.append_value("Hello, World!");
61
+ /// vb.finish(); // Note: call finish to write the variant to the buffers
62
+ ///
58
63
/// // create the final VariantArray
59
64
/// let variant_array = builder.build();
60
- /// assert_eq!(variant_array.len(), 3 );
65
+ /// assert_eq!(variant_array.len(), 4 );
61
66
/// // // Access the values
62
67
/// // row 1 is not null and is an integer
63
68
/// assert!(!variant_array.is_null(0));
@@ -67,6 +72,9 @@ use std::sync::Arc;
67
72
/// // row 2 is not null and is an object
68
73
/// assert!(!variant_array.is_null(2));
69
74
/// assert!(variant_array.value(2).as_object().is_some());
75
+ /// // row 3 is a string
76
+ /// assert!(!variant_array.is_null(3));
77
+ /// assert_eq!(variant_array.value(3), Variant::from("Hello, World!"));
70
78
/// ```
71
79
#[ derive( Debug ) ]
72
80
pub struct VariantArrayBuilder {
@@ -147,11 +155,9 @@ impl VariantArrayBuilder {
147
155
148
156
/// Append the [`Variant`] to the builder as the next row
149
157
pub fn append_variant ( & mut self , variant : Variant ) {
150
- // TODO make this more efficient by avoiding the intermediate buffers
151
- let mut variant_builder = VariantBuilder :: new ( ) ;
152
- variant_builder. append_value ( variant) ;
153
- let ( metadata, value) = variant_builder. finish ( ) ;
154
- self . append_variant_buffers ( & metadata, & value) ;
158
+ let mut direct_builder = self . variant_builder ( ) ;
159
+ direct_builder. variant_builder . append_value ( variant) ;
160
+ direct_builder. finish ( )
155
161
}
156
162
157
163
/// Append a metadata and values buffer to the builder
@@ -168,7 +174,171 @@ impl VariantArrayBuilder {
168
174
self . value_buffer . extend_from_slice ( value) ;
169
175
}
170
176
171
- // TODO: Return a Variant builder that will write to the underlying buffers (TODO)
177
+ /// Return a `VariantArrayVariantBuilder` that writes directly to the
178
+ /// buffers of this builder.
179
+ ///
180
+ /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder
181
+ ///
182
+ /// # Example
183
+ /// ```
184
+ /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
185
+ /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder};
186
+ /// let mut array_builder = VariantArrayBuilder::new(10);
187
+ ///
188
+ /// // First row has a string
189
+ /// let mut variant_builder = array_builder.variant_builder();
190
+ /// variant_builder.append_value("Hello, World!");
191
+ /// // must call finish to write the variant to the buffers
192
+ /// variant_builder.finish();
193
+ ///
194
+ /// // Second row is an object
195
+ /// let mut variant_builder = array_builder.variant_builder();
196
+ /// variant_builder
197
+ /// .new_object()
198
+ /// .with_field("my_field", 42i64)
199
+ /// .finish()
200
+ /// .unwrap();
201
+ /// variant_builder.finish();
202
+ ///
203
+ /// // finalize the array
204
+ /// let variant_array: VariantArray = array_builder.build();
205
+ ///
206
+ /// // verify what we wrote is still there
207
+ /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!"));
208
+ /// assert!(variant_array.value(1).as_object().is_some());
209
+ /// ```
210
+ pub fn variant_builder ( & mut self ) -> VariantArrayVariantBuilder {
211
+ // append directly into the metadata and value buffers
212
+ let metadata_buffer = std:: mem:: take ( & mut self . metadata_buffer ) ;
213
+ let value_buffer = std:: mem:: take ( & mut self . value_buffer ) ;
214
+ VariantArrayVariantBuilder :: new ( self , metadata_buffer, value_buffer)
215
+ }
216
+ }
217
+
218
+ /// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`.
219
+ ///
220
+ /// Note this struct implements [`VariantBuilderExt`], so it can be used
221
+ /// as a drop-in replacement for [`VariantBuilder`] in most cases.
222
+ ///
223
+ /// If [`Self::finish`] is not called, any changes will be rolled back
224
+ ///
225
+ /// See [`VariantArrayBuilder::variant_builder`] for an example
226
+ pub struct VariantArrayVariantBuilder < ' a > {
227
+ /// was finish called?
228
+ finished : bool ,
229
+ /// starting offset in the variant_builder's `metadata` buffer
230
+ metadata_offset : usize ,
231
+ /// starting offset in the variant_builder's `value` buffer
232
+ value_offset : usize ,
233
+ /// Parent array builder that this variant builder writes to. Buffers
234
+ /// have been moved into the variant builder, and must be returned on
235
+ /// drop
236
+ array_builder : & ' a mut VariantArrayBuilder ,
237
+ /// Builder for the in progress variant value, temporarily owns the buffers
238
+ /// from `array_builder`
239
+ variant_builder : VariantBuilder ,
240
+ }
241
+
242
+ impl < ' a > VariantBuilderExt for VariantArrayVariantBuilder < ' a > {
243
+ fn append_value < ' m , ' v > ( & mut self , value : impl Into < Variant < ' m , ' v > > ) {
244
+ self . variant_builder . append_value ( value) ;
245
+ }
246
+
247
+ fn new_list ( & mut self ) -> ListBuilder {
248
+ self . variant_builder . new_list ( )
249
+ }
250
+
251
+ fn new_object ( & mut self ) -> ObjectBuilder {
252
+ self . variant_builder . new_object ( )
253
+ }
254
+ }
255
+
256
+ impl < ' a > VariantArrayVariantBuilder < ' a > {
257
+ /// Constructs a new VariantArrayVariantBuilder
258
+ ///
259
+ /// Note this is not public as this is a structure that is logically
260
+ /// part of the [`VariantArrayBuilder`] and relies on its internal structure
261
+ fn new (
262
+ array_builder : & ' a mut VariantArrayBuilder ,
263
+ metadata_buffer : Vec < u8 > ,
264
+ value_buffer : Vec < u8 > ,
265
+ ) -> Self {
266
+ let metadata_offset = metadata_buffer. len ( ) ;
267
+ let value_offset = value_buffer. len ( ) ;
268
+ VariantArrayVariantBuilder {
269
+ finished : false ,
270
+ metadata_offset,
271
+ value_offset,
272
+ variant_builder : VariantBuilder :: new_with_buffers ( metadata_buffer, value_buffer) ,
273
+ array_builder,
274
+ }
275
+ }
276
+
277
+ /// Return a reference to the underlying `VariantBuilder`
278
+ pub fn inner ( & self ) -> & VariantBuilder {
279
+ & self . variant_builder
280
+ }
281
+
282
+ /// Return a mutable reference to the underlying `VariantBuilder`
283
+ pub fn inner_mut ( & mut self ) -> & mut VariantBuilder {
284
+ & mut self . variant_builder
285
+ }
286
+
287
+ /// Called to finish the in progress variant and write it to the underlying
288
+ /// buffers
289
+ ///
290
+ /// Note if you do not call finish, on drop any changes made to the
291
+ /// underlying buffers will be rolled back.
292
+ pub fn finish ( mut self ) {
293
+ self . finished = true ;
294
+ // Note: buffers are returned and replaced in the drop impl
295
+ }
296
+ }
297
+
298
+ impl < ' a > Drop for VariantArrayVariantBuilder < ' a > {
299
+ /// If the builder was not finished, roll back any changes made to the
300
+ /// underlying buffers (by truncating them)
301
+ fn drop ( & mut self ) {
302
+ let metadata_offset = self . metadata_offset ;
303
+ let value_offset = self . value_offset ;
304
+
305
+ // get the buffers back from the variant builder
306
+ let ( mut metadata_buffer, mut value_buffer) =
307
+ std:: mem:: take ( & mut self . variant_builder ) . finish ( ) ;
308
+
309
+ // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
310
+ let metadata_len = metadata_buffer
311
+ . len ( )
312
+ . checked_sub ( metadata_offset)
313
+ . expect ( "metadata length decreased unexpectedly" ) ;
314
+ let value_len = value_buffer
315
+ . len ( )
316
+ . checked_sub ( value_offset)
317
+ . expect ( "value length decreased unexpectedly" ) ;
318
+
319
+ if self . finished {
320
+ // if the object was finished, commit the changes by putting the
321
+ // offsets and lengths into the parent array builder.
322
+ self . array_builder
323
+ . metadata_locations
324
+ . push ( ( metadata_offset, metadata_len) ) ;
325
+ self . array_builder
326
+ . value_locations
327
+ . push ( ( value_offset, value_len) ) ;
328
+ self . array_builder . nulls . append_non_null ( ) ;
329
+ } else {
330
+ // if the object was not finished, truncate the buffers to the
331
+ // original offsets to roll back any changes. Note this is fast
332
+ // because truncate doesn't free any memory: it just has to drop
333
+ // elements (and u8 doesn't have a destructor)
334
+ metadata_buffer. truncate ( metadata_offset) ;
335
+ value_buffer. truncate ( value_offset) ;
336
+ }
337
+
338
+ // put the buffers back into the array builder
339
+ self . array_builder . metadata_buffer = metadata_buffer;
340
+ self . array_builder . value_buffer = value_buffer;
341
+ }
172
342
}
173
343
174
344
fn binary_view_array_from_buffers (
@@ -220,4 +390,91 @@ mod test {
220
390
) ;
221
391
}
222
392
}
393
+
394
+ /// Test using sub builders to append variants
395
+ #[ test]
396
+ fn test_variant_array_builder_variant_builder ( ) {
397
+ let mut builder = VariantArrayBuilder :: new ( 10 ) ;
398
+ builder. append_null ( ) ; // should not panic
399
+ builder. append_variant ( Variant :: from ( 42i32 ) ) ;
400
+
401
+ // let's make a sub-object in the next row
402
+ let mut sub_builder = builder. variant_builder ( ) ;
403
+ sub_builder
404
+ . new_object ( )
405
+ . with_field ( "foo" , "bar" )
406
+ . finish ( )
407
+ . unwrap ( ) ;
408
+ sub_builder. finish ( ) ; // must call finish to write the variant to the buffers
409
+
410
+ // append a new list
411
+ let mut sub_builder = builder. variant_builder ( ) ;
412
+ sub_builder
413
+ . new_list ( )
414
+ . with_value ( Variant :: from ( 1i32 ) )
415
+ . with_value ( Variant :: from ( 2i32 ) )
416
+ . finish ( ) ;
417
+ sub_builder. finish ( ) ;
418
+ let variant_array = builder. build ( ) ;
419
+
420
+ assert_eq ! ( variant_array. len( ) , 4 ) ;
421
+ assert ! ( variant_array. is_null( 0 ) ) ;
422
+ assert ! ( !variant_array. is_null( 1 ) ) ;
423
+ assert_eq ! ( variant_array. value( 1 ) , Variant :: from( 42i32 ) ) ;
424
+ assert ! ( !variant_array. is_null( 2 ) ) ;
425
+ let variant = variant_array. value ( 2 ) ;
426
+ let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
427
+ assert_eq ! ( variant. get( "foo" ) . unwrap( ) , Variant :: from( "bar" ) ) ;
428
+ assert ! ( !variant_array. is_null( 3 ) ) ;
429
+ let variant = variant_array. value ( 3 ) ;
430
+ let list = variant. as_list ( ) . expect ( "variant to be a list" ) ;
431
+ assert_eq ! ( list. len( ) , 2 ) ;
432
+ }
433
+
434
+ /// Test using non-finished sub builders to append variants
435
+ #[ test]
436
+ fn test_variant_array_builder_variant_builder_reset ( ) {
437
+ let mut builder = VariantArrayBuilder :: new ( 10 ) ;
438
+
439
+ // make a sub-object in the first row
440
+ let mut sub_builder = builder. variant_builder ( ) ;
441
+ sub_builder
442
+ . new_object ( )
443
+ . with_field ( "foo" , 1i32 )
444
+ . finish ( )
445
+ . unwrap ( ) ;
446
+ sub_builder. finish ( ) ; // must call finish to write the variant to the buffers
447
+
448
+ // start appending an object but don't finish
449
+ let mut sub_builder = builder. variant_builder ( ) ;
450
+ sub_builder
451
+ . new_object ( )
452
+ . with_field ( "bar" , 2i32 )
453
+ . finish ( )
454
+ . unwrap ( ) ;
455
+ drop ( sub_builder) ; // drop the sub builder without finishing it
456
+
457
+ // make a third sub-object (this should reset the previous unfinished object)
458
+ let mut sub_builder = builder. variant_builder ( ) ;
459
+ sub_builder
460
+ . new_object ( )
461
+ . with_field ( "baz" , 3i32 )
462
+ . finish ( )
463
+ . unwrap ( ) ;
464
+ sub_builder. finish ( ) ; // must call finish to write the variant to the buffers
465
+
466
+ let variant_array = builder. build ( ) ;
467
+
468
+ // only the two finished objects should be present
469
+ assert_eq ! ( variant_array. len( ) , 2 ) ;
470
+ assert ! ( !variant_array. is_null( 0 ) ) ;
471
+ let variant = variant_array. value ( 0 ) ;
472
+ let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
473
+ assert_eq ! ( variant. get( "foo" ) . unwrap( ) , Variant :: from( 1i32 ) ) ;
474
+
475
+ assert ! ( !variant_array. is_null( 1 ) ) ;
476
+ let variant = variant_array. value ( 1 ) ;
477
+ let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
478
+ assert_eq ! ( variant. get( "baz" ) . unwrap( ) , Variant :: from( 3i32 ) ) ;
479
+ }
223
480
}
0 commit comments