@@ -61,9 +61,35 @@ fn write_offset(buf: &mut Vec<u8>, value: usize, nbytes: u8) {
61
61
buf. extend_from_slice ( & bytes[ ..nbytes as usize ] ) ;
62
62
}
63
63
64
/// Wrapper around a `Vec<u8>` that provides methods for appending
/// primitive values, variant types, and metadata.
///
/// This is used internally by the builders to construct the
/// `value` field for [`Variant`] values.
///
/// You can reuse an existing `Vec<u8>` by using the `From` impl.
#[derive(Debug, Default)]
struct ValueBuffer(Vec<u8>);

impl ValueBuffer {
    /// Construct a `ValueBuffer` that will write to a new, empty underlying `Vec`.
    fn new() -> Self {
        Self::default()
    }
}

impl From<Vec<u8>> for ValueBuffer {
    /// Wrap an existing `Vec<u8>`. The bytes already in `value` are kept as-is.
    fn from(value: Vec<u8>) -> Self {
        Self(value)
    }
}

impl From<ValueBuffer> for Vec<u8> {
    /// Unwrap the buffer, returning the underlying `Vec<u8>` without copying.
    fn from(value_buffer: ValueBuffer) -> Self {
        value_buffer.0
    }
}
92
+
67
93
impl ValueBuffer {
68
94
fn append_u8 ( & mut self , term : u8 ) {
69
95
self . 0 . push ( term) ;
@@ -82,7 +108,7 @@ impl ValueBuffer {
82
108
}
83
109
84
110
fn into_inner ( self ) -> Vec < u8 > {
85
- self . 0
111
+ self . into ( )
86
112
}
87
113
88
114
fn inner_mut ( & mut self ) -> & mut Vec < u8 > {
@@ -252,13 +278,31 @@ impl ValueBuffer {
252
278
}
253
279
}
254
280
255
- #[ derive( Default ) ]
281
+ /// Builder for constructing metadata for [`Variant`] values.
282
+ ///
283
+ /// This is used internally by the [`VariantBuilder`] to construct the metadata
284
+ ///
285
+ /// You can use an existing `Vec<u8>` as the metadata buffer by using the `from` impl.
286
+ #[ derive( Default , Debug ) ]
256
287
struct MetadataBuilder {
257
288
// Field names -- field_ids are assigned in insert order
258
289
field_names : IndexSet < String > ,
259
290
260
291
// flag that checks if field names by insertion order are also lexicographically sorted
261
292
is_sorted : bool ,
293
+
294
+ /// Output buffer. Metadata is written to the end of this buffer
295
+ metadata_buffer : Vec < u8 > ,
296
+ }
297
+
298
+ /// Create a new MetadataBuilder that will write to the specified metadata buffer
299
+ impl From < Vec < u8 > > for MetadataBuilder {
300
+ fn from ( metadata_buffer : Vec < u8 > ) -> Self {
301
+ Self {
302
+ metadata_buffer,
303
+ ..Default :: default ( )
304
+ }
305
+ }
262
306
}
263
307
264
308
impl MetadataBuilder {
@@ -307,6 +351,12 @@ impl MetadataBuilder {
307
351
// Calculate metadata size
308
352
let total_dict_size: usize = self . metadata_size ( ) ;
309
353
354
+ let Self {
355
+ field_names,
356
+ is_sorted,
357
+ mut metadata_buffer,
358
+ } = self ;
359
+
310
360
// Determine appropriate offset size based on the larger of dict size or total string size
311
361
let max_offset = std:: cmp:: max ( total_dict_size, nkeys) ;
312
362
let offset_size = int_size ( max_offset) ;
@@ -315,29 +365,29 @@ impl MetadataBuilder {
315
365
let string_start = offset_start + ( nkeys + 1 ) * offset_size as usize ;
316
366
let metadata_size = string_start + total_dict_size;
317
367
318
- let mut metadata = Vec :: with_capacity ( metadata_size) ;
368
+ metadata_buffer . reserve ( metadata_size) ;
319
369
320
370
// Write header: version=1, field names are sorted, with calculated offset_size
321
- metadata . push ( 0x01 | ( self . is_sorted as u8 ) << 4 | ( ( offset_size - 1 ) << 6 ) ) ;
371
+ metadata_buffer . push ( 0x01 | ( is_sorted as u8 ) << 4 | ( ( offset_size - 1 ) << 6 ) ) ;
322
372
323
373
// Write dictionary size
324
- write_offset ( & mut metadata , nkeys, offset_size) ;
374
+ write_offset ( & mut metadata_buffer , nkeys, offset_size) ;
325
375
326
376
// Write offsets
327
377
let mut cur_offset = 0 ;
328
- for key in self . field_names . iter ( ) {
329
- write_offset ( & mut metadata , cur_offset, offset_size) ;
378
+ for key in field_names. iter ( ) {
379
+ write_offset ( & mut metadata_buffer , cur_offset, offset_size) ;
330
380
cur_offset += key. len ( ) ;
331
381
}
332
382
// Write final offset
333
- write_offset ( & mut metadata , cur_offset, offset_size) ;
383
+ write_offset ( & mut metadata_buffer , cur_offset, offset_size) ;
334
384
335
385
// Write string data
336
- for key in self . field_names {
337
- metadata . extend_from_slice ( key. as_bytes ( ) ) ;
386
+ for key in field_names {
387
+ metadata_buffer . extend_from_slice ( key. as_bytes ( ) ) ;
338
388
}
339
389
340
- metadata
390
+ metadata_buffer
341
391
}
342
392
}
343
393
@@ -570,6 +620,41 @@ impl ParentState<'_> {
570
620
/// );
571
621
///
572
622
/// ```
623
+ /// # Example: Reusing Buffers
624
+ ///
625
+ /// You can use the [`VariantBuilder`] to write into existing buffers (for
626
+ /// example to write multiple variants back to back in the same buffer).
627
+ ///
628
+ /// ```
629
+ /// // we will write two variants back to back
630
+ /// use parquet_variant::{Variant, VariantBuilder};
631
+ /// // Append 12345
632
+ /// let mut builder = VariantBuilder::new();
633
+ /// builder.append_value(12345);
634
+ /// let (metadata, value) = builder.finish();
635
+ /// // remember where the first variant ends
636
+ /// let (first_meta_offset, first_meta_len) = (0, metadata.len());
637
+ /// let (first_value_offset, first_value_len) = (0, value.len());
638
+ ///
639
+ /// // now, append a second variant to the same buffers
640
+ /// let mut builder = VariantBuilder::new_with_buffers(metadata, value);
641
+ /// builder.append_value("Foo");
642
+ /// let (metadata, value) = builder.finish();
643
+ ///
644
+ /// // The variants can be referenced in their appropriate location
645
+ /// let variant1 = Variant::new(
646
+ /// &metadata[first_meta_offset..first_meta_len],
647
+ /// &value[first_value_offset..first_value_len]
648
+ /// );
649
+ /// assert_eq!(variant1, Variant::Int32(12345));
650
+ ///
651
+ /// let variant2 = Variant::new(
652
+ /// &metadata[first_meta_len..],
653
+ /// &value[first_value_len..]
654
+ /// );
655
+ /// assert_eq!(variant2, Variant::from("Foo"));
656
+ /// ```
657
+ ///
573
658
/// # Example: Unique Field Validation
574
659
///
575
660
/// This example shows how enabling unique field validation will cause an error
@@ -626,23 +711,33 @@ impl ParentState<'_> {
626
711
/// let (metadata, value) = builder.finish();
627
712
/// let variant = Variant::try_new(&metadata, &value).unwrap();
628
713
/// ```
629
- ///
630
- #[ derive( Default ) ]
714
+ #[ derive( Default , Debug ) ]
631
715
pub struct VariantBuilder {
632
716
buffer : ValueBuffer ,
633
717
metadata_builder : MetadataBuilder ,
634
718
validate_unique_fields : bool ,
635
719
}
636
720
637
721
impl VariantBuilder {
722
+ /// Create a new VariantBuilder with new underlying buffer
638
723
pub fn new ( ) -> Self {
639
724
Self {
640
- buffer : ValueBuffer :: default ( ) ,
725
+ buffer : ValueBuffer :: new ( ) ,
641
726
metadata_builder : MetadataBuilder :: default ( ) ,
642
727
validate_unique_fields : false ,
643
728
}
644
729
}
645
730
731
+ /// Create a new VariantBuilder that will write the metadata and values to
732
+ /// the specified buffers.
733
+ pub fn new_with_buffers ( metadata_buffer : Vec < u8 > , value_buffer : Vec < u8 > ) -> Self {
734
+ Self {
735
+ buffer : ValueBuffer :: from ( value_buffer) ,
736
+ metadata_builder : MetadataBuilder :: from ( metadata_buffer) ,
737
+ validate_unique_fields : false ,
738
+ }
739
+ }
740
+
646
741
/// Enables validation of unique field keys in nested objects.
647
742
///
648
743
/// This setting is propagated to all [`ObjectBuilder`]s created through this [`VariantBuilder`]
@@ -1916,6 +2011,80 @@ mod tests {
1916
2011
assert_eq ! ( metadata. num_field_names( ) , 3 ) ;
1917
2012
}
1918
2013
2014
/// Writing a list, an object and a string back to back into reused buffers
/// must read back the same as building each one in its own buffers
#[test]
fn test_with_existing_buffers_nested() {
    // Reference encodings: every variant built in its own fresh buffers
    let mut builder = VariantBuilder::new();
    append_test_list(&mut builder);
    let (list_meta, list_value) = builder.finish();
    let expected_list = Variant::new(&list_meta, &list_value);

    let mut builder = VariantBuilder::new();
    append_test_object(&mut builder);
    let (object_meta, object_value) = builder.finish();
    let expected_object = Variant::new(&object_meta, &object_value);

    let mut builder = VariantBuilder::new();
    builder.append_value("This is a string");
    let (string_meta, string_value) = builder.finish();
    let expected_string = Variant::new(&string_meta, &string_value);

    // Now append the same three variants to a single pair of buffers that
    // is handed from one builder to the next, recording the byte range
    // each variant occupies.
    let mut builder = VariantBuilder::new();
    append_test_list(&mut builder);
    let (metadata, value) = builder.finish();
    let list_meta_range = 0..metadata.len();
    let list_value_range = 0..value.len();

    // reuse the same buffers for the object
    let mut builder = VariantBuilder::new_with_buffers(metadata, value);
    append_test_object(&mut builder);
    let (metadata, value) = builder.finish();
    let object_meta_range = list_meta_range.end..metadata.len();
    let object_value_range = list_value_range.end..value.len();

    // and once more for the string
    let mut builder = VariantBuilder::new_with_buffers(metadata, value);
    builder.append_value("This is a string");
    let (metadata, value) = builder.finish();
    let string_meta_range = object_meta_range.end..metadata.len();
    let string_value_range = object_value_range.end..value.len();

    // verify each variant can be read back from its own slice
    let actual_list = Variant::new(&metadata[list_meta_range], &value[list_value_range]);
    assert_eq!(actual_list, expected_list);

    let actual_object = Variant::new(&metadata[object_meta_range], &value[object_value_range]);
    assert_eq!(actual_object, expected_object);

    let actual_string = Variant::new(&metadata[string_meta_range], &value[string_value_range]);
    assert_eq!(actual_string, expected_string);
}
2072
+
2073
+ /// append a simple List variant
2074
+ fn append_test_list ( builder : & mut VariantBuilder ) {
2075
+ let mut list = builder. new_list ( ) ;
2076
+ list. append_value ( 1234 ) ;
2077
+ list. append_value ( "a string value" ) ;
2078
+ list. finish ( ) ;
2079
+ }
2080
+
2081
+ /// append an object variant
2082
+ fn append_test_object ( builder : & mut VariantBuilder ) {
2083
+ let mut obj = builder. new_object ( ) ;
2084
+ obj. insert ( "a" , true ) ;
2085
+ obj. finish ( ) . unwrap ( ) ;
2086
+ }
2087
+
1919
2088
#[ test]
1920
2089
fn test_variant_builder_to_list_builder_no_finish ( ) {
1921
2090
// Create a list builder but never finish it
0 commit comments