20
20
use crate :: VariantArray ;
21
21
use arrow:: array:: { ArrayRef , BinaryViewArray , BinaryViewBuilder , NullBufferBuilder , StructArray } ;
22
22
use arrow_schema:: { DataType , Field , Fields } ;
23
- use parquet_variant:: { Variant , VariantBuilder } ;
23
+ use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilder , VariantBuilderExt } ;
24
24
use std:: sync:: Arc ;
25
25
26
26
/// A builder for [`VariantArray`]
@@ -147,11 +147,9 @@ impl VariantArrayBuilder {
147
147
148
148
/// Append the [`Variant`] to the builder as the next row
149
149
pub fn append_variant ( & mut self , variant : Variant ) {
150
- // TODO make this more efficient by avoiding the intermediate buffers
151
- let mut variant_builder = VariantBuilder :: new ( ) ;
152
- variant_builder. append_value ( variant) ;
153
- let ( metadata, value) = variant_builder. finish ( ) ;
154
- self . append_variant_buffers ( & metadata, & value) ;
150
+ let mut direct_builder = self . variant_builder ( ) ;
151
+ direct_builder. variant_builder . append_value ( variant) ;
152
+ direct_builder. finish ( )
155
153
}
156
154
157
155
/// Append a metadata and values buffer to the builder
@@ -168,7 +166,156 @@ impl VariantArrayBuilder {
168
166
self . value_buffer . extend_from_slice ( value) ;
169
167
}
170
168
171
- // TODO: Return a Variant builder that will write to the underlying buffers (TODO)
169
+ /// Return a `VariantArrayVariantBuilder` that writes directly to the
170
+ /// buffers of this builder.
171
+ ///
172
+ /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder
173
+ ///
174
+ /// # Example
175
+ /// ```
176
+ /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
177
+ /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder};
178
+ /// let mut array_builder = VariantArrayBuilder::new(10);
179
+ ///
180
+ /// // First row has a string
181
+ /// let mut variant_builder = array_builder.variant_builder();
182
+ /// variant_builder.append_value("Hello, World!");
183
+ /// // must call finish to write the variant to the buffers
184
+ /// variant_builder.finish();
185
+ ///
186
+ /// // Second row is an object
187
+ /// let mut variant_builder = array_builder.variant_builder();
188
+ /// let mut obj = variant_builder.new_object();
189
+ /// obj.insert("my_field", 42i64);
190
+ /// obj.finish().unwrap();
191
+ /// variant_builder.finish();
192
+ ///
193
+ /// // finalize the array
194
+ /// let variant_array: VariantArray = array_builder.build();
195
+ ///
196
+ /// // verify what we wrote is still there
197
+ /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!"));
198
+ /// assert!(variant_array.value(1).as_object().is_some());
199
+ /// ```
200
+ pub fn variant_builder ( & mut self ) -> VariantArrayVariantBuilder {
201
+ // append directly into the metadata and value buffers
202
+ let metadata_buffer = std:: mem:: take ( & mut self . metadata_buffer ) ;
203
+ let value_buffer = std:: mem:: take ( & mut self . value_buffer ) ;
204
+ let metadata_offset = metadata_buffer. len ( ) ;
205
+ let value_offset = value_buffer. len ( ) ;
206
+ VariantArrayVariantBuilder {
207
+ finished : false ,
208
+ metadata_offset,
209
+ value_offset,
210
+ variant_builder : VariantBuilder :: new_with_buffers ( metadata_buffer, value_buffer) ,
211
+ array_builder : self ,
212
+ }
213
+ }
214
+ }
215
+
216
+ /// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`.
217
+ ///
218
+ /// Note this struct implements [`VariantBuilderExt`], so it can be used
219
+ /// as a drop-in replacement for [`VariantBuilder`] in most cases.
220
+ ///
221
+ /// If [`Self::finish`] is not called, any changes will be rolled back
222
+ ///
223
+ /// See [`VariantArrayBuilder::variant_builder`] for an example
224
+ pub struct VariantArrayVariantBuilder < ' a > {
225
+ /// was finish called?
226
+ finished : bool ,
227
+ /// starting metadata offset in the underlying buffers
228
+ metadata_offset : usize ,
229
+ /// starting value offset
230
+ value_offset : usize ,
231
+ /// Parent array builder that this variant builder writes to. Buffers
232
+ /// have been moved into the variant builder, and must be returned on
233
+ /// drop
234
+ array_builder : & ' a mut VariantArrayBuilder ,
235
+ /// Builder for the variant, temporarily owns the buffers from `array_builder`
236
+ variant_builder : VariantBuilder ,
237
+ }
238
+
239
+ impl < ' a > VariantBuilderExt for VariantArrayVariantBuilder < ' a > {
240
+ fn append_value < ' m , ' v > ( & mut self , value : impl Into < Variant < ' m , ' v > > ) {
241
+ self . variant_builder . append_value ( value) ;
242
+ }
243
+
244
+ fn new_list ( & mut self ) -> ListBuilder {
245
+ self . variant_builder . new_list ( )
246
+ }
247
+
248
+ fn new_object ( & mut self ) -> ObjectBuilder {
249
+ self . variant_builder . new_object ( )
250
+ }
251
+ }
252
+
253
+ impl VariantArrayVariantBuilder < ' _ > {
254
+ /// Return a reference to the underlying `VariantBuilder`
255
+ pub fn inner ( & self ) -> & VariantBuilder {
256
+ & self . variant_builder
257
+ }
258
+
259
+ /// Return a mutable reference to the underlying `VariantBuilder`
260
+ pub fn inner_mut ( & mut self ) -> & mut VariantBuilder {
261
+ & mut self . variant_builder
262
+ }
263
+
264
+ /// Called to finish the in progress variant and write it to the underlying
265
+ /// buffers
266
+ ///
267
+ /// Note if you do not call finish, on drop any changes made to the
268
+ /// underlying buffers will be rolled back.
269
+ pub fn finish ( mut self ) {
270
+ self . finished = true ;
271
+ // Note: buffers are returned and replaced in the drop impl
272
+ }
273
+ }
274
+
275
+ impl < ' a > Drop for VariantArrayVariantBuilder < ' a > {
276
+ /// If the builder was not finished, roll back any changes made to the
277
+ /// underlying buffers (by truncating them)
278
+ fn drop ( & mut self ) {
279
+ let metadata_offset = self . metadata_offset ;
280
+ let value_offset = self . value_offset ;
281
+
282
+ // get the buffers back from the variant builder
283
+ let ( mut metadata_buffer, mut value_buffer) =
284
+ std:: mem:: take ( & mut self . variant_builder ) . finish ( ) ;
285
+
286
+ // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
287
+ let metadata_len = metadata_buffer
288
+ . len ( )
289
+ . checked_sub ( metadata_offset)
290
+ . expect ( "metadata length decreased unexpectedly" ) ;
291
+ let value_len = value_buffer
292
+ . len ( )
293
+ . checked_sub ( value_offset)
294
+ . expect ( "value length decreased unexpectedly" ) ;
295
+
296
+ if self . finished {
297
+ // if the object was finished, commit the changes by putting the
298
+ // offsets and lengths into the parent array builder.
299
+ self . array_builder
300
+ . metadata_locations
301
+ . push ( ( metadata_offset, metadata_len) ) ;
302
+ self . array_builder
303
+ . value_locations
304
+ . push ( ( value_offset, value_len) ) ;
305
+ self . array_builder . nulls . append_non_null ( ) ;
306
+ } else {
307
+ // if the object was not finished, truncate the buffers to the
308
+ // original offsets to roll back any changes. Note this is fast
309
+ // because truncate doesn't free any memory: it just has to drop
310
+ // elements (but u8 doesn't have a destructor)
311
+ metadata_buffer. truncate ( metadata_offset) ;
312
+ value_buffer. truncate ( value_offset) ;
313
+ }
314
+
315
+ // put the buffers back into the array builder
316
+ self . array_builder . metadata_buffer = metadata_buffer;
317
+ self . array_builder . value_buffer = value_buffer;
318
+ }
172
319
}
173
320
174
321
fn binary_view_array_from_buffers (
0 commit comments