Skip to content

Commit fce19eb

Browse files
committed
Fix Cargo for benchmark
1 parent 99eb1bc commit fce19eb

File tree

5 files changed

+291
-32
lines changed

5 files changed

+291
-32
lines changed

parquet-variant-compute/src/from_json.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
use crate::{VariantArray, VariantArrayBuilder};
2222
use arrow::array::{Array, ArrayRef, StringArray};
2323
use arrow_schema::ArrowError;
24-
use parquet_variant::VariantBuilder;
2524
use parquet_variant_json::json_to_variant;
2625

2726
/// Parse a batch of JSON strings into a batch of Variants represented as
@@ -41,10 +40,10 @@ pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result<VariantArray, Ar
4140
// The subfields are expected to be non-nullable according to the parquet variant spec.
4241
variant_array_builder.append_null();
4342
} else {
44-
let mut vb = VariantBuilder::new();
43+
let mut vb = variant_array_builder.variant_builder();
44+
// parse JSON directly to the variant builder
4545
json_to_variant(input_string_array.value(i), &mut vb)?;
46-
let (metadata, value) = vb.finish();
47-
variant_array_builder.append_variant_buffers(&metadata, &value);
46+
vb.finish()
4847
}
4948
}
5049
Ok(variant_array_builder.build())

parquet-variant-compute/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ mod variant_array_builder;
2222
pub mod variant_get;
2323

2424
pub use variant_array::VariantArray;
25-
pub use variant_array_builder::VariantArrayBuilder;
25+
pub use variant_array_builder::{VariantArrayBuilder, VariantArrayVariantBuilder};
2626

2727
pub use from_json::batch_json_string_to_variant;
2828
pub use to_json::batch_variant_to_json_string;

parquet-variant-compute/src/variant_array_builder.rs

Lines changed: 267 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
use crate::VariantArray;
2121
use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
2222
use arrow_schema::{DataType, Field, Fields};
23-
use parquet_variant::{Variant, VariantBuilder};
23+
use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilder, VariantBuilderExt};
2424
use std::sync::Arc;
2525

2626
/// A builder for [`VariantArray`]
@@ -37,13 +37,13 @@ use std::sync::Arc;
3737
/// ## Example:
3838
/// ```
3939
/// # use arrow::array::Array;
40-
/// # use parquet_variant::{Variant, VariantBuilder};
40+
/// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
4141
/// # use parquet_variant_compute::VariantArrayBuilder;
4242
/// // Create a new VariantArrayBuilder with a capacity of 100 rows
4343
/// let mut builder = VariantArrayBuilder::new(100);
4444
/// // append variant values
4545
/// builder.append_variant(Variant::from(42));
46-
/// // append a null row
46+
/// // append a null row (note not a Variant::Null)
4747
/// builder.append_null();
4848
/// // append a pre-constructed metadata and value buffers
4949
/// let (metadata, value) = {
@@ -55,9 +55,14 @@ use std::sync::Arc;
5555
/// };
5656
/// builder.append_variant_buffers(&metadata, &value);
5757
///
58+
/// // Use `variant_builder` method to write values directly to the output array
59+
/// let mut vb = builder.variant_builder();
60+
/// vb.append_value("Hello, World!");
61+
/// vb.finish(); // Note: call finish to write the variant to the buffers
62+
///
5863
/// // create the final VariantArray
5964
/// let variant_array = builder.build();
60-
/// assert_eq!(variant_array.len(), 3);
65+
/// assert_eq!(variant_array.len(), 4);
6166
/// // // Access the values
6267
/// // row 1 is not null and is an integer
6368
/// assert!(!variant_array.is_null(0));
@@ -67,6 +72,9 @@ use std::sync::Arc;
6772
/// // row 2 is not null and is an object
6873
/// assert!(!variant_array.is_null(2));
6974
/// assert!(variant_array.value(2).as_object().is_some());
75+
/// // row 3 is a string
76+
/// assert!(!variant_array.is_null(3));
77+
/// assert_eq!(variant_array.value(3), Variant::from("Hello, World!"));
7078
/// ```
7179
#[derive(Debug)]
7280
pub struct VariantArrayBuilder {
@@ -147,11 +155,9 @@ impl VariantArrayBuilder {
147155

148156
/// Append the [`Variant`] to the builder as the next row
149157
pub fn append_variant(&mut self, variant: Variant) {
150-
// TODO make this more efficient by avoiding the intermediate buffers
151-
let mut variant_builder = VariantBuilder::new();
152-
variant_builder.append_value(variant);
153-
let (metadata, value) = variant_builder.finish();
154-
self.append_variant_buffers(&metadata, &value);
158+
let mut direct_builder = self.variant_builder();
159+
direct_builder.variant_builder.append_value(variant);
160+
direct_builder.finish()
155161
}
156162

157163
/// Append a metadata and values buffer to the builder
@@ -168,7 +174,171 @@ impl VariantArrayBuilder {
168174
self.value_buffer.extend_from_slice(value);
169175
}
170176

171-
// TODO: Return a Variant builder that will write to the underlying buffers (TODO)
177+
/// Return a `VariantArrayVariantBuilder` that writes directly to the
178+
/// buffers of this builder.
179+
///
180+
/// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder
181+
///
182+
/// # Example
183+
/// ```
184+
/// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
185+
/// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder};
186+
/// let mut array_builder = VariantArrayBuilder::new(10);
187+
///
188+
/// // First row has a string
189+
/// let mut variant_builder = array_builder.variant_builder();
190+
/// variant_builder.append_value("Hello, World!");
191+
/// // must call finish to write the variant to the buffers
192+
/// variant_builder.finish();
193+
///
194+
/// // Second row is an object
195+
/// let mut variant_builder = array_builder.variant_builder();
196+
/// variant_builder
197+
/// .new_object()
198+
/// .with_field("my_field", 42i64)
199+
/// .finish()
200+
/// .unwrap();
201+
/// variant_builder.finish();
202+
///
203+
/// // finalize the array
204+
/// let variant_array: VariantArray = array_builder.build();
205+
///
206+
/// // verify what we wrote is still there
207+
/// assert_eq!(variant_array.value(0), Variant::from("Hello, World!"));
208+
/// assert!(variant_array.value(1).as_object().is_some());
209+
/// ```
210+
pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder {
211+
// append directly into the metadata and value buffers
212+
let metadata_buffer = std::mem::take(&mut self.metadata_buffer);
213+
let value_buffer = std::mem::take(&mut self.value_buffer);
214+
VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer)
215+
}
216+
}
217+
218+
/// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`.
219+
///
220+
/// Note this struct implements [`VariantBuilderExt`], so it can be used
221+
/// as a drop-in replacement for [`VariantBuilder`] in most cases.
222+
///
223+
/// If [`Self::finish`] is not called, any changes will be rolled back
224+
///
225+
/// See [`VariantArrayBuilder::variant_builder`] for an example
226+
pub struct VariantArrayVariantBuilder<'a> {
227+
/// was finish called?
228+
finished: bool,
229+
/// starting offset in the variant_builder's `metadata` buffer
230+
metadata_offset: usize,
231+
/// starting offset in the variant_builder's `value` buffer
232+
value_offset: usize,
233+
/// Parent array builder that this variant builder writes to. Buffers
234+
/// have been moved into the variant builder, and must be returned on
235+
/// drop
236+
array_builder: &'a mut VariantArrayBuilder,
237+
/// Builder for the in progress variant value, temporarily owns the buffers
238+
/// from `array_builder`
239+
variant_builder: VariantBuilder,
240+
}
241+
242+
impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> {
243+
fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
244+
self.variant_builder.append_value(value);
245+
}
246+
247+
fn new_list(&mut self) -> ListBuilder {
248+
self.variant_builder.new_list()
249+
}
250+
251+
fn new_object(&mut self) -> ObjectBuilder {
252+
self.variant_builder.new_object()
253+
}
254+
}
255+
256+
impl<'a> VariantArrayVariantBuilder<'a> {
257+
/// Constructs a new VariantArrayVariantBuilder
258+
///
259+
/// Note this is not public as this is a structure that is logically
260+
/// part of the [`VariantArrayBuilder`] and relies on its internal structure
261+
fn new(
262+
array_builder: &'a mut VariantArrayBuilder,
263+
metadata_buffer: Vec<u8>,
264+
value_buffer: Vec<u8>,
265+
) -> Self {
266+
let metadata_offset = metadata_buffer.len();
267+
let value_offset = value_buffer.len();
268+
VariantArrayVariantBuilder {
269+
finished: false,
270+
metadata_offset,
271+
value_offset,
272+
variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, value_buffer),
273+
array_builder,
274+
}
275+
}
276+
277+
/// Return a reference to the underlying `VariantBuilder`
278+
pub fn inner(&self) -> &VariantBuilder {
279+
&self.variant_builder
280+
}
281+
282+
/// Return a mutable reference to the underlying `VariantBuilder`
283+
pub fn inner_mut(&mut self) -> &mut VariantBuilder {
284+
&mut self.variant_builder
285+
}
286+
287+
/// Called to finish the in progress variant and write it to the underlying
288+
/// buffers
289+
///
290+
/// Note if you do not call finish, on drop any changes made to the
291+
/// underlying buffers will be rolled back.
292+
pub fn finish(mut self) {
293+
self.finished = true;
294+
// Note: buffers are returned and replaced in the drop impl
295+
}
296+
}
297+
298+
impl<'a> Drop for VariantArrayVariantBuilder<'a> {
299+
/// If the builder was not finished, roll back any changes made to the
300+
/// underlying buffers (by truncating them)
301+
fn drop(&mut self) {
302+
let metadata_offset = self.metadata_offset;
303+
let value_offset = self.value_offset;
304+
305+
// get the buffers back from the variant builder
306+
let (mut metadata_buffer, mut value_buffer) =
307+
std::mem::take(&mut self.variant_builder).finish();
308+
309+
// Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
310+
let metadata_len = metadata_buffer
311+
.len()
312+
.checked_sub(metadata_offset)
313+
.expect("metadata length decreased unexpectedly");
314+
let value_len = value_buffer
315+
.len()
316+
.checked_sub(value_offset)
317+
.expect("value length decreased unexpectedly");
318+
319+
if self.finished {
320+
// if the object was finished, commit the changes by putting the
321+
// offsets and lengths into the parent array builder.
322+
self.array_builder
323+
.metadata_locations
324+
.push((metadata_offset, metadata_len));
325+
self.array_builder
326+
.value_locations
327+
.push((value_offset, value_len));
328+
self.array_builder.nulls.append_non_null();
329+
} else {
330+
// if the object was not finished, truncate the buffers to the
331+
// original offsets to roll back any changes. Note this is fast
332+
// because truncate doesn't free any memory: it just has to drop
333+
// elements (and u8 doesn't have a destructor)
334+
metadata_buffer.truncate(metadata_offset);
335+
value_buffer.truncate(value_offset);
336+
}
337+
338+
// put the buffers back into the array builder
339+
self.array_builder.metadata_buffer = metadata_buffer;
340+
self.array_builder.value_buffer = value_buffer;
341+
}
172342
}
173343

174344
fn binary_view_array_from_buffers(
@@ -220,4 +390,91 @@ mod test {
220390
);
221391
}
222392
}
393+
394+
/// Test using sub builders to append variants
395+
#[test]
396+
fn test_variant_array_builder_variant_builder() {
397+
let mut builder = VariantArrayBuilder::new(10);
398+
builder.append_null(); // should not panic
399+
builder.append_variant(Variant::from(42i32));
400+
401+
// let's make a sub-object in the next row
402+
let mut sub_builder = builder.variant_builder();
403+
sub_builder
404+
.new_object()
405+
.with_field("foo", "bar")
406+
.finish()
407+
.unwrap();
408+
sub_builder.finish(); // must call finish to write the variant to the buffers
409+
410+
// append a new list
411+
let mut sub_builder = builder.variant_builder();
412+
sub_builder
413+
.new_list()
414+
.with_value(Variant::from(1i32))
415+
.with_value(Variant::from(2i32))
416+
.finish();
417+
sub_builder.finish();
418+
let variant_array = builder.build();
419+
420+
assert_eq!(variant_array.len(), 4);
421+
assert!(variant_array.is_null(0));
422+
assert!(!variant_array.is_null(1));
423+
assert_eq!(variant_array.value(1), Variant::from(42i32));
424+
assert!(!variant_array.is_null(2));
425+
let variant = variant_array.value(2);
426+
let variant = variant.as_object().expect("variant to be an object");
427+
assert_eq!(variant.get("foo").unwrap(), Variant::from("bar"));
428+
assert!(!variant_array.is_null(3));
429+
let variant = variant_array.value(3);
430+
let list = variant.as_list().expect("variant to be a list");
431+
assert_eq!(list.len(), 2);
432+
}
433+
434+
/// Test using non-finished sub builders to append variants
435+
#[test]
436+
fn test_variant_array_builder_variant_builder_reset() {
437+
let mut builder = VariantArrayBuilder::new(10);
438+
439+
// make a sub-object in the first row
440+
let mut sub_builder = builder.variant_builder();
441+
sub_builder
442+
.new_object()
443+
.with_field("foo", 1i32)
444+
.finish()
445+
.unwrap();
446+
sub_builder.finish(); // must call finish to write the variant to the buffers
447+
448+
// start appending an object but don't finish
449+
let mut sub_builder = builder.variant_builder();
450+
sub_builder
451+
.new_object()
452+
.with_field("bar", 2i32)
453+
.finish()
454+
.unwrap();
455+
drop(sub_builder); // drop the sub builder without finishing it
456+
457+
// make a third sub-object (this should reset the previous unfinished object)
458+
let mut sub_builder = builder.variant_builder();
459+
sub_builder
460+
.new_object()
461+
.with_field("baz", 3i32)
462+
.finish()
463+
.unwrap();
464+
sub_builder.finish(); // must call finish to write the variant to the buffers
465+
466+
let variant_array = builder.build();
467+
468+
// only the two finished objects should be present
469+
assert_eq!(variant_array.len(), 2);
470+
assert!(!variant_array.is_null(0));
471+
let variant = variant_array.value(0);
472+
let variant = variant.as_object().expect("variant to be an object");
473+
assert_eq!(variant.get("foo").unwrap(), Variant::from(1i32));
474+
475+
assert!(!variant_array.is_null(1));
476+
let variant = variant_array.value(1);
477+
let variant = variant.as_object().expect("variant to be an object");
478+
assert_eq!(variant.get("baz").unwrap(), Variant::from(3i32));
479+
}
223480
}

0 commit comments

Comments
 (0)