Skip to content

Commit 4dc7f30

Browse files
Append complex variants
1 parent 7b219f9 commit 4dc7f30

File tree

2 files changed

+132
-10
lines changed

2 files changed

+132
-10
lines changed

parquet-variant/src/builder.rs

Lines changed: 131 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
18-
use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8};
18+
use crate::{
19+
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
20+
VariantObject,
21+
};
1922
use arrow_schema::ArrowError;
2023
use indexmap::{IndexMap, IndexSet};
2124
use std::collections::HashSet;
@@ -192,8 +195,7 @@ impl ValueBuffer {
192195
self.0.len()
193196
}
194197

195-
fn append_non_nested_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
196-
let variant = value.into();
198+
fn append_variant<'m, 'd>(&mut self, variant: Variant<'m, 'd>) {
197199
match variant {
198200
Variant::Null => self.append_null(),
199201
Variant::BooleanTrue => self.append_bool(true),
@@ -213,14 +215,14 @@ impl ValueBuffer {
213215
Variant::Binary(v) => self.append_binary(v),
214216
Variant::String(s) => self.append_string(s),
215217
Variant::ShortString(s) => self.append_short_string(s),
216-
Variant::Object(_) | Variant::List(_) => {
217-
unreachable!(
218-
"Nested values are handled specially by ObjectBuilder and ListBuilder"
219-
);
220-
}
218+
_ => unreachable!("Objects and lists must be appended using VariantBuilder::append_object and VariantBuilder::append_list"),
221219
}
222220
}
223221

222+
/// Converts `value` into a [`Variant`] and hands it to `append_variant`.
fn append_non_nested_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
    let converted: Variant<'m, 'd> = value.into();
    self.append_variant(converted);
}
225+
224226
/// Writes out the header byte for a variant object or list
225227
fn append_header(&mut self, header_byte: u8, is_large: bool, num_items: usize) {
226228
let buf = self.inner_mut();
@@ -697,6 +699,79 @@ impl VariantBuilder {
697699
ObjectBuilder::new(parent_state, validate_unique_fields)
698700
}
699701

702+
/// Appends a [`VariantObject`] to the builder.
703+
///
704+
/// # Panics
705+
/// Will panic if the appended object has duplicate field names or any nested validation fails.
706+
/// Use `try_append_object` if you need full validation for untrusted data.
707+
pub fn append_object<'m, 'v>(&mut self, object: VariantObject<'m, 'v>) {
708+
let (parent_state, validate_unique_fields) = self.parent_state();
709+
710+
let mut obj_builder = ObjectBuilder::new(parent_state, validate_unique_fields);
711+
712+
for (field_name, variant) in object.iter() {
713+
obj_builder.insert(field_name, variant);
714+
}
715+
716+
obj_builder.finish().unwrap();
717+
}
718+
719+
/// Appends a [`VariantObject`] to the builder with full validation during iteration.
720+
///
721+
/// Recursively validates all nested variants in the object during iteration.
722+
pub fn try_append_object<'m, 'v>(
723+
&mut self,
724+
object: VariantObject<'m, 'v>,
725+
) -> Result<(), ArrowError> {
726+
let (parent_state, validate_unique_fields) = self.parent_state();
727+
728+
let mut obj_builder = ObjectBuilder::new(parent_state, validate_unique_fields);
729+
730+
for res in object.iter_try() {
731+
let (field_name, variant) = res?;
732+
733+
obj_builder.insert(field_name, variant);
734+
}
735+
736+
obj_builder.finish()?;
737+
738+
Ok(())
739+
}
740+
741+
/// Appends a [`VariantList`] to the builder.
742+
///
743+
/// # Panics
744+
/// Will panic if any nested validation fails during list iteration.
745+
/// Use `try_append_list` if you need full validation for untrusted data.
746+
pub fn append_list<'m, 'v>(&mut self, list: VariantList<'m, 'v>) {
747+
let (parent_state, validate_unique_fields) = self.parent_state();
748+
749+
let mut list_builder = ListBuilder::new(parent_state, validate_unique_fields);
750+
751+
for variant in list.iter() {
752+
list_builder.append_value(variant);
753+
}
754+
755+
list_builder.finish();
756+
}
757+
758+
/// Appends a [`VariantList`] to the builder with full validation during iteration.
759+
///
760+
/// Recursively validates all nested variants in the list during iteration.
761+
pub fn try_append_list<'m, 'v>(&mut self, list: VariantList<'m, 'v>) -> Result<(), ArrowError> {
762+
let (parent_state, validate_unique_fields) = self.parent_state();
763+
764+
let mut list_builder = ListBuilder::new(parent_state, validate_unique_fields);
765+
766+
for variant in list.iter_try() {
767+
list_builder.append_value(variant?);
768+
}
769+
770+
list_builder.finish();
771+
772+
Ok(())
773+
}
774+
700775
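/// Append a value to the builder. Objects and lists are delegated to `append_object` and `append_list`; every other variant is written directly to the value buffer.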
701776
///
702777
/// # Example
@@ -707,7 +782,13 @@ impl VariantBuilder {
707782
/// builder.append_value(42i8);
708783
/// ```
709784
pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
710-
self.buffer.append_non_nested_value(value);
785+
let variant = value.into();
786+
787+
match variant {
788+
Variant::Object(obj) => self.append_object(obj),
789+
Variant::List(list) => self.append_list(list),
790+
primitive => self.buffer.append_variant(primitive),
791+
}
711792
}
712793

713794
/// Finish the builder and return the metadata and value buffers.
@@ -2170,4 +2251,45 @@ mod tests {
21702251
let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
21712252
assert_eq!(variant, Variant::Int8(2));
21722253
}
2254+
2255+
/// Round-trips an object variant through `VariantBuilder::append_value` and
/// checks that the rebuilt variant equals the source.
#[test]
fn test_append_object() {
    let (src_metadata, src_value) = make_object();
    let expected = Variant::new(&src_metadata, &src_value);

    let mut builder = VariantBuilder::new();
    builder.append_value(expected.clone());
    let (metadata, value) = builder.finish();

    let rebuilt = Variant::new(&metadata, &value);
    assert_eq!(expected, rebuilt);
}
2265+
2266+
/// make an object variant
2267+
fn make_object() -> (Vec<u8>, Vec<u8>) {
2268+
let mut builder = VariantBuilder::new();
2269+
2270+
let mut obj = builder.new_object();
2271+
obj.insert("a", true);
2272+
obj.finish().unwrap();
2273+
builder.finish()
2274+
}
2275+
2276+
/// Round-trips a list variant through `VariantBuilder::append_value` and
/// checks that the rebuilt variant equals the source.
#[test]
fn test_append_list() {
    let (src_metadata, src_value) = make_list();
    let expected = Variant::new(&src_metadata, &src_value);

    let mut builder = VariantBuilder::new();
    builder.append_value(expected.clone());
    let (metadata, value) = builder.finish();

    let rebuilt = Variant::new(&metadata, &value);
    assert_eq!(expected, rebuilt);
}
2285+
2286+
/// make a simple List variant
2287+
fn make_list() -> (Vec<u8>, Vec<u8>) {
2288+
let mut builder = VariantBuilder::new();
2289+
let mut list = builder.new_list();
2290+
list.append_value(1234);
2291+
list.append_value("a string value");
2292+
list.finish();
2293+
builder.finish()
2294+
}
21732295
}

parquet-variant/src/variant/metadata.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ impl VariantMetadataHeader {
127127
/// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
128128
#[derive(Debug, Clone, PartialEq)]
129129
pub struct VariantMetadata<'m> {
130-
bytes: &'m [u8],
130+
pub(crate) bytes: &'m [u8],
131131
header: VariantMetadataHeader,
132132
dictionary_size: u32,
133133
first_value_byte: u32,

0 commit comments

Comments
 (0)