Skip to content

Commit b054960

Browse files
Append complex variants
1 parent 7b219f9 commit b054960

File tree

2 files changed

+126
-10
lines changed

2 files changed

+126
-10
lines changed

parquet-variant/src/builder.rs

Lines changed: 125 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
18-
use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8};
18+
use crate::{
19+
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
20+
VariantObject,
21+
};
1922
use arrow_schema::ArrowError;
2023
use indexmap::{IndexMap, IndexSet};
2124
use std::collections::HashSet;
@@ -192,8 +195,7 @@ impl ValueBuffer {
192195
self.0.len()
193196
}
194197

195-
fn append_non_nested_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
196-
let variant = value.into();
198+
fn append_variant<'m, 'd>(&mut self, variant: Variant<'m, 'd>) {
197199
match variant {
198200
Variant::Null => self.append_null(),
199201
Variant::BooleanTrue => self.append_bool(true),
@@ -213,14 +215,14 @@ impl ValueBuffer {
213215
Variant::Binary(v) => self.append_binary(v),
214216
Variant::String(s) => self.append_string(s),
215217
Variant::ShortString(s) => self.append_short_string(s),
216-
Variant::Object(_) | Variant::List(_) => {
217-
unreachable!(
218-
"Nested values are handled specially by ObjectBuilder and ListBuilder"
219-
);
220-
}
218+
_ => unreachable!("Objects and lists must be appended using VariantBuilder::append_object and VariantBuilder::append_list"),
221219
}
222220
}
223221

222+
/// Converts `value` into a [`Variant`] and forwards it to `append_variant`.
fn append_non_nested_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
    let converted: Variant<'m, 'd> = value.into();
    self.append_variant(converted);
}
225+
224226
/// Writes out the header byte for a variant object or list
225227
fn append_header(&mut self, header_byte: u8, is_large: bool, num_items: usize) {
226228
let buf = self.inner_mut();
@@ -697,6 +699,73 @@ impl VariantBuilder {
697699
ObjectBuilder::new(parent_state, validate_unique_fields)
698700
}
699701

702+
/// Appends a [`VariantObject`] to the builder.
703+
fn append_object<'m, 'v>(&mut self, object: VariantObject<'m, 'v>) -> Result<(), ArrowError> {
704+
let (parent_state, validate_unique_fields) = self.parent_state();
705+
706+
let mut obj_builder = ObjectBuilder::new(parent_state, validate_unique_fields);
707+
708+
for (field_name, variant) in object.iter() {
709+
obj_builder.insert(field_name, variant);
710+
}
711+
712+
obj_builder.finish()?;
713+
714+
Ok(())
715+
}
716+
717+
/// Appends a [`VariantObject`] to the builder with full validation during iteration.
718+
///
719+
/// Recursively validates all nested variants in the object during iteration.
720+
fn try_append_object<'m, 'v>(
721+
&mut self,
722+
object: VariantObject<'m, 'v>,
723+
) -> Result<(), ArrowError> {
724+
let (parent_state, validate_unique_fields) = self.parent_state();
725+
726+
let mut obj_builder = ObjectBuilder::new(parent_state, validate_unique_fields);
727+
728+
for res in object.iter_try() {
729+
let (field_name, variant) = res?;
730+
731+
obj_builder.insert(field_name, variant);
732+
}
733+
734+
obj_builder.finish()?;
735+
736+
Ok(())
737+
}
738+
739+
/// Appends a [`VariantList`] to the builder.
740+
fn append_list<'m, 'v>(&mut self, list: VariantList<'m, 'v>) {
741+
let (parent_state, validate_unique_fields) = self.parent_state();
742+
743+
let mut list_builder = ListBuilder::new(parent_state, validate_unique_fields);
744+
745+
for variant in list.iter() {
746+
list_builder.append_value(variant);
747+
}
748+
749+
list_builder.finish();
750+
}
751+
752+
/// Appends a [`VariantList`] to the builder with full validation during iteration.
753+
///
754+
/// Recursively validates all nested variants in the list during iteration.
755+
fn try_append_list<'m, 'v>(&mut self, list: VariantList<'m, 'v>) -> Result<(), ArrowError> {
756+
let (parent_state, validate_unique_fields) = self.parent_state();
757+
758+
let mut list_builder = ListBuilder::new(parent_state, validate_unique_fields);
759+
760+
for variant in list.iter_try() {
761+
list_builder.append_value(variant?);
762+
}
763+
764+
list_builder.finish();
765+
766+
Ok(())
767+
}
768+
700769
/// Append a value to the builder, including nested objects and lists.
701770
///
702771
/// # Example
@@ -707,7 +776,13 @@ impl VariantBuilder {
707776
/// builder.append_value(42i8);
708777
/// ```
709778
pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
710-
self.buffer.append_non_nested_value(value);
779+
let variant = value.into();
780+
781+
match variant {
782+
Variant::Object(obj) => self.append_object(obj),
783+
Variant::List(list) => self.append_list(list),
784+
primitive => self.buffer.append_variant(primitive),
785+
}
711786
}
712787

713788
/// Finish the builder and return the metadata and value buffers.
@@ -2170,4 +2245,45 @@ mod tests {
21702245
let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
21712246
assert_eq!(variant, Variant::Int8(2));
21722247
}
2248+
2249+
/// Appending an existing object variant to a fresh builder must produce an
/// equal variant.
#[test]
fn test_append_object() {
    let (source_metadata, source_value) = make_object();
    let variant = Variant::new(&source_metadata, &source_value);

    let mut builder = VariantBuilder::new();
    builder.append_value(variant.clone());

    let (metadata, value) = builder.finish();
    let rebuilt = Variant::new(&metadata, &value);
    assert_eq!(variant, rebuilt);
}
2259+
2260+
/// make an object variant
2261+
fn make_object() -> (Vec<u8>, Vec<u8>) {
2262+
let mut builder = VariantBuilder::new();
2263+
2264+
let mut obj = builder.new_object();
2265+
obj.insert("a", true);
2266+
obj.finish().unwrap();
2267+
builder.finish()
2268+
}
2269+
2270+
/// Appending an existing list variant to a fresh builder must produce an
/// equal variant.
#[test]
fn test_append_list() {
    let (source_metadata, source_value) = make_list();
    let variant = Variant::new(&source_metadata, &source_value);

    let mut builder = VariantBuilder::new();
    builder.append_value(variant.clone());

    let (metadata, value) = builder.finish();
    let rebuilt = Variant::new(&metadata, &value);
    assert_eq!(variant, rebuilt);
}
2279+
2280+
/// make a simple List variant
2281+
fn make_list() -> (Vec<u8>, Vec<u8>) {
2282+
let mut builder = VariantBuilder::new();
2283+
let mut list = builder.new_list();
2284+
list.append_value(1234);
2285+
list.append_value("a string value");
2286+
list.finish();
2287+
builder.finish()
2288+
}
21732289
}

parquet-variant/src/variant/metadata.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ impl VariantMetadataHeader {
127127
/// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
128128
#[derive(Debug, Clone, PartialEq)]
129129
pub struct VariantMetadata<'m> {
130-
bytes: &'m [u8],
130+
pub(crate) bytes: &'m [u8],
131131
header: VariantMetadataHeader,
132132
dictionary_size: u32,
133133
first_value_byte: u32,

0 commit comments

Comments
 (0)