Skip to content
Draft
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 141 additions & 3 deletions parquet-variant-compute/src/variant_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,15 +303,16 @@ mod test {
use std::sync::Arc;

use arrow::array::{
Array, ArrayRef, BinaryViewArray, Float16Array, Float32Array, Float64Array, Int16Array,
Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, UInt32Array,
make_builder, Array, ArrayRef, BinaryBuilder, BinaryViewArray, Float16Array, Float32Array,
Float64Array, GenericListBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
StringArray, StringBuilder, StructArray, StructBuilder, UInt16Array, UInt32Array,
UInt64Array, UInt8Array,
};
use arrow::buffer::NullBuffer;
use arrow::compute::CastOptions;
use arrow::datatypes::DataType::{Int16, Int32, Int64, UInt16, UInt32, UInt64, UInt8};
use arrow_schema::{DataType, Field, FieldRef, Fields};
use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
use parquet_variant::{Variant, VariantBuilder, VariantPath, EMPTY_VARIANT_METADATA_BYTES};

use crate::json_to_variant;
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
Expand Down Expand Up @@ -1261,7 +1262,144 @@ mod test {
let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)]));
assert_eq!(&result, &expected);
}
/// Extracts element 0 of every shredded list as a `VariantArray` using `variant_get`.
///
/// The fixture built by `shredded_list_variant_array` holds two rows:
/// `["comedy", "drama"]` and `["horror", 123]`.
#[test]
fn test_shredded_list_field_access() {
    let array = shredded_list_variant_array();

    // Path `[0]` selects the first element of each row's list.
    let options = GetOptions::new_with_path(VariantPath::from(0));
    let result = variant_get(&array, options).unwrap();

    let result_variant: &VariantArray = result.as_any().downcast_ref().unwrap();
    // One output row per input row; the fixture has exactly two rows.
    assert_eq!(result_variant.len(), 2);

    // Row 0: element 0 is "comedy"
    assert_eq!(result_variant.value(0), Variant::from("comedy"));
    // Row 1: element 0 is "horror"
    assert_eq!(result_variant.value(1), Variant::from("horror"));
}
/// Extracts element 0 of every shredded list with a requested `Utf8` output type.
///
/// The fixture built by `shredded_list_variant_array` holds two rows,
/// `["comedy", "drama"]` and `["horror", 123]`, so element 0 is a string in
/// both rows and the result is expected to contain no nulls.
#[test]
fn test_shredded_list_as_string() {
    let array = shredded_list_variant_array();

    // Path `[0]` selects the first element of each row; `with_as_type` asks
    // for the result as a Utf8 column.
    let field = Field::new("typed_value", DataType::Utf8, false);
    let options = GetOptions::new_with_path(VariantPath::from(0))
        .with_as_type(Some(FieldRef::from(field)));
    let result = variant_get(&array, options).unwrap();

    // One output value per input row: "comedy" for row 0, "horror" for row 1.
    let expected: ArrayRef = Arc::new(StringArray::from(vec![Some("comedy"), Some("horror")]));
    assert_eq!(&result, &expected);
}
/// Helper function to create a shredded variant array representing lists
///
/// This creates an array that represents:
/// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - perfectly shredded)
/// Row 1: ["horror", 123] ([0] is shredded, [1] is int - partially shredded)
///
/// The physical layout follows the shredding spec where:
/// - metadata: contains list metadata
/// - typed_value: StructArray with 0 index value
/// - value: contains fallback for
fn shredded_list_variant_array() -> ArrayRef {
// Create metadata array
let metadata_array =
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 2));

// Building the typed_value ListArray

// Need a StructBuilder to create a ListBuilder
let fields = Fields::from(vec![
Field::new("value", DataType::Binary, true),
Field::new("typed_value", DataType::Utf8, true),
]);
let field_builders = vec![
make_builder(&DataType::Binary, 4),
make_builder(&DataType::Utf8, 4),
];
let struct_builder = StructBuilder::new(fields, field_builders);

let mut builder = GenericListBuilder::<i32, StructBuilder>::new(struct_builder);

// Row 0 index 0
builder
.values()
.field_builder::<BinaryBuilder>(0)
.unwrap()
.append_null();
builder
.values()
.field_builder::<StringBuilder>(1)
.unwrap()
.append_value("comedy");
builder.values().append(true);

// Row 0 index 1
builder
.values()
.field_builder::<BinaryBuilder>(0)
.unwrap()
.append_null();
builder
.values()
.field_builder::<StringBuilder>(1)
.unwrap()
.append_value("drama");
builder.values().append(true);

// Next row
builder.append(true);

// Row 1 index 0
builder
.values()
.field_builder::<BinaryBuilder>(0)
.unwrap()
.append_null();
builder
.values()
.field_builder::<StringBuilder>(1)
.unwrap()
.append_value("horror");
builder.values().append(true);

// Row 1 index 1
let mut variant_builder = VariantBuilder::new();
variant_builder.append_value(123i32); // <------ couldn't find the right way to do it, used this as placeholder for binary
let (_, value) = variant_builder.finish();

builder
.values()
.field_builder::<BinaryBuilder>(0)
.unwrap()
.append_value(value);
builder
.values()
.field_builder::<StringBuilder>(1)
.unwrap()
.append_null();
builder.values().append(true);

// Next row
builder.append(true);

let typed_value_array = builder.finish();

// Build the main VariantArray
let main_struct = crate::variant_array::StructArrayBuilder::new()
.with_field("metadata", Arc::new(metadata_array), false)
// .with_field("value", Arc::new(value_array), true)
.with_field("typed_value", Arc::new(typed_value_array), true)
.build();

Arc::new(VariantArray::try_new(Arc::new(main_struct)).expect("should create variant array"))
}
/// Helper function to create a shredded variant array representing objects
///
/// This creates an array that represents:
Expand Down
Loading