-
Notifications
You must be signed in to change notification settings - Fork 1k
[WIP] Support Shredded Lists/Array in variant_get
#8354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
9c25cc4
ed961a4
03ecb95
158d6d7
d53c831
174e429
69de7d7
cc6d787
8f6ad1b
bc8abd9
c0d2065
85aaa3f
40b6311
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1010,7 +1010,101 @@ mod test { | |
let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)])); | ||
assert_eq!(&result, &expected); | ||
} | ||
/// Indexing into a shredded list column without requesting an output type.
///
/// Takes the fixture built by `shredded_list_variant_array()`, asks
/// `variant_get` for list element 0 of every row, and checks that the result
/// downcasts to a `VariantArray` of three rows holding "comedy", "horror"
/// and "comedy".
#[test]
fn test_shredded_list_field_access() {
    let input = shredded_list_variant_array();

    // Only a path is supplied (no `as_type`); the result is then downcast to a
    // VariantArray below.
    let first_element = GetOptions::new_with_path(VariantPath::from(0));
    let output = variant_get(&input, first_element).unwrap();
    let variants: &VariantArray = output.as_any().downcast_ref().unwrap();

    assert_eq!(variants.len(), 3);

    // Element 0 of each list, in row order.
    let expected_heads = ["comedy", "horror", "comedy"];
    for (row, head) in expected_heads.iter().copied().enumerate() {
        assert_eq!(variants.value(row), Variant::String(head));
    }
}
/// Test extracting a shredded list element with type conversion.
///
/// Requests list element 0 of every row as `Utf8` and compares the result
/// against a `StringArray`. The fixture has three rows, so the expected array
/// carries one entry per row; the values match what
/// `test_shredded_list_field_access` asserts for the same path.
#[test]
fn test_shredded_list_as_string() {
    let array = shredded_list_variant_array();

    // Extract the index-0 values as a StringArray (type conversion).
    let field = Field::new("typed_value", DataType::Utf8, false);
    let options = GetOptions::new_with_path(VariantPath::from(0))
        .with_as_type(Some(FieldRef::from(field)));
    let result = variant_get(&array, options).unwrap();

    // One entry per input row: element 0 of rows 0, 1 and 2 respectively.
    let expected: ArrayRef = Arc::new(StringArray::from(vec![
        Some("comedy"),
        Some("horror"),
        Some("comedy"),
    ]));
    assert_eq!(&result, &expected);
}
/// Helper function to create a shredded variant array representing lists | ||
/// | ||
/// This creates an array that represents: | ||
/// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - perfectly shredded) | ||
/// Row 1: ["horror", null] ([0] is shredded, [1] is binary null - partially shredded) | ||
/// Row 2: ["comedy", "drama", "romance"] (perfectly shredded) | ||
sdf-jkl marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
/// | ||
/// The physical layout follows the shredding spec where: | ||
/// - metadata: contains list metadata | ||
/// - typed_value: StructArray with 0 index value | ||
/// - value: contains fallback for | ||
fn shredded_list_variant_array() -> ArrayRef { | ||
// Create the base metadata for lists | ||
|
||
// Could add this as an api for VariantList, like VariantList::from() | ||
fn build_list_metadata(vector: Vec<Variant>) -> (Vec<u8>, Vec<u8>) { | ||
let mut builder = parquet_variant::VariantBuilder::new(); | ||
let mut list = builder.new_list(); | ||
for value in vector { | ||
list.append_value(value); | ||
} | ||
list.finish(); | ||
builder.finish() | ||
} | ||
let (metadata1, _) = | ||
build_list_metadata(vec![Variant::String("comedy"), Variant::String("drama")]); | ||
sdf-jkl marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
let (metadata2, _) = build_list_metadata(vec![Variant::String("horror"), Variant::Null]); | ||
|
||
let (metadata3, _) = build_list_metadata(vec![ | ||
Variant::String("comedy"), | ||
Variant::String("drama"), | ||
Variant::String("romance"), | ||
]); | ||
|
||
// Create metadata array | ||
let metadata_array = | ||
BinaryViewArray::from_iter_values(vec![metadata1, metadata2, metadata3]); | ||
|
||
// Create the untyped value array | ||
let value_array = BinaryViewArray::from(vec![Variant::Null.as_u8_slice()]); | ||
// Maybe I should try with an actual primitive array | ||
let typed_value_array = StringArray::from(vec![ | ||
Some("comedy"), | ||
Some("drama"), | ||
Some("horror"), | ||
Some("comedy"), | ||
Some("drama"), | ||
Some("romance"), | ||
]); | ||
// Build the main VariantArray | ||
let main_struct = crate::variant_array::StructArrayBuilder::new() | ||
.with_field("metadata", Arc::new(metadata_array)) | ||
.with_field("value", Arc::new(value_array)) | ||
.with_field("typed_value", Arc::new(typed_value_array)) | ||
|
||
.build(); | ||
|
||
Arc::new(VariantArray::try_new(Arc::new(main_struct)).expect("should create variant array")) | ||
} | ||
/// Helper function to create a shredded variant array representing objects | ||
/// | ||
/// This creates an array that represents: | ||
|
Uh oh!
There was an error while loading. Please reload this page.