-
Notifications
You must be signed in to change notification settings - Fork 981
[Variant] test: add variant object tests with different sizes #7896
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -400,6 +400,8 @@ impl<'m, 'v> VariantObject<'m, 'v> { | |
|
||
#[cfg(test)] | ||
mod tests { | ||
use crate::VariantBuilder; | ||
|
||
use super::*; | ||
|
||
#[test] | ||
|
@@ -618,4 +620,116 @@ mod tests { | |
ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..16 from 15-byte buffer") | ||
)); | ||
} | ||
|
||
fn test_variant_object_with_count(count: i32, expected_field_id_size: OffsetSizeBytes) { | ||
let field_names: Vec<_> = (0..count).map(|val| val.to_string()).collect(); | ||
let mut builder = | ||
VariantBuilder::new().with_field_names(field_names.iter().map(|s| s.as_str())); | ||
|
||
let mut obj = builder.new_object(); | ||
|
||
for i in 0..count { | ||
obj.insert(&field_names[i as usize], i); | ||
} | ||
|
||
obj.finish().unwrap(); | ||
let (metadata, value) = builder.finish(); | ||
let variant = Variant::new(&metadata, &value); | ||
|
||
if let Variant::Object(obj) = variant { | ||
assert_eq!(obj.len(), count as usize); | ||
|
||
assert_eq!(obj.get(&field_names[0]).unwrap(), Variant::Int32(0)); | ||
assert_eq!( | ||
obj.get(&field_names[(count - 1) as usize]).unwrap(), | ||
Variant::Int32(count - 1) | ||
); | ||
|
||
let header_byte = first_byte_from_slice(&value).unwrap(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
let header = VariantObjectHeader::try_new(header_byte).unwrap(); | ||
assert_eq!( | ||
header.field_id_size, expected_field_id_size, | ||
"Expected {}-byte field IDs, got {}-byte field IDs", | ||
expected_field_id_size as usize, header.field_id_size as usize | ||
); | ||
} else { | ||
panic!("Expected object variant"); | ||
} | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_257_elements() { | ||
test_variant_object_with_count((1 << 8) + 1, OffsetSizeBytes::Two); // 2^8 + 1, expected 2-byte field IDs | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_65537_elements() { | ||
test_variant_object_with_count((1 << 16) + 1, OffsetSizeBytes::Three); | ||
// 2^16 + 1, expected 3-byte field IDs | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_16777217_elements() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test takes about 45 seconds to run on my laptop. THe next longest test takes less than a second Is there any way to make this faster?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking about something like batch write. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. my brief profiling suggests that a large amount of the time was inserting the field names in (basically doing the hash table lookup). I am not sure how we would fix that -- you could try pre-populating the field names in the builder I suppose There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tested pre-populating. let field_names: Vec<String> = (0..count).map(|val| val.to_string()).collect();
let s = field_names.iter().map(|s| s.as_str());
let start = std::time::Instant::now();
let mut builder = VariantBuilder::new().with_field_names(s);
println!("Pre-populating field names: {:?}", start.elapsed()); Pre-populating field names: 20.150840416s There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sadly it still seems to take 45 seconds on my machine |
||
test_variant_object_with_count((1 << 24) + 1, OffsetSizeBytes::Four); | ||
// 2^24 + 1, expected 4-byte field IDs | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_small_sizes_255_elements() { | ||
test_variant_object_with_count(255, OffsetSizeBytes::One); | ||
} | ||
|
||
fn test_variant_object_with_large_data( | ||
data_size_per_field: usize, | ||
expected_field_offset_size: OffsetSizeBytes, | ||
) { | ||
let num_fields = 20; | ||
let mut builder = VariantBuilder::new(); | ||
let mut obj = builder.new_object(); | ||
|
||
let str_val = "a".repeat(data_size_per_field); | ||
|
||
for val in 0..num_fields { | ||
let key = format!("id_{}", val); | ||
obj.insert(&key, str_val.as_str()); | ||
} | ||
|
||
obj.finish().unwrap(); | ||
let (metadata, value) = builder.finish(); | ||
let variant = Variant::new(&metadata, &value); | ||
|
||
if let Variant::Object(obj) = variant { | ||
assert_eq!(obj.len(), num_fields); | ||
let header_byte = first_byte_from_slice(&value).unwrap(); | ||
let header = VariantObjectHeader::try_new(header_byte).unwrap(); | ||
assert_eq!( | ||
header.field_offset_size, expected_field_offset_size, | ||
"Expected {}-byte field offsets, got {}-byte field offsets", | ||
expected_field_offset_size as usize, header.field_offset_size as usize | ||
); | ||
} else { | ||
panic!("Expected object variant"); | ||
} | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_child_data_0_byte_offsets_minus_one() { | ||
test_variant_object_with_large_data(10, OffsetSizeBytes::One); | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_256_bytes_child_data_3_byte_offsets() { | ||
test_variant_object_with_large_data(256 + 1, OffsetSizeBytes::Two); // 2^8 - 2^16 elements | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_16777216_bytes_child_data_4_byte_offsets() { | ||
test_variant_object_with_large_data(65536 + 1, OffsetSizeBytes::Three); // 2^16 - 2^24 elements | ||
} | ||
|
||
#[test] | ||
fn test_variant_object_65535_bytes_child_data_2_byte_offsets() { | ||
test_variant_object_with_large_data(16777216 + 1, OffsetSizeBytes::Four); | ||
// 2^24 | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment in line 411 seems incorrect --
So header byte = 00_0_1_0001 = 0x10
. Maybe we can improve it alsoThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed to 0x11