Skip to content

Commit 2b1edde

Browse files
committed
fixup! Add tests for BatchCoalescer::push_batch_with_filter, fix bug (apache#7774)
1 parent f5b0465 commit 2b1edde

File tree

1 file changed

+19
-30
lines changed

1 file changed

+19
-30
lines changed

parquet-variant/src/builder.rs

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -545,41 +545,30 @@ impl ParentState<'_> {
545545
metadata_builder,
546546
fields,
547547
field_name,
548+
object_start_offset,
548549
..
549550
} => {
550551
let field_id = metadata_builder.upsert_field_name(field_name);
551-
fields.insert(field_id, starting_offset);
552+
let shifted_start_offset = starting_offset - *object_start_offset;
553+
fields.insert(field_id, shifted_start_offset);
552554
}
553555
}
554556
}
555557

556-
// returns the beginning offset of buffer for the parent if it is object builder, else 0.
557-
// for object builder will reuse the buffer from the parent, this is needed for `finish`
558-
// which needs the relative offset from the current variant.
559-
fn object_start_offset(&self) -> usize {
560-
match self {
561-
ParentState::Object {
562-
object_start_offset,
563-
..
564-
} => *object_start_offset,
565-
_ => 0,
566-
}
567-
}
568-
569558
/// Return mutable references to the buffer and metadata builder that this
570559
/// parent state is using.
571560
fn buffer_and_metadata_builder(&mut self) -> (&mut ValueBuffer, &mut MetadataBuilder) {
572561
match self {
573562
ParentState::Variant {
574563
buffer,
575564
metadata_builder,
576-
} => (buffer, metadata_builder),
577-
ParentState::List {
565+
}
566+
| ParentState::List {
578567
buffer,
579568
metadata_builder,
580569
..
581-
} => (buffer, metadata_builder),
582-
ParentState::Object {
570+
}
571+
| ParentState::Object {
583572
buffer,
584573
metadata_builder,
585574
..
@@ -590,9 +579,9 @@ impl ParentState<'_> {
590579
// return the offset of the underlying buffer at the time of calling this method.
591580
fn buffer_current_offset(&self) -> usize {
592581
match self {
593-
ParentState::Variant { buffer, .. } => buffer.offset(),
594-
ParentState::Object { buffer, .. } => buffer.offset(),
595-
ParentState::List { buffer, .. } => buffer.offset(),
582+
ParentState::Variant { buffer, .. }
583+
| ParentState::Object { buffer, .. }
584+
| ParentState::List { buffer, .. } => buffer.offset(),
596585
}
597586
}
598587
}
@@ -1043,11 +1032,10 @@ impl<'a> ListBuilder<'a> {
10431032
let offset_size = int_size(data_size);
10441033

10451034
// Get parent's buffer
1046-
let offset_shift = self.parent_state.object_start_offset();
10471035
let parent_buffer = self.parent_state.buffer();
10481036
// as object builder has been reused the parent buffer,
10491037
// we need to shift the offset by the starting offset of the parent object
1050-
let starting_offset = parent_buffer.offset() - offset_shift;
1038+
let starting_offset = parent_buffer.offset();
10511039

10521040
// Write header
10531041
let header = array_header(is_large, offset_size);
@@ -1217,7 +1205,10 @@ impl<'a> ObjectBuilder<'a> {
12171205

12181206
// Shift existing data to make room for the header
12191207
let buffer = parent_buffer.inner_mut();
1220-
buffer.splice(starting_offset..starting_offset, vec![0u8; header_size]);
1208+
buffer.splice(
1209+
starting_offset..starting_offset,
1210+
std::iter::repeat_n(0u8, header_size),
1211+
);
12211212

12221213
// Write header at the original start position
12231214
let mut header_pos = starting_offset;
@@ -1237,15 +1228,15 @@ impl<'a> ObjectBuilder<'a> {
12371228
}
12381229

12391230
// Write field IDs
1240-
for (&field_id, _) in &self.fields {
1241-
let id_bytes = (field_id as usize).to_le_bytes();
1231+
for field_id in self.fields.keys() {
1232+
let id_bytes = field_id.to_le_bytes();
12421233
buffer[header_pos..header_pos + id_size as usize]
12431234
.copy_from_slice(&id_bytes[..id_size as usize]);
12441235
header_pos += id_size as usize;
12451236
}
12461237

12471238
// Write field offsets (adjusted for header)
1248-
for (_, &relative_offset) in &self.fields {
1239+
for relative_offset in self.fields.values() {
12491240
let offset_bytes = relative_offset.to_le_bytes();
12501241
buffer[header_pos..header_pos + offset_size as usize]
12511242
.copy_from_slice(&offset_bytes[..offset_size as usize]);
@@ -1257,9 +1248,7 @@ impl<'a> ObjectBuilder<'a> {
12571248
buffer[header_pos..header_pos + offset_size as usize]
12581249
.copy_from_slice(&data_size_bytes[..offset_size as usize]);
12591250

1260-
let start_offset_shift = self.parent_state.object_start_offset();
1261-
self.parent_state
1262-
.finish(starting_offset - start_offset_shift);
1251+
self.parent_state.finish(starting_offset);
12631252

12641253
// mark that this object has been finished
12651254
self.has_been_finished = true;

0 commit comments

Comments
 (0)