Skip to content

Commit 93ffeb0

Browse files
authored
Merge pull request #9139 from ariesdevil/dev
feat: Split block by default rows
2 parents f2f6537 + 3913ccc commit 93ffeb0

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

src/query/pipeline/sources/src/processors/sources/input_formats/impls/input_format_parquet.rs

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -280,6 +280,7 @@ impl RowGroupInMemory {
280280
)?;
281281
column_chunks.push(array_iters);
282282
}
283+
283284
match RowGroupDeserializer::new(column_chunks, self.meta.num_rows(), None).next() {
284285
None => Err(ErrorCode::Internal(
285286
"deserialize from raw group: fail to get a chunk",
@@ -333,7 +334,21 @@ impl BlockBuilderTrait for ParquetBlockBuilder {
333334
if let Some(rg) = batch.as_mut() {
334335
let chunk = rg.get_arrow_chunk()?;
335336
let block = DataBlock::from_chunk(&self.ctx.schema, &chunk)?;
336-
Ok(vec![block])
337+
338+
let block_total_rows = block.num_rows();
339+
let num_rows_per_block = self.ctx.block_compact_thresholds.max_rows_per_block;
340+
let blocks: Vec<DataBlock> = (0..block_total_rows)
341+
.step_by(num_rows_per_block)
342+
.map(|idx| {
343+
if idx + num_rows_per_block < block_total_rows {
344+
block.slice(idx, num_rows_per_block)
345+
} else {
346+
block.slice(idx, block_total_rows - idx)
347+
}
348+
})
349+
.collect();
350+
351+
Ok(blocks)
337352
} else {
338353
Ok(vec![])
339354
}

0 commit comments

Comments
 (0)