Skip to content

Commit 9cbe207

Browse files
committed
build_bounded_merge_sort
1 parent c5db85b commit 9cbe207

File tree

4 files changed

+128
-18
lines changed

4 files changed

+128
-18
lines changed

src/query/service/src/pipelines/builders/builder_sort.rs

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ use databend_common_exception::Result;
1919
use databend_common_expression::DataSchemaRef;
2020
use databend_common_expression::LimitType;
2121
use databend_common_expression::SortColumnDescription;
22+
use databend_common_pipeline_core::processors::InputPort;
23+
use databend_common_pipeline_core::processors::OutputPort;
2224
use databend_common_pipeline_core::processors::ProcessorPtr;
25+
use databend_common_pipeline_core::Pipe;
26+
use databend_common_pipeline_core::PipeItem;
2327
use databend_common_pipeline_core::Pipeline;
2428
use databend_common_pipeline_transforms::processors::add_k_way_merge_sort;
2529
use databend_common_pipeline_transforms::processors::sort::utils::add_order_field;
@@ -38,6 +42,7 @@ use databend_storages_common_cache::TempDirManager;
3842

3943
use crate::pipelines::memory_settings::MemorySettingsExt;
4044
use crate::pipelines::processors::transforms::add_range_shuffle_route;
45+
use crate::pipelines::processors::transforms::BoundedMergeSortBuilder;
4146
use crate::pipelines::processors::transforms::SortInjector;
4247
use crate::pipelines::processors::transforms::SortRangeExchange;
4348
use crate::pipelines::processors::transforms::TransformLimit;
@@ -156,7 +161,7 @@ impl PipelineBuilder {
156161
self.build_pipeline(&sort.input)?;
157162
}
158163

159-
self.main_pipeline.resize(1, false)
164+
builder.build_bounded_merge_sort(&mut self.main_pipeline)
160165
}
161166
SortStep::Route => self.main_pipeline.resize(1, false),
162167
}
@@ -253,7 +258,7 @@ impl SortPipelineBuilder {
253258
let memory_settings = MemorySettings::from_sort_settings(&self.ctx)?;
254259
let enable_loser_tree = settings.get_enable_loser_tree_merge_sort()?;
255260

256-
let builder = TransformSortBuilder::create(
261+
let builder = TransformSortBuilder::new(
257262
self.schema.clone(),
258263
self.sort_desc.clone(),
259264
max_block_size,
@@ -325,7 +330,7 @@ impl SortPipelineBuilder {
325330
let memory_settings = MemorySettings::from_sort_settings(&self.ctx)?;
326331
let enable_loser_tree = settings.get_enable_loser_tree_merge_sort()?;
327332

328-
let builder = TransformSortBuilder::create(
333+
let builder = TransformSortBuilder::new(
329334
self.schema.clone(),
330335
self.sort_desc.clone(),
331336
max_block_size,
@@ -377,12 +382,10 @@ impl SortPipelineBuilder {
377382
// Merge sort
378383
let need_multi_merge = pipeline.output_len() > 1;
379384
let output_order_col = need_multi_merge || !self.remove_order_col_at_last;
380-
debug_assert!(if order_col_generated {
385+
debug_assert!(
381386
// If `order_col_generated`, it means this transform is the last processor in the distributed sort pipeline.
382-
!output_order_col
383-
} else {
384-
true
385-
});
387+
!order_col_generated || !output_order_col
388+
);
386389

387390
let memory_settings = MemorySettings::from_sort_settings(&self.ctx)?;
388391
let sort_merge_output_schema = match output_order_col {
@@ -414,7 +417,7 @@ impl SortPipelineBuilder {
414417
};
415418

416419
pipeline.add_transform(|input, output| {
417-
let builder = TransformSortBuilder::create(
420+
let builder = TransformSortBuilder::new(
418421
sort_merge_output_schema.clone(),
419422
self.sort_desc.clone(),
420423
self.block_size,
@@ -476,4 +479,32 @@ impl SortPipelineBuilder {
476479
pub fn exchange_injector(&self) -> Arc<dyn ExchangeInjector> {
477480
Arc::new(SortInjector {})
478481
}
482+
483+
pub fn build_bounded_merge_sort(self, pipeline: &mut Pipeline) -> Result<()> {
484+
let inputs_port: Vec<_> = (0..pipeline.output_len())
485+
.map(|_| InputPort::create())
486+
.collect();
487+
let output_port = OutputPort::create();
488+
489+
let processor = ProcessorPtr::create(
490+
BoundedMergeSortBuilder::new(
491+
inputs_port.clone(),
492+
output_port.clone(),
493+
self.schema.clone(),
494+
self.sort_desc.clone(),
495+
self.block_size,
496+
self.limit,
497+
self.remove_order_col_at_last,
498+
self.enable_loser_tree,
499+
)
500+
.build()?,
501+
);
502+
503+
pipeline.add_pipe(Pipe::create(inputs_port.len(), 1, vec![PipeItem::create(
504+
processor,
505+
inputs_port,
506+
vec![output_port],
507+
)]));
508+
Ok(())
509+
}
479510
}

src/query/service/src/pipelines/processors/transforms/sort/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,14 @@ mod sort_route;
3939
mod sort_shuffle;
4040
mod sort_spill;
4141

42+
pub use merge_sort::*;
4243
pub use sort_builder::*;
44+
pub use sort_collect::*;
45+
pub use sort_combine::*;
4346
pub use sort_exchange::*;
4447
pub use sort_exchange_injector::*;
48+
pub use sort_merge_stream::*;
49+
pub use sort_restore::*;
4550
pub use sort_route::*;
4651
pub use sort_shuffle::*;
4752

src/query/service/src/pipelines/processors/transforms/sort/sort_builder.rs

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,7 @@ use databend_common_pipeline_transforms::sort::RowsTypeVisitor;
3434
use databend_common_pipeline_transforms::AccumulatingTransformer;
3535
use databend_common_pipeline_transforms::MemorySettings;
3636

37-
use super::merge_sort::TransformSort;
38-
use super::sort_collect::TransformSortCollect;
39-
use super::sort_combine::TransformSortCombine;
40-
use super::sort_restore::TransformSortRestore;
41-
use super::sort_shuffle::SortSampleState;
42-
use super::sort_shuffle::TransformSortBoundBroadcast;
43-
use super::Base;
37+
use super::*;
4438
use crate::sessions::QueryContext;
4539
use crate::spillers::Spiller;
4640

@@ -65,7 +59,7 @@ pub struct TransformSortBuilder {
6559
}
6660

6761
impl TransformSortBuilder {
68-
pub fn create(
62+
pub fn new(
6963
schema: DataSchemaRef,
7064
sort_desc: Arc<[SortColumnDescription]>,
7165
block_size: usize,
@@ -357,3 +351,83 @@ impl RowsTypeVisitor for Build<'_> {
357351
}
358352
}
359353
}
354+
355+
pub struct BoundedMergeSortBuilder {
356+
inputs: Vec<Arc<InputPort>>,
357+
output: Arc<OutputPort>,
358+
schema: DataSchemaRef,
359+
sort_desc: Arc<[SortColumnDescription]>,
360+
block_size: usize,
361+
limit: Option<usize>,
362+
remove_order_col: bool,
363+
enable_loser_tree: bool,
364+
}
365+
366+
impl BoundedMergeSortBuilder {
367+
pub fn new(
368+
inputs: Vec<Arc<InputPort>>,
369+
output: Arc<OutputPort>,
370+
schema: DataSchemaRef,
371+
sort_desc: Arc<[SortColumnDescription]>,
372+
block_size: usize,
373+
limit: Option<usize>,
374+
remove_order_col: bool,
375+
enable_loser_tree: bool,
376+
) -> Self {
377+
Self {
378+
inputs,
379+
output,
380+
schema,
381+
sort_desc,
382+
block_size,
383+
limit,
384+
remove_order_col,
385+
enable_loser_tree,
386+
}
387+
}
388+
389+
pub fn build(mut self) -> Result<Box<dyn Processor>> {
390+
select_row_type(&mut self)
391+
}
392+
}
393+
394+
impl RowsTypeVisitor for BoundedMergeSortBuilder {
395+
type Result = Result<Box<dyn Processor>>;
396+
397+
fn schema(&self) -> DataSchemaRef {
398+
self.schema.clone()
399+
}
400+
401+
fn sort_desc(&self) -> &[SortColumnDescription] {
402+
&self.sort_desc
403+
}
404+
405+
fn visit_type<R, C>(&mut self) -> Self::Result
406+
where
407+
R: Rows + 'static,
408+
C: RowConverter<R> + Send + 'static,
409+
{
410+
match self.enable_loser_tree {
411+
true => Ok(Box::new(
412+
BoundedMultiSortMergeProcessor::<LoserTreeSort<R>>::new(
413+
self.inputs.clone(),
414+
self.output.clone(),
415+
self.schema.clone(),
416+
self.block_size,
417+
self.limit,
418+
self.remove_order_col,
419+
)?,
420+
)),
421+
false => Ok(Box::new(
422+
BoundedMultiSortMergeProcessor::<HeapSort<R>>::new(
423+
self.inputs.clone(),
424+
self.output.clone(),
425+
self.schema.clone(),
426+
self.block_size,
427+
self.limit,
428+
self.remove_order_col,
429+
)?,
430+
)),
431+
}
432+
}
433+
}

src/query/service/src/pipelines/processors/transforms/sort/sort_merge_stream.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ where A: SortAlgorithm
5353
impl<A> BoundedMultiSortMergeProcessor<A>
5454
where A: SortAlgorithm
5555
{
56-
pub fn create(
56+
pub fn new(
5757
inputs: Vec<Arc<InputPort>>,
5858
output: Arc<OutputPort>,
5959
schema: DataSchemaRef,

0 commit comments

Comments
 (0)