@@ -27,6 +27,7 @@ use databend_common_pipeline_core::processors::Event;
27
27
use databend_common_pipeline_core:: processors:: InputPort ;
28
28
use databend_common_pipeline_core:: processors:: OutputPort ;
29
29
use databend_common_pipeline_core:: processors:: Processor ;
30
+ use databend_common_sql:: IndexType ;
30
31
31
32
pub struct TransformMergeBlock {
32
33
finished : bool ,
@@ -35,90 +36,82 @@ pub struct TransformMergeBlock {
35
36
36
37
input_data : Option < DataBlock > ,
37
38
output_data : Option < DataBlock > ,
38
- left_schema : DataSchemaRef ,
39
- right_schema : DataSchemaRef ,
40
- pairs : Vec < ( String , String ) > ,
39
+ schemas : Vec < DataSchemaRef > ,
40
+ output_cols : Vec < Vec < IndexType > > ,
41
41
42
- receiver : Receiver < DataBlock > ,
43
- receiver_result : Option < DataBlock > ,
42
+ receivers : Vec < ( usize , Receiver < DataBlock > ) > ,
43
+ receiver_results : Vec < ( usize , DataBlock ) > ,
44
44
}
45
45
46
46
impl TransformMergeBlock {
47
47
pub fn try_create (
48
48
input : Arc < InputPort > ,
49
49
output : Arc < OutputPort > ,
50
- left_schema : DataSchemaRef ,
51
- right_schema : DataSchemaRef ,
52
- pairs : Vec < ( String , String ) > ,
53
- receiver : Receiver < DataBlock > ,
50
+ schemas : Vec < DataSchemaRef > ,
51
+ output_cols : Vec < Vec < IndexType > > ,
52
+ receivers : Vec < ( usize , Receiver < DataBlock > ) > ,
54
53
) -> Result < Box < dyn Processor > > {
55
54
Ok ( Box :: new ( TransformMergeBlock {
56
55
finished : false ,
57
56
input,
58
57
output,
59
58
input_data : None ,
60
59
output_data : None ,
61
- left_schema,
62
- right_schema,
63
- pairs,
64
- receiver,
65
- receiver_result : None ,
60
+ schemas,
61
+ output_cols,
62
+ receivers,
63
+ receiver_results : vec ! [ ] ,
66
64
} ) )
67
65
}
68
66
69
- fn project_block ( & self , block : DataBlock , is_left : bool ) -> Result < DataBlock > {
67
+ fn project_block ( & self , block : DataBlock , idx : Option < usize > ) -> Result < DataBlock > {
70
68
let num_rows = block. num_rows ( ) ;
71
- let columns = self
72
- . pairs
73
- . iter ( )
74
- . map ( |( left, right) | {
75
- if is_left {
69
+ let columns = if let Some ( idx) = idx {
70
+ self . check_type ( idx, & block) ?
71
+ } else {
72
+ self . output_cols [ 0 ]
73
+ . iter ( )
74
+ . map ( |idx| {
76
75
Ok ( block
77
- . get_by_offset ( self . left_schema . index_of ( left ) ?)
76
+ . get_by_offset ( self . schemas [ 0 ] . index_of ( & idx . to_string ( ) ) ?)
78
77
. clone ( ) )
79
- } else {
80
- // If block from right, check if block schema matches self scheme(left schema)
81
- // If unmatched, covert block columns types or report error
82
- self . check_type ( left, right, & block)
83
- }
84
- } )
85
- . collect :: < Result < Vec < _ > > > ( ) ?;
78
+ } )
79
+ . collect :: < Result < Vec < _ > > > ( ) ?
80
+ } ;
86
81
Ok ( DataBlock :: new ( columns, num_rows) )
87
82
}
88
83
89
- fn check_type (
90
- & self ,
91
- left_name : & str ,
92
- right_name : & str ,
93
- block : & DataBlock ,
94
- ) -> Result < BlockEntry > {
95
- let left_field = self . left_schema . field_with_name ( left_name) ?;
96
- let left_data_type = left_field. data_type ( ) ;
84
+ fn check_type ( & self , idx : usize , block : & DataBlock ) -> Result < Vec < BlockEntry > > {
85
+ let mut columns = vec ! [ ] ;
86
+ for ( left_idx, right_idx) in self . output_cols [ 0 ] . iter ( ) . zip ( self . output_cols [ idx] . iter ( ) ) {
87
+ let left_field = self . schemas [ 0 ] . field_with_name ( & left_idx. to_string ( ) ) ?;
88
+ let left_data_type = left_field. data_type ( ) ;
97
89
98
- let right_field = self . right_schema . field_with_name ( right_name ) ?;
99
- let right_data_type = right_field. data_type ( ) ;
90
+ let right_field = self . schemas [ idx ] . field_with_name ( & right_idx . to_string ( ) ) ?;
91
+ let right_data_type = right_field. data_type ( ) ;
100
92
101
- let index = self . right_schema . index_of ( right_name) ?;
102
-
103
- if left_data_type == right_data_type {
104
- return Ok ( block. get_by_offset ( index) . clone ( ) ) ;
105
- }
93
+ let offset = self . schemas [ idx] . index_of ( & right_idx. to_string ( ) ) ?;
94
+ if left_data_type == right_data_type {
95
+ columns. push ( block. get_by_offset ( offset) . clone ( ) ) ;
96
+ }
106
97
107
- if left_data_type. remove_nullable ( ) == right_data_type. remove_nullable ( ) {
108
- let origin_column = block. get_by_offset ( index) . clone ( ) ;
109
- let mut builder = ColumnBuilder :: with_capacity ( left_data_type, block. num_rows ( ) ) ;
110
- let value = origin_column. value . as_ref ( ) ;
111
- for idx in 0 ..block. num_rows ( ) {
112
- let scalar = value. index ( idx) . unwrap ( ) ;
113
- builder. push ( scalar) ;
98
+ if left_data_type. remove_nullable ( ) == right_data_type. remove_nullable ( ) {
99
+ let origin_column = block. get_by_offset ( offset) . clone ( ) ;
100
+ let mut builder = ColumnBuilder :: with_capacity ( left_data_type, block. num_rows ( ) ) ;
101
+ let value = origin_column. value . as_ref ( ) ;
102
+ for idx in 0 ..block. num_rows ( ) {
103
+ let scalar = value. index ( idx) . unwrap ( ) ;
104
+ builder. push ( scalar) ;
105
+ }
106
+ let col = builder. build ( ) ;
107
+ columns. push ( BlockEntry :: new ( left_data_type. clone ( ) , Value :: Column ( col) ) ) ;
108
+ } else {
109
+ return Err ( ErrorCode :: IllegalDataType (
110
+ "The data type on both sides of the union does not match" ,
111
+ ) ) ;
114
112
}
115
- let col = builder. build ( ) ;
116
- Ok ( BlockEntry :: new ( left_data_type. clone ( ) , Value :: Column ( col) ) )
117
- } else {
118
- Err ( ErrorCode :: IllegalDataType (
119
- "The data type on both sides of the union does not match" ,
120
- ) )
121
113
}
114
+ Ok ( columns)
122
115
}
123
116
}
124
117
@@ -148,12 +141,7 @@ impl Processor for TransformMergeBlock {
148
141
return Ok ( Event :: NeedConsume ) ;
149
142
}
150
143
151
- if self . input_data . is_some ( ) || self . receiver_result . is_some ( ) {
152
- return Ok ( Event :: Sync ) ;
153
- }
154
-
155
- if let Ok ( result) = self . receiver . try_recv ( ) {
156
- self . receiver_result = Some ( result) ;
144
+ if self . input_data . is_some ( ) || !self . receiver_results . is_empty ( ) {
157
145
return Ok ( Event :: Sync ) ;
158
146
}
159
147
@@ -175,28 +163,25 @@ impl Processor for TransformMergeBlock {
175
163
}
176
164
177
165
fn process ( & mut self ) -> Result < ( ) > {
166
+ let mut blocks = vec ! [ ] ;
167
+ for ( idx, receive_result) in self . receiver_results . iter ( ) {
168
+ blocks. push ( self . project_block ( receive_result. clone ( ) , Some ( * idx) ) ?) ;
169
+ }
170
+ self . receiver_results . clear ( ) ;
178
171
if let Some ( input_data) = self . input_data . take ( ) {
179
- if let Some ( receiver_result) = self . receiver_result . take ( ) {
180
- self . output_data = Some ( DataBlock :: concat ( & [
181
- self . project_block ( input_data, true ) ?,
182
- self . project_block ( receiver_result, false ) ?,
183
- ] ) ?) ;
184
- } else {
185
- self . output_data = Some ( self . project_block ( input_data, true ) ?) ;
186
- }
187
- } else if let Some ( receiver_result) = self . receiver_result . take ( ) {
188
- self . output_data = Some ( self . project_block ( receiver_result, false ) ?) ;
172
+ blocks. push ( self . project_block ( input_data, None ) ?) ;
189
173
}
190
-
174
+ self . output_data = Some ( DataBlock :: concat ( & blocks ) ? ) ;
191
175
Ok ( ( ) )
192
176
}
193
177
194
178
#[ async_backtrace:: framed]
195
179
async fn async_process ( & mut self ) -> Result < ( ) > {
196
180
if !self . finished {
197
- if let Ok ( result) = self . receiver . recv ( ) . await {
198
- self . receiver_result = Some ( result) ;
199
- return Ok ( ( ) ) ;
181
+ for ( idx, receiver) in self . receivers . iter ( ) {
182
+ if let Ok ( result) = receiver. recv ( ) . await {
183
+ self . receiver_results . push ( ( * idx, result) ) ;
184
+ }
200
185
}
201
186
self . finished = true ;
202
187
}
0 commit comments