@@ -17,14 +17,17 @@ use std::io::BufRead;
17
17
use std:: io:: Cursor ;
18
18
use std:: ops:: Not ;
19
19
use std:: sync:: Arc ;
20
+ use std:: time:: Instant ;
20
21
21
22
use aho_corasick:: AhoCorasick ;
22
23
use common_ast:: ast:: Expr ;
23
24
use common_ast:: parser:: parse_comma_separated_exprs;
24
25
use common_ast:: parser:: tokenize_sql;
25
26
use common_ast:: Backtrace ;
26
27
use common_base:: runtime:: GlobalIORuntime ;
28
+ use common_catalog:: plan:: StageTableInfo ;
27
29
use common_catalog:: table:: AppendMode ;
30
+ use common_catalog:: table_context:: StageAttachment ;
28
31
use common_datablocks:: DataBlock ;
29
32
use common_datavalues:: prelude:: * ;
30
33
use common_exception:: ErrorCode ;
@@ -33,19 +36,26 @@ use common_formats::parse_timezone;
33
36
use common_formats:: FastFieldDecoderValues ;
34
37
use common_io:: cursor_ext:: ReadBytesExt ;
35
38
use common_io:: cursor_ext:: ReadCheckPointExt ;
39
+ use common_meta_types:: UserStageInfo ;
40
+ use common_pipeline_core:: Pipeline ;
36
41
use common_pipeline_sources:: processors:: sources:: AsyncSource ;
37
42
use common_pipeline_sources:: processors:: sources:: AsyncSourcer ;
38
43
use common_pipeline_transforms:: processors:: transforms:: Transform ;
39
44
use common_sql:: evaluator:: ChunkOperator ;
40
45
use common_sql:: evaluator:: CompoundChunkOperator ;
46
+ use common_sql:: executor:: table_read_plan:: ToReadDataSourcePlan ;
41
47
use common_sql:: Metadata ;
42
48
use common_sql:: MetadataRef ;
49
+ use common_storages_factory:: Table ;
50
+ use common_storages_stage:: StageTable ;
51
+ use common_users:: UserApiProvider ;
43
52
use parking_lot:: Mutex ;
44
53
use parking_lot:: RwLock ;
45
54
46
55
use crate :: interpreters:: common:: append2table;
47
56
use crate :: interpreters:: Interpreter ;
48
57
use crate :: interpreters:: InterpreterPtr ;
58
+ use crate :: pipelines:: processors:: TransformAddOn ;
49
59
use crate :: pipelines:: PipelineBuildResult ;
50
60
use crate :: pipelines:: SourcePipeBuilder ;
51
61
use crate :: schedulers:: build_query_pipeline;
@@ -101,6 +111,100 @@ impl InsertInterpreterV2 {
101
111
let cast_needed = select_schema != * output_schema;
102
112
Ok ( cast_needed)
103
113
}
114
+
115
+ async fn build_insert_from_stage_pipeline (
116
+ & self ,
117
+ table : Arc < dyn Table > ,
118
+ attachment : Arc < StageAttachment > ,
119
+ pipeline : & mut Pipeline ,
120
+ ) -> Result < ( ) > {
121
+ let start = Instant :: now ( ) ;
122
+ let ctx = self . ctx . clone ( ) ;
123
+ let table_ctx: Arc < dyn TableContext > = ctx. clone ( ) ;
124
+ let source_schema = self . plan . schema ( ) ;
125
+ let target_schema = table. schema ( ) ;
126
+ let catalog_name = self . plan . catalog . clone ( ) ;
127
+ let overwrite = self . plan . overwrite ;
128
+
129
+ let ( mut stage_info, path) = parse_stage_location ( & self . ctx , & attachment. location ) . await ?;
130
+ stage_info. apply_format_options ( & attachment. format_options ) ?;
131
+ stage_info. apply_copy_options ( & attachment. copy_options ) ?;
132
+
133
+ let mut stage_table_info = StageTableInfo {
134
+ schema : source_schema. clone ( ) ,
135
+ user_stage_info : stage_info,
136
+ path : path. to_string ( ) ,
137
+ files : vec ! [ ] ,
138
+ pattern : "" . to_string ( ) ,
139
+ files_to_copy : None ,
140
+ } ;
141
+
142
+ let all_source_file_infos = StageTable :: list_files ( & table_ctx, & stage_table_info) . await ?;
143
+
144
+ tracing:: info!(
145
+ "insert: read all stage attachment files finished: {}, elapsed:{}" ,
146
+ all_source_file_infos. len( ) ,
147
+ start. elapsed( ) . as_secs( )
148
+ ) ;
149
+
150
+ stage_table_info. files_to_copy = Some ( all_source_file_infos. clone ( ) ) ;
151
+ let stage_table = StageTable :: try_create ( stage_table_info. clone ( ) ) ?;
152
+ let read_source_plan = {
153
+ stage_table
154
+ . read_plan_with_catalog ( ctx. clone ( ) , catalog_name, None )
155
+ . await ?
156
+ } ;
157
+
158
+ stage_table. read_data ( table_ctx, & read_source_plan, pipeline) ?;
159
+
160
+ let need_fill_missing_columns = target_schema != source_schema;
161
+ if need_fill_missing_columns {
162
+ pipeline. add_transform ( |transform_input_port, transform_output_port| {
163
+ TransformAddOn :: try_create (
164
+ transform_input_port,
165
+ transform_output_port,
166
+ source_schema. clone ( ) ,
167
+ target_schema. clone ( ) ,
168
+ ctx. clone ( ) ,
169
+ )
170
+ } ) ?;
171
+ }
172
+ table. append_data ( ctx. clone ( ) , pipeline, AppendMode :: Copy , false ) ?;
173
+
174
+ pipeline. set_on_finished ( move |may_error| {
175
+ // capture out variable
176
+ let overwrite = overwrite;
177
+ let ctx = ctx. clone ( ) ;
178
+ let table = table. clone ( ) ;
179
+
180
+ match may_error {
181
+ Some ( error) => {
182
+ tracing:: error!( "insert stage file error: {}" , error) ;
183
+ Err ( may_error. as_ref ( ) . unwrap ( ) . clone ( ) )
184
+ }
185
+ None => {
186
+ let append_entries = ctx. consume_precommit_blocks ( ) ;
187
+ // We must put the commit operation to global runtime, which will avoid the "dispatch dropped without returning error" in tower
188
+ GlobalIORuntime :: instance ( ) . block_on ( async move {
189
+ tracing:: info!(
190
+ "insert: try to commit append entries:{}, elapsed:{}" ,
191
+ append_entries. len( ) ,
192
+ start. elapsed( ) . as_secs( )
193
+ ) ;
194
+ table
195
+ . commit_insertion ( ctx, append_entries, overwrite)
196
+ . await ?;
197
+
198
+ // TODO:(everpcpc) purge copied files
199
+
200
+ Ok ( ( ) )
201
+ } )
202
+ }
203
+ }
204
+ } ) ;
205
+
206
+ Ok ( ( ) )
207
+ }
104
208
}
105
209
106
210
#[ async_trait:: async_trait]
@@ -156,6 +260,16 @@ impl Interpreter for InsertInterpreterV2 {
156
260
. format
157
261
. exec_stream ( input_context. clone ( ) , & mut build_res. main_pipeline ) ?;
158
262
}
263
+ InsertInputSource :: Stage ( opts) => {
264
+ tracing:: info!( "insert: from stage with options {:?}" , opts) ;
265
+ self . build_insert_from_stage_pipeline (
266
+ table. clone ( ) ,
267
+ opts. clone ( ) ,
268
+ & mut build_res. main_pipeline ,
269
+ )
270
+ . await ?;
271
+ return Ok ( build_res) ;
272
+ }
159
273
InsertInputSource :: SelectPlan ( plan) => {
160
274
let table1 = table. clone ( ) ;
161
275
let ( mut select_plan, select_column_bindings) = match plan. as_ref ( ) {
@@ -602,3 +716,26 @@ async fn exprs_to_datavalue<'a>(
602
716
let datavalues: Vec < DataValue > = res. columns ( ) . iter ( ) . skip ( 1 ) . map ( |col| col. get ( 0 ) ) . collect ( ) ;
603
717
Ok ( datavalues)
604
718
}
719
+
720
+ // TODO:(everpcpc) tmp copy from src/query/sql/src/planner/binder/copy.rs
721
+ // move to user stage module
722
+ async fn parse_stage_location (
723
+ ctx : & Arc < QueryContext > ,
724
+ location : & str ,
725
+ ) -> Result < ( UserStageInfo , String ) > {
726
+ let s: Vec < & str > = location. split ( '@' ) . collect ( ) ;
727
+ // @my_ext_stage/abc/
728
+ let names: Vec < & str > = s[ 1 ] . splitn ( 2 , '/' ) . filter ( |v| !v. is_empty ( ) ) . collect ( ) ;
729
+
730
+ let stage = if names[ 0 ] == "~" {
731
+ UserStageInfo :: new_user_stage ( & ctx. get_current_user ( ) ?. name )
732
+ } else {
733
+ UserApiProvider :: instance ( )
734
+ . get_stage ( & ctx. get_tenant ( ) , names[ 0 ] )
735
+ . await ?
736
+ } ;
737
+
738
+ let path = names. get ( 1 ) . unwrap_or ( & "" ) . trim_start_matches ( '/' ) ;
739
+
740
+ Ok ( ( stage, path. to_string ( ) ) )
741
+ }
0 commit comments