Skip to content

Commit 909a01c

Browse files
committed
fix: rename sideload to stage_attachment
1 parent 805db70 commit 909a01c

File tree

8 files changed

+130
-65
lines changed

8 files changed

+130
-65
lines changed

src/query/catalog/src/table_context.rs

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::collections::BTreeMap;
1516
use std::net::SocketAddr;
1617
use std::sync::atomic::AtomicBool;
1718
use std::sync::Arc;
@@ -55,9 +56,9 @@ pub struct ProcessInfo {
5556
}
5657

5758
#[derive(Debug, Clone)]
58-
pub struct SideloadOptions {
59-
pub uri: Option<String>,
60-
pub stage: Option<String>,
59+
pub struct StageAttachment {
60+
pub location: String,
61+
pub params: BTreeMap<String, String>,
6162
}
6263

6364
#[async_trait::async_trait]
@@ -105,5 +106,5 @@ pub trait TableContext: Send + Sync {
105106
async fn get_table(&self, catalog: &str, database: &str, table: &str)
106107
-> Result<Arc<dyn Table>>;
107108
fn get_processes_info(&self) -> Vec<ProcessInfo>;
108-
fn get_sideload(&self) -> Option<SideloadOptions>;
109+
fn get_stage_attachment(&self) -> Option<StageAttachment>;
109110
}

src/query/service/src/interpreters/interpreter_insert_v2.rs

Lines changed: 92 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::collections::BTreeMap;
1516
use std::collections::VecDeque;
1617
use std::io::BufRead;
1718
use std::io::Cursor;
1819
use std::ops::Not;
20+
use std::str::FromStr;
1921
use std::sync::Arc;
2022
use std::time::Instant;
2123

@@ -28,6 +30,7 @@ use common_base::runtime::GlobalIORuntime;
2830
use common_catalog::plan::StageFileStatus;
2931
use common_catalog::plan::StageTableInfo;
3032
use common_catalog::table::AppendMode;
33+
use common_catalog::table_context::StageAttachment;
3134
use common_datablocks::DataBlock;
3235
use common_datavalues::prelude::*;
3336
use common_exception::ErrorCode;
@@ -36,6 +39,9 @@ use common_formats::parse_timezone;
3639
use common_formats::FastFieldDecoderValues;
3740
use common_io::cursor_ext::ReadBytesExt;
3841
use common_io::cursor_ext::ReadCheckPointExt;
42+
use common_meta_types::OnErrorMode;
43+
use common_meta_types::StageFileCompression;
44+
use common_meta_types::StageFileFormatType;
3945
use common_meta_types::UserStageInfo;
4046
use common_pipeline_core::Pipeline;
4147
use common_pipeline_sources::processors::sources::AsyncSource;
@@ -112,11 +118,74 @@ impl InsertInterpreterV2 {
112118
Ok(cast_needed)
113119
}
114120

115-
// TODO:(everpcpc)
121+
fn apply_stage_options(
122+
&self,
123+
stage: &mut UserStageInfo,
124+
params: &BTreeMap<String, String>,
125+
) -> Result<()> {
126+
for (k, v) in params.iter() {
127+
match k.as_str() {
128+
// file format options
129+
"format" => {
130+
let format = StageFileFormatType::from_str(v)?;
131+
stage.file_format_options.format = format;
132+
}
133+
"skip_header" => {
134+
let skip_header = u64::from_str(v)?;
135+
stage.file_format_options.skip_header = skip_header;
136+
}
137+
"field_delimiter" => stage.file_format_options.field_delimiter = v.clone(),
138+
"record_delimiter" => stage.file_format_options.record_delimiter = v.clone(),
139+
"nan_display" => stage.file_format_options.nan_display = v.clone(),
140+
"escape" => stage.file_format_options.escape = v.clone(),
141+
"compression" => {
142+
let compression = StageFileCompression::from_str(v)?;
143+
stage.file_format_options.compression = compression;
144+
}
145+
"row_tag" => stage.file_format_options.row_tag = v.clone(),
146+
"quote" => stage.file_format_options.quote = v.clone(),
147+
148+
// copy options
149+
"on_error" => {
150+
let on_error = OnErrorMode::from_str(v)?;
151+
stage.copy_options.on_error = on_error;
152+
}
153+
"size_limit" => {
154+
let size_limit = usize::from_str(v)?;
155+
stage.copy_options.size_limit = size_limit;
156+
}
157+
"split_size" => {
158+
let split_size = usize::from_str(v)?;
159+
stage.copy_options.split_size = split_size;
160+
}
161+
"purge" => {
162+
let purge = bool::from_str(v).map_err(|_| {
163+
ErrorCode::StrParseError(format!("Cannot parse purge: {} as bool", v))
164+
})?;
165+
stage.copy_options.purge = purge;
166+
}
167+
"single" => {
168+
let single = bool::from_str(v).map_err(|_| {
169+
ErrorCode::StrParseError(format!("Cannot parse single: {} as bool", v))
170+
})?;
171+
stage.copy_options.single = single;
172+
}
173+
"max_file_size" => {
174+
let max_file_size = usize::from_str(v)?;
175+
stage.copy_options.max_file_size = max_file_size;
176+
}
177+
178+
_ => {}
179+
}
180+
}
181+
182+
Ok(())
183+
}
184+
116185
async fn build_insert_from_stage_pipeline(
117186
&self,
118187
table: Arc<dyn Table>,
119-
stage_location: &str,
188+
attachment: Arc<StageAttachment>,
120189
pipeline: &mut Pipeline,
121190
) -> Result<()> {
122191
let start = Instant::now();
@@ -127,7 +196,8 @@ impl InsertInterpreterV2 {
127196
let catalog_name = self.plan.catalog.clone();
128197
let overwrite = self.plan.overwrite;
129198

130-
let (stage_info, path) = parse_stage_location(&self.ctx, stage_location).await?;
199+
let (mut stage_info, path) = parse_stage_location(&self.ctx, &attachment.location).await?;
200+
self.apply_stage_options(&mut stage_info, &attachment.params)?;
131201

132202
let mut stage_table_info = StageTableInfo {
133203
schema: source_schema.clone(),
@@ -140,7 +210,7 @@ impl InsertInterpreterV2 {
140210

141211
let all_source_file_infos = StageTable::list_files(&table_ctx, &stage_table_info).await?;
142212

143-
// TODO: color_copied_files
213+
// TODO:(everpcpc) color_copied_files
144214

145215
let mut need_copied_file_infos = vec![];
146216
for file in &all_source_file_infos {
@@ -149,9 +219,8 @@ impl InsertInterpreterV2 {
149219
}
150220
}
151221

152-
// DEBUG:
153-
tracing::warn!(
154-
"insert: read all sideload files finished, all:{}, need copy:{}, elapsed:{}",
222+
tracing::info!(
223+
"insert: read all stage attachment files finished, all:{}, need copy:{}, elapsed:{}",
155224
all_source_file_infos.len(),
156225
need_copied_file_infos.len(),
157226
start.elapsed().as_secs()
@@ -199,19 +268,20 @@ impl InsertInterpreterV2 {
199268
None => {
200269
let append_entries = ctx.consume_precommit_blocks();
201270
// We must put the commit operation to global runtime, which will avoid the "dispatch dropped without returning error" in tower
202-
return GlobalIORuntime::instance().block_on(async move {
203-
// DEBUG:
204-
tracing::warn!(
271+
GlobalIORuntime::instance().block_on(async move {
272+
tracing::info!(
205273
"insert: try to commit append entries:{}, elapsed:{}",
206274
append_entries.len(),
207275
start.elapsed().as_secs()
208276
);
209277
table
210278
.commit_insertion(ctx, append_entries, overwrite)
211279
.await?;
280+
281+
// TODO:(everpcpc) purge copied files
282+
212283
Ok(())
213-
// TODO: purge copied files
214-
});
284+
})
215285
}
216286
}
217287
});
@@ -273,25 +343,15 @@ impl Interpreter for InsertInterpreterV2 {
273343
.format
274344
.exec_stream(input_context.clone(), &mut build_res.main_pipeline)?;
275345
}
276-
InsertInputSource::Sideload(opts) => {
346+
InsertInputSource::Stage(opts) => {
277347
// DEBUG:
278-
tracing::warn!("==> sideload insert: {:?}", opts);
279-
280-
match &opts.stage {
281-
None => {
282-
return Err(ErrorCode::BadDataValueType(
283-
"No stage location provided".to_string(),
284-
));
285-
}
286-
Some(stage_location) => {
287-
self.build_insert_from_stage_pipeline(
288-
table.clone(),
289-
stage_location,
290-
&mut build_res.main_pipeline,
291-
)
292-
.await?;
293-
}
294-
}
348+
tracing::warn!("==> insert from stage: {:?}", opts);
349+
self.build_insert_from_stage_pipeline(
350+
table.clone(),
351+
opts.clone(),
352+
&mut build_res.main_pipeline,
353+
)
354+
.await?;
295355
return Ok(build_res);
296356
}
297357
InsertInputSource::SelectPlan(plan) => {
@@ -741,7 +801,8 @@ async fn exprs_to_datavalue<'a>(
741801
Ok(datavalues)
742802
}
743803

744-
// FIXME: tmp copy from src/query/sql/src/planner/binder/copy.rs
804+
// TODO:(everpcpc) tmp copy from src/query/sql/src/planner/binder/copy.rs
805+
// move to user stage module
745806
async fn parse_stage_location(
746807
ctx: &Arc<QueryContext>,
747808
location: &str,

src/query/service/src/servers/http/v1/query/http_query.rs

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ use common_base::base::tokio;
2121
use common_base::base::tokio::sync::Mutex as TokioMutex;
2222
use common_base::base::tokio::sync::RwLock;
2323
use common_base::runtime::TrySpawn;
24-
use common_catalog::table_context::SideloadOptions;
24+
use common_catalog::table_context::StageAttachment;
2525
use common_exception::ErrorCode;
2626
use common_exception::Result;
2727
use serde::Deserialize;
@@ -60,7 +60,7 @@ pub struct HttpQueryRequest {
6060
pub pagination: PaginationConf,
6161
#[serde(default = "default_as_true")]
6262
pub string_fields: bool,
63-
pub sideload: Option<SideloadConf>,
63+
pub stage_attachment: Option<StageAttachmentConf>,
6464
}
6565

6666
const DEFAULT_MAX_ROWS_IN_BUFFER: usize = 5 * 1000 * 1000;
@@ -144,9 +144,9 @@ impl HttpSessionConf {
144144
}
145145

146146
#[derive(Deserialize, Debug, Clone)]
147-
pub struct SideloadConf {
148-
pub(crate) stage: Option<String>,
149-
pub(crate) url: Option<String>,
147+
pub struct StageAttachmentConf {
148+
pub(crate) location: String,
149+
pub(crate) params: Option<BTreeMap<String, String>>,
150150
}
151151

152152
#[derive(Debug, Clone)]
@@ -237,10 +237,13 @@ impl HttpQuery {
237237
let sql = &request.sql;
238238
tracing::info!("run query_id={id} in session_id={session_id}, sql='{sql}'");
239239

240-
match &request.sideload {
241-
Some(sideload) => ctx.attach_sideload(SideloadOptions {
242-
uri: sideload.url.clone(),
243-
stage: sideload.stage.clone(),
240+
match &request.stage_attachment {
241+
Some(attachment) => ctx.attach_stage(StageAttachment {
242+
location: attachment.location.clone(),
243+
params: match attachment.params {
244+
Some(ref params) => params.clone(),
245+
None => BTreeMap::new(),
246+
},
244247
}),
245248
None => {}
246249
};

src/query/service/src/sessions/query_ctx.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ use common_catalog::plan::DataSourcePlan;
3232
use common_catalog::plan::PartInfoPtr;
3333
use common_catalog::plan::Partitions;
3434
use common_catalog::plan::StageTableInfo;
35-
use common_catalog::table_context::SideloadOptions;
35+
use common_catalog::table_context::StageAttachment;
3636
use common_config::DATABEND_COMMIT_VERSION;
3737
use common_datablocks::DataBlock;
3838
use common_datavalues::DataValue;
@@ -193,8 +193,8 @@ impl QueryContext {
193193
self.shared.set_executor(weak_ptr)
194194
}
195195

196-
pub fn attach_sideload(&self, sideload: SideloadOptions) {
197-
self.shared.attach_sideload(sideload);
196+
pub fn attach_stage(&self, attachment: StageAttachment) {
197+
self.shared.attach_stage(attachment);
198198
}
199199

200200
pub fn get_created_time(&self) -> SystemTime {
@@ -360,9 +360,9 @@ impl TableContext for QueryContext {
360360
SessionManager::instance().processes_info()
361361
}
362362

363-
// Get Sideload Options.
364-
fn get_sideload(&self) -> Option<SideloadOptions> {
365-
self.shared.get_sideload()
363+
// Get Stage Attachment.
364+
fn get_stage_attachment(&self) -> Option<StageAttachment> {
365+
self.shared.get_stage_attachment()
366366
}
367367
}
368368

src/query/service/src/sessions/query_ctx_shared.rs

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use std::time::SystemTime;
2222

2323
use common_base::base::Progress;
2424
use common_base::runtime::Runtime;
25-
use common_catalog::table_context::SideloadOptions;
25+
use common_catalog::table_context::StageAttachment;
2626
use common_config::Config;
2727
use common_datablocks::DataBlock;
2828
use common_exception::ErrorCode;
@@ -79,7 +79,7 @@ pub struct QueryContextShared {
7979
pub(in crate::sessions) data_operator: DataOperator,
8080
pub(in crate::sessions) executor: Arc<RwLock<Weak<PipelineExecutor>>>,
8181
pub(in crate::sessions) precommit_blocks: Arc<RwLock<Vec<DataBlock>>>,
82-
pub(in crate::sessions) sideload_config: Arc<RwLock<Option<SideloadOptions>>>,
82+
pub(in crate::sessions) stage_attachment: Arc<RwLock<Option<StageAttachment>>>,
8383
pub(in crate::sessions) created_time: SystemTime,
8484
}
8585

@@ -109,7 +109,7 @@ impl QueryContextShared {
109109
affect: Arc::new(Mutex::new(None)),
110110
executor: Arc::new(RwLock::new(Weak::new())),
111111
precommit_blocks: Arc::new(RwLock::new(vec![])),
112-
sideload_config: Arc::new(RwLock::new(None)),
112+
stage_attachment: Arc::new(RwLock::new(None)),
113113
created_time: SystemTime::now(),
114114
}))
115115
}
@@ -319,13 +319,13 @@ impl QueryContextShared {
319319
swaped_precommit_blocks
320320
}
321321

322-
pub fn get_sideload(&self) -> Option<SideloadOptions> {
323-
self.sideload_config.read().clone()
322+
pub fn get_stage_attachment(&self) -> Option<StageAttachment> {
323+
self.stage_attachment.read().clone()
324324
}
325325

326-
pub fn attach_sideload(&self, sideload: SideloadOptions) {
327-
let mut sideload_config = self.sideload_config.write();
328-
*sideload_config = Some(sideload);
326+
pub fn attach_stage(&self, attachment: StageAttachment) {
327+
let mut stage_attachment = self.stage_attachment.write();
328+
*stage_attachment = Some(attachment);
329329
}
330330

331331
pub fn get_created_time(&self) -> SystemTime {

src/query/service/tests/it/storages/fuse/operations/commit.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ use common_catalog::plan::PartInfoPtr;
2727
use common_catalog::plan::Partitions;
2828
use common_catalog::table::Table;
2929
use common_catalog::table_context::ProcessInfo;
30-
use common_catalog::table_context::SideloadOptions;
30+
use common_catalog::table_context::StageAttachment;
3131
use common_catalog::table_context::TableContext;
3232
use common_datablocks::DataBlock;
3333
use common_exception::ErrorCode;
@@ -455,7 +455,7 @@ impl TableContext for CtxDelegation {
455455
todo!()
456456
}
457457

458-
fn get_sideload(&self) -> Option<SideloadOptions> {
458+
fn get_stage_attachment(&self) -> Option<StageAttachment> {
459459
todo!()
460460
}
461461
}

src/query/sql/src/planner/binder/insert.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ impl<'a> Binder {
9696
opts, start, None,
9797
))
9898
}
99-
InsertSource::Values { rest_str } => match self.ctx.get_sideload() {
100-
Some(sideload) => Ok(InsertInputSource::Sideload(Arc::new(sideload))),
99+
InsertSource::Values { rest_str } => match self.ctx.get_stage_attachment() {
100+
Some(attachment) => Ok(InsertInputSource::Stage(Arc::new(attachment))),
101101
None => {
102102
let data = rest_str.trim_end_matches(';').trim_start().to_owned();
103103
Ok(InsertInputSource::Values(data))

0 commit comments

Comments
 (0)