Skip to content

Commit cb85a9b

Browse files
authored
Merge branch 'main' into feat-insert
2 parents ced20aa + a8ba76b commit cb85a9b

File tree

73 files changed

+896
-293
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+896
-293
lines changed

.github/actions/artifact_upload/action.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ runs:
3939
name: ${{ inputs.profile }}-${{ inputs.sha }}-${{ inputs.target }}-${{ inputs.category }}
4040
path: ./target/${{ inputs.target }}/${{ inputs.profile }}/databend-*
4141

42+
- name: Upload open-sharing to github
43+
if: steps.info.outputs.src == 'github'
44+
uses: actions/upload-artifact@v3
45+
with:
46+
name: ${{ inputs.profile }}-${{ inputs.sha }}-${{ inputs.target }}-${{ inputs.category }}
47+
path: ./target/${{ inputs.target }}/${{ inputs.profile }}/open-sharing
48+
4249
- name: Upload artifacts to s3
4350
if: steps.info.outputs.src == 's3'
4451
shell: bash

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md

Lines changed: 106 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -160,34 +160,78 @@ formatTypeOptions ::=
160160
RECORD_DELIMITER = '<character>'
161161
FIELD_DELIMITER = '<character>'
162162
SKIP_HEADER = <integer>
163+
QUOTE = '<character>'
164+
ESCAPE = '<character>'
165+
NAN_DISPLAY = '<string>'
166+
ROW_TAG = '<string>'
163167
COMPRESSION = AUTO | GZIP | BZ2 | BROTLI | ZSTD | DEFLATE | RAW_DEFLATE | XZ | NONE
164168
```
165169

166-
#### `RECORD_DELIMITER = '<character>'`
170+
#### `TYPE = 'CSV'`
167171

168-
Description: One character that separate records in an input file.
172+
Comma Separated Values format ([RFC](https://www.rfc-editor.org/rfc/rfc4180)).
169173

170-
Default: `'\n'`
174+
Notes:
171175

172-
#### `FIELD_DELIMITER = '<character>'`
176+
1. A string field that contains `QUOTE`, `ESCAPE`, `RECORD_DELIMITER`, or `FIELD_DELIMITER` must be quoted.
177+
2. No character is escaped except `QUOTE` inside a quoted string.
178+
3. no space between `FIELD_DELIMITER` and `Quote`.
179+
4. no trailing `FIELD_DELIMITER` for a record.
180+
5. Array/Struct field is serialized to a string as in SQL, and then the resulting string is output to CSV in quotes.
181+
6. If you are generating CSV programmatically, we highly recommend using the CSV library of your programming language.
182+
7. For text files unloaded from [MySQL](https://dev.mysql.com/doc/refman/8.0/en/load-data.html), the default format is
183+
TSV in Databend. It is valid CSV only if `ESCAPED BY` is empty and `ENCLOSED BY` is not empty.
173184

174-
Description: One character that separate fields in an input file.
185+
##### `RECORD_DELIMITER = '<character>'`
175186

176-
Default: `','` (comma)
187+
**Description**: One character that separates records in an input file.
188+
**Supported Values**: `\r\n`, or one character, including escaped characters: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\xHH`
189+
**Default**: `\n`
177190

178-
#### `SKIP_HEADER = '<integer>'`
191+
##### `FIELD_DELIMITER = '<character>'`
179192

180-
Description: Number of lines at the start of the file to skip.
193+
**Description**: One character that separates fields in an input file.
194+
**Supported Values**: One character only, including escaped char: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\xHH`
195+
**Default**: `,` (comma)
181196

182-
Default: `0`
197+
##### `QUOTE = '<character>'`
183198

184-
#### `COMPRESSION = AUTO | GZIP | BZ2 | BROTLI | ZSTD | DEFLATE | RAW_DEFLATE | XZ | NONE`
199+
**Description**: One character to quote strings in CSV file.
185200

186-
Description: String that represents the compression algorithm.
201+
For data loading, quoting is not necessary unless a string contains `QUOTE`, `ESCAPE`, `RECORD_DELIMITER`, or `FIELD_DELIMITER`.
187202

188-
Default: `NONE`
203+
**Supported Values**: `\'` or `\"`.
189204

190-
Values:
205+
**Default**: `\"`
206+
207+
##### `ESCAPE = '<character>'`
208+
209+
**Description**: One character to escape quote in quoted strings.
210+
211+
**Supported Values**: `\'` or `\"` or `\\`.
212+
213+
**Default**: `\"`
214+
215+
##### `SKIP_HEADER = '<integer>'`
216+
217+
**Use**: Data loading only.
218+
219+
**Description**: Number of lines at the start of the file to skip.
220+
221+
**Default**: `0`
222+
223+
##### `NAN_DISPLAY = '<string>'`
224+
225+
**Supported Values**: must be literal `'nan'` or `'null'` (case-insensitive)
226+
**Default**: `'NaN'`
227+
228+
##### `COMPRESSION = AUTO | GZIP | BZ2 | BROTLI | ZSTD | DEFLATE | RAW_DEFLATE | XZ | NONE`
229+
230+
**Description**: String that represents the compression algorithm.
231+
232+
**Default**: `NONE`
233+
234+
**Supported Values**:
191235

192236
| Values | Notes |
193237
| ------------- | --------------------------------------------------------------- |
@@ -201,6 +245,55 @@ Values:
201245
| `XZ` | |
202246
| `NONE` | Indicates that the files have not been compressed. |
203247

248+
#### `TYPE = 'TSV'`
249+
250+
1. these characters are escaped: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\\`, `\'`, `RECORD_DELIMITER`,`FIELD_DELIMITER`.
251+
2. Quoting/enclosing is not supported yet.
252+
3. Array/Struct field is serialized to a string as in SQL, and then the resulting string is output to CSV in quotes.
253+
4. Null is serialized as `\N`
254+
255+
##### `RECORD_DELIMITER = '<character>'`
256+
257+
**Description**: One character that separates records in an input file.
258+
259+
**Supported Values**: `\r\n`, or one character, including escaped characters: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\xHH`
260+
261+
**Default**: `\n`
262+
263+
##### `FIELD_DELIMITER = '<character>'`
264+
265+
**Description**: One character that separates fields in an input file.
266+
267+
**Supported Values**: One character only, including escaped char: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\xHH`
268+
269+
**Default**: `\t` (TAB)
270+
271+
##### `COMPRESSION = AUTO | GZIP | BZ2 | BROTLI | ZSTD | DEFLATE | RAW_DEFLATE | XZ | NONE`
272+
273+
same as `COMPRESSION` in `TYPE = 'CSV'`
274+
275+
#### `TYPE = 'NDJSON'`
276+
277+
##### `COMPRESSION = AUTO | GZIP | BZ2 | BROTLI | ZSTD | DEFLATE | RAW_DEFLATE | XZ | NONE`
278+
279+
same as `COMPRESSION` in `TYPE = 'CSV'`
280+
281+
#### `TYPE = 'XML'`
282+
283+
##### `COMPRESSION = AUTO | GZIP | BZ2 | BROTLI | ZSTD | DEFLATE | RAW_DEFLATE | XZ | NONE`
284+
285+
same as `COMPRESSION` in `TYPE = 'CSV'`
286+
287+
##### `ROW_TAG = '<string>'`
288+
289+
**Description**: used to select XML elements to be decoded as a record.
290+
291+
**Default**: `'row'`
292+
293+
#### `TYPE = 'Parquet'`
294+
295+
No options available now.
296+
204297
### copyOptions
205298

206299
```

src/common/base/src/base/singleton_instance.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ impl SingletonType {
5656
let guard = LOCAL.wait().read();
5757
let v: &T = guard
5858
.get(&thread_name)
59-
.unwrap_or_else(|| panic!("thread {thread_name} is not initiated"))
59+
.unwrap_or_else(|| panic!("thread {thread_name} is not initiated, don't worry if we are in dropping"))
6060
.get();
6161
v.clone()
6262
}
@@ -75,7 +75,7 @@ impl SingletonType {
7575
let guard = LOCAL.wait().read();
7676
guard
7777
.get(&thread_name)
78-
.unwrap_or_else(|| panic!("thread {thread_name} is not initiated"))
78+
.unwrap_or_else(|| panic!("thread {thread_name} is not initiated, don't worry if we are in dropping"))
7979
.set(value)
8080
}
8181
}

src/common/base/src/mem_allocator/mmap_allocator.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@ pub mod linux {
5151
#[inline(always)]
5252
fn mmap_alloc(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
5353
debug_assert!(layout.align() <= page_size());
54+
ThreadTracker::alloc(layout.size() as i64);
5455
const PROT: i32 = libc::PROT_READ | libc::PROT_WRITE;
5556
const FLAGS: i32 = libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_POPULATE;
5657
let addr = unsafe { libc::mmap(null_mut(), layout.size(), PROT, FLAGS, -1, 0) };
5758
if addr == libc::MAP_FAILED {
5859
return Err(AllocError);
5960
}
6061
let addr = NonNull::new(addr as *mut ()).ok_or(AllocError)?;
61-
ThreadTracker::alloc(layout.size() as i64);
6262
Ok(NonNull::<[u8]>::from_raw_parts(addr, layout.size()))
6363
}
6464

src/common/storage/src/config.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ pub struct StorageMokaConfig {
451451
}
452452

453453
impl Default for StorageMokaConfig {
454+
#[no_sanitize(address)]
454455
fn default() -> Self {
455456
Self {
456457
// Use 1G as default.

src/common/storage/src/lib.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
//! - Table snapshots, segments cache must be stored accessed via cache operator.
2929
//! - Intermediate data generated by query could be stored by temporary operator.
3030
31+
#![feature(no_sanitize)]
32+
3133
mod config;
3234
pub use config::CacheConfig;
3335
pub use config::ShareTableConfig;
@@ -55,10 +57,6 @@ pub use operator::init_operator;
5557
pub use operator::CacheOperator;
5658
pub use operator::DataOperator;
5759

58-
mod location;
59-
pub use location::parse_uri_location;
60-
pub use location::UriLocation;
61-
6260
mod metrics;
6361
pub use metrics::StorageMetrics;
6462
pub use metrics::StorageMetricsLayer;

src/meta/api/src/schema_api_impl.rs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use common_meta_app::schema::DatabaseIdent;
3333
use common_meta_app::schema::DatabaseInfo;
3434
use common_meta_app::schema::DatabaseMeta;
3535
use common_meta_app::schema::DatabaseNameIdent;
36+
use common_meta_app::schema::DatabaseType;
3637
use common_meta_app::schema::DbIdList;
3738
use common_meta_app::schema::DbIdListKey;
3839
use common_meta_app::schema::DropDatabaseReply;
@@ -1602,6 +1603,11 @@ impl<KV: KVApi> SchemaApi for KV {
16021603
"get_table"
16031604
);
16041605

1606+
let db_type = match db_meta.from_share {
1607+
Some(share_ident) => DatabaseType::ShareDB(share_ident),
1608+
None => DatabaseType::NormalDB,
1609+
};
1610+
16051611
let tb_info = TableInfo {
16061612
ident: TableIdent {
16071613
table_id: tbid.table_id,
@@ -1612,7 +1618,7 @@ impl<KV: KVApi> SchemaApi for KV {
16121618
// Safe unwrap() because: tb_meta_seq > 0
16131619
meta: tb_meta.unwrap(),
16141620
tenant: req.tenant.clone(),
1615-
from_share: db_meta.from_share.clone(),
1621+
db_type,
16161622
};
16171623

16181624
return Ok(Arc::new(tb_info));
@@ -1698,6 +1704,11 @@ impl<KV: KVApi> SchemaApi for KV {
16981704
table_name: table_id_list_key.table_name.clone(),
16991705
};
17001706

1707+
let db_type = match db_meta.from_share.clone() {
1708+
Some(share_ident) => DatabaseType::ShareDB(share_ident),
1709+
None => DatabaseType::NormalDB,
1710+
};
1711+
17011712
let tb_info = TableInfo {
17021713
ident: TableIdent {
17031714
table_id: *table_id,
@@ -1707,7 +1718,7 @@ impl<KV: KVApi> SchemaApi for KV {
17071718
name: table_id_list_key.table_name.clone(),
17081719
meta: tb_meta,
17091720
tenant: tenant_dbname.tenant.clone(),
1710-
from_share: db_meta.from_share.clone(),
1721+
db_type,
17111722
};
17121723

17131724
tb_info_list.push(Arc::new(tb_info));
@@ -2633,7 +2644,7 @@ async fn get_tableinfos_by_ids(
26332644
ids: &[u64],
26342645
tenant_dbname: &DatabaseNameIdent,
26352646
dbid_tbnames_opt: Option<Vec<DBIdTableName>>,
2636-
share_name: Option<ShareNameIdent>,
2647+
db_type: DatabaseType,
26372648
) -> Result<Vec<Arc<TableInfo>>, KVAppError> {
26382649
let mut tb_meta_keys = Vec::with_capacity(ids.len());
26392650
for id in ids.iter() {
@@ -2671,7 +2682,7 @@ async fn get_tableinfos_by_ids(
26712682
meta: tb_meta,
26722683
name: tbnames[i].clone(),
26732684
tenant: tenant_dbname.tenant.clone(),
2674-
from_share: share_name.clone(),
2685+
db_type: db_type.clone(),
26752686
};
26762687
tb_infos.push(Arc::new(tb_info));
26772688
} else {
@@ -2700,7 +2711,14 @@ async fn list_tables_from_unshare_db(
27002711

27012712
let (dbid_tbnames, ids) = list_u64_value(kv_api, &dbid_tbname).await?;
27022713

2703-
get_tableinfos_by_ids(kv_api, &ids, tenant_dbname, Some(dbid_tbnames), None).await
2714+
get_tableinfos_by_ids(
2715+
kv_api,
2716+
&ids,
2717+
tenant_dbname,
2718+
Some(dbid_tbnames),
2719+
DatabaseType::NormalDB,
2720+
)
2721+
.await
27042722
}
27052723

27062724
async fn list_tables_from_share_db(
@@ -2739,5 +2757,12 @@ async fn list_tables_from_share_db(
27392757
ids.push(table_id);
27402758
}
27412759
}
2742-
get_tableinfos_by_ids(kv_api, &ids, tenant_dbname, None, Some(share)).await
2760+
get_tableinfos_by_ids(
2761+
kv_api,
2762+
&ids,
2763+
tenant_dbname,
2764+
None,
2765+
DatabaseType::ShareDB(share),
2766+
)
2767+
.await
27432768
}

src/meta/api/src/schema_api_test_suite.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use common_meta_app::schema::DatabaseIdToName;
3030
use common_meta_app::schema::DatabaseInfo;
3131
use common_meta_app::schema::DatabaseMeta;
3232
use common_meta_app::schema::DatabaseNameIdent;
33+
use common_meta_app::schema::DatabaseType;
3334
use common_meta_app::schema::DbIdList;
3435
use common_meta_app::schema::DbIdListKey;
3536
use common_meta_app::schema::DropDatabaseReq;
@@ -3533,7 +3534,7 @@ impl SchemaApiTestSuite {
35333534
assert_eq!(table_info.name, tb1.to_string());
35343535
assert_eq!(table_info.ident.table_id, share_table_id);
35353536
assert_eq!(table_info.tenant, tenant2.to_string());
3536-
assert_eq!(table_info.from_share, Some(share_name.clone()));
3537+
assert_eq!(table_info.db_type, DatabaseType::ShareDB(share_name));
35373538
}
35383539

35393540
info!("--- get tables from share db");

src/meta/app/src/schema/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub use table::CountTablesReq;
4747
pub use table::CreateTableReply;
4848
pub use table::CreateTableReq;
4949
pub use table::DBIdTableName;
50+
pub use table::DatabaseType;
5051
pub use table::DropTableReply;
5152
pub use table::DropTableReq;
5253
pub use table::GetTableCopiedFileReply;

0 commit comments

Comments
 (0)