Skip to content

Commit 2254e89

Browse files
authored
Merge pull request #9188 from BohuTANG/dev-record-delimiter
feat: custom csv record delimiter
2 parents 6334a0e + 42853ce commit 2254e89

File tree

10 files changed

+57
-14
lines changed

10 files changed

+57
-14
lines changed

src/query/formats/src/format_option_checker.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -230,17 +230,25 @@ pub fn check_field_delimiter(option: &mut String, default: &str) -> Result<()> {
230230
Ok(())
231231
}
232232

233+
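/// Validates the record delimiter: an empty option defaults to `\n`; otherwise it must be a single byte (u8) or exactly `\r\n`.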
233234
pub fn check_record_delimiter(option: &mut String) -> Result<()> {
234-
if option.is_empty() {
235-
*option = "\n".to_string()
236-
} else {
237-
let o = option.as_str();
238-
if o != "\n" && o != "\r\n" {
235+
match option.len() {
236+
0 => *option = "\n".to_string(),
237+
1 => {}
238+
2 => {
239+
if option != "\r\n" {
240+
return Err(ErrorCode::InvalidArgument(
241+
"record_delimiter with two chars can only be '\\r\\n'",
242+
));
243+
};
244+
}
245+
_ => {
239246
return Err(ErrorCode::InvalidArgument(
240-
"record_delimiter can only be '\\n' or '\\r\\n'",
247+
"record_delimiter can not more than two chars, please use one char or '\\r\\n'",
241248
));
242-
};
249+
}
243250
}
251+
244252
Ok(())
245253
}
246254

src/query/formats/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,6 @@ pub use field_decoder::*;
2828
pub use file_format_type::parse_timezone;
2929
pub use file_format_type::FileFormatOptionsExt;
3030
pub use file_format_type::FileFormatTypeExt;
31+
pub use format_option_checker::check_record_delimiter;
3132

3233
use crate::common_settings::CommonSettings;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2022 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use common_formats::check_record_delimiter;
16+
17+
/// This test code was written by OpenAI's GPT-3.
18+
#[test]
19+
fn test_check_record_delimiter() {
20+
let mut option = "".to_string();
21+
assert!(check_record_delimiter(&mut option).is_ok());
22+
assert_eq!(option, "\n");
23+
24+
let mut option = "|".to_string();
25+
assert!(check_record_delimiter(&mut option).is_ok());
26+
assert_eq!(option, "|");
27+
28+
let mut option = "\r\n".to_string();
29+
assert!(check_record_delimiter(&mut option).is_ok());
30+
assert_eq!(option, "\r\n");
31+
32+
let mut option = "foo".to_string();
33+
assert!(check_record_delimiter(&mut option).is_err());
34+
35+
let mut option = "|\r".to_string();
36+
assert!(check_record_delimiter(&mut option).is_err());
37+
}

src/query/formats/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use common_meta_types::StageFileFormatType;
2121
use common_settings::Settings;
2222

2323
mod field_encoder;
24+
mod format_option_checker;
2425
mod output_format_json_each_row;
2526
mod output_format_tcsv;
2627
mod output_format_utils;

tests/suites/1_stateful/05_formats/05_02_csv/05_02_04_csv_no_newline.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
1111
b Int NULL
1212
);" | $MYSQL_CLIENT_CONNECT
1313

14-
curl -sH "insert_sql:insert into test_csv format CSV" -F "upload=@${CURDIR}/no_newline.csv" \
14+
curl -sH "insert_sql:insert into test_csv format CSV" -F "upload=@${CURDIR}/testdata/no_newline.csv" \
1515
-u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c "SUCCESS"
1616
echo "select * from test_csv" | $MYSQL_CLIENT_CONNECT

tests/suites/1_stateful/05_formats/05_02_csv/05_02_05_csv_with_0x01_delimiter.sh renamed to tests/suites/1_stateful/05_formats/05_02_csv/05_02_05_csv_0x01_field_0x02_record.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
1212
c VARCHAR
1313
);" | $MYSQL_CLIENT_CONNECT
1414

15-
curl -sH "insert_sql:insert into test_x01_csv format CSV" -H "format_field_delimiter:\x01" -H "format_skip_header:1" -H "format_record_delimiter:\r\n" -F "upload=@${CURDIR}/x01_field_delimiter.csv" \
15+
curl -sH "insert_sql:insert into test_x01_csv format CSV" -H "format_field_delimiter:\x01" -H "format_record_delimiter:\x02" -H "format_skip_header:0" -F "upload=@${CURDIR}/testdata/x01_field_x02_record.csv" \
1616
-u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c "SUCCESS"
1717
echo "select count() from test_x01_csv" | $MYSQL_CLIENT_CONNECT
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"Donald John Trump"80"president""马斯克"43"特斯拉""测试"39"TT""测试"39"test"

tests/suites/1_stateful/05_formats/05_02_csv/x01_field_delimiter.csv

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)