Skip to content

Commit 3c98535

Browse files
authored
feat: ndjson support option null_if. (#14938)
1 parent e771569 commit 3c98535

File tree

15 files changed

+164
-11
lines changed

15 files changed

+164
-11
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/meta/app/src/principal/file_format.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ const OPT_ROW_TAG: &str = "row_tag";
4141
const OPT_ERROR_ON_COLUMN_COUNT_MISMATCH: &str = "error_on_column_count_mismatch";
4242
const MISSING_FIELD_AS: &str = "missing_field_as";
4343
const NULL_FIELD_AS: &str = "null_field_as";
44+
const NULL_IF: &str = "null_if";
4445
const OPT_EMPTY_FIELD_AS: &str = "empty_field_as";
4546
const OPT_BINARY_FORMAT: &str = "binary_format";
4647

@@ -184,10 +185,24 @@ impl FileFormatParams {
184185
let compression = ast.take_compression()?;
185186
let missing_field_as = ast.options.remove(MISSING_FIELD_AS);
186187
let null_field_as = ast.options.remove(NULL_FIELD_AS);
188+
let null_if = ast.options.remove(NULL_IF);
189+
let null_if = match null_if {
190+
None => {
191+
vec![]
192+
}
193+
Some(s) => {
194+
let values: Vec<String> = serde_json::from_str(&s).map_err(|_|
195+
ErrorCode::InvalidArgument(format!(
196+
"Invalid option value: NULL_IF is currently set to {s} (in JSON). The valid values are a list of strings."
197+
)))?;
198+
values
199+
}
200+
};
187201
FileFormatParams::NdJson(NdJsonFileFormatParams::try_create(
188202
compression,
189203
missing_field_as.as_deref(),
190204
null_field_as.as_deref(),
205+
null_if,
191206
)?)
192207
}
193208
StageFileFormatType::Parquet => {
@@ -586,13 +601,15 @@ pub struct NdJsonFileFormatParams {
586601
pub compression: StageFileCompression,
587602
pub missing_field_as: NullAs,
588603
pub null_field_as: NullAs,
604+
pub null_if: Vec<String>,
589605
}
590606

591607
impl NdJsonFileFormatParams {
592608
pub fn try_create(
593609
compression: StageFileCompression,
594610
missing_field_as: Option<&str>,
595611
null_field_as: Option<&str>,
612+
null_if: Vec<String>,
596613
) -> Result<Self> {
597614
let missing_field_as = NullAs::parse(missing_field_as, MISSING_FIELD_AS, NullAs::Error)?;
598615
let null_field_as = NullAs::parse(null_field_as, MISSING_FIELD_AS, NullAs::Null)?;
@@ -605,6 +622,7 @@ impl NdJsonFileFormatParams {
605622
compression,
606623
missing_field_as,
607624
null_field_as,
625+
null_if,
608626
})
609627
}
610628
}
@@ -615,6 +633,7 @@ impl Default for NdJsonFileFormatParams {
615633
compression: StageFileCompression::None,
616634
missing_field_as: NullAs::Error,
617635
null_field_as: NullAs::FieldDefault,
636+
null_if: vec![],
618637
}
619638
}
620639
}

src/meta/proto-conv/src/file_format_from_to_protobuf_impl.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ impl FromToProto for mt::principal::NdJsonFileFormatParams {
327327
compression,
328328
p.missing_field_as.as_deref(),
329329
p.null_field_as.as_deref(),
330+
p.null_if,
330331
)
331332
.map_err(|e| Incompatible {
332333
reason: format!("{e}"),
@@ -342,6 +343,7 @@ impl FromToProto for mt::principal::NdJsonFileFormatParams {
342343
compression,
343344
missing_field_as: Some(self.missing_field_as.to_string()),
344345
null_field_as: Some(self.null_field_as.to_string()),
346+
null_if: self.null_if.clone(),
345347
})
346348
}
347349
}

src/meta/proto-conv/src/util.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
111111
(79, "2024-01-31: Add: udf.proto/UserDefinedFunction add created_on field", ),
112112
(80, "2024-02-01: Add: datatype.proto/DataType Geometry type"),
113113
(81, "2024-03-04: Add: udf.udf_script"),
114-
(82, "2024-03-08: Add: table.inverted_index")
114+
(82, "2024-03-08: Add: table.inverted_index"),
115+
(83, "2024-03-14: Add: null_if in user.proto/NDJSONFileFormatParams")
115116
// Dear developer:
116117
// If you're gonna add a new metadata version, you'll have to add a test for it.
117118
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)

src/meta/proto-conv/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,4 @@ mod v078_grantentry;
8585
mod v079_udf_created_on;
8686
mod v081_udf_script;
8787
mod v082_table_index;
88+
mod v083_ndjson_format_params;

src/meta/proto-conv/tests/it/v032_file_format_params.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ fn test_decode_v32_ndjson_file_format_params() -> anyhow::Result<()> {
9393
compression: StageFileCompression::Gzip,
9494
missing_field_as: NullAs::Error,
9595
null_field_as: NullAs::Null,
96+
null_if: vec![],
9697
})
9798
};
9899
common::test_pb_from_to(func_name!(), want())?;

src/meta/proto-conv/tests/it/v064_ndjson_format_params.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ fn test_decode_v64_ndjson_file_format_params() -> anyhow::Result<()> {
4141
compression: StageFileCompression::Gzip,
4242
missing_field_as: NullAs::FieldDefault,
4343
null_field_as: NullAs::Null,
44+
null_if: vec![],
4445
})
4546
};
4647
common::test_pb_from_to(func_name!(), want())?;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Copyright 2023 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use databend_common_meta_app::principal::NdJsonFileFormatParams;
16+
use databend_common_meta_app::principal::NullAs;
17+
use databend_common_meta_app::principal::StageFileCompression;
18+
use minitrace::func_name;
19+
20+
use crate::common;
21+
22+
// These bytes are built when a new version in introduced,
23+
24+
// and are kept for backward compatibility test.
25+
//
26+
// *************************************************************
27+
// * These messages should never be updated, *
28+
// * only be added when a new version is added, *
29+
// * or be removed when an old version is no longer supported. *
30+
// *************************************************************
31+
//
32+
#[test]
33+
fn test_decode_v83_ndjson_file_format_params() -> anyhow::Result<()> {
34+
let nd_json_file_format_params_v83 = vec![
35+
8, 1, 18, 13, 70, 73, 69, 76, 68, 95, 68, 69, 70, 65, 85, 76, 84, 26, 13, 70, 73, 69, 76,
36+
68, 95, 68, 69, 70, 65, 85, 76, 84, 34, 0, 160, 6, 83, 168, 6, 24,
37+
];
38+
let want = || NdJsonFileFormatParams {
39+
compression: StageFileCompression::Gzip,
40+
missing_field_as: NullAs::FieldDefault,
41+
null_field_as: NullAs::FieldDefault,
42+
null_if: vec!["".to_string()],
43+
};
44+
common::test_load_old(
45+
func_name!(),
46+
nd_json_file_format_params_v83.as_slice(),
47+
83,
48+
want(),
49+
)?;
50+
common::test_pb_from_to(func_name!(), want())?;
51+
Ok(())
52+
}

src/meta/protos/proto/file_format.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ message NdJsonFileFormatParams {
138138
StageFileCompression compression = 1;
139139
optional string missing_field_as = 2;
140140
optional string null_field_as = 3;
141+
repeated string null_if = 4;
141142
}
142143

143144
message JsonFileFormatParams {

src/query/ast/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ nom-rule = "0.3.0"
3030
ordered-float = { workspace = true }
3131
pratt = "0.4.0"
3232
pretty = "0.11.3"
33+
serde_json = { workspace = true }
3334
strsim = "0.10"
3435
strum = "0.24"
3536
strum_macros = "0.24"

0 commit comments

Comments
 (0)