Skip to content

Commit f68c97e

Browse files
authored
feat(function): Implement map_cat function (#15348)
* Implement map_cat function Signed-off-by: shamb0 <r.raajey@gmail.com> * Implement map_cat function Signed-off-by: shamb0 <r.raajey@gmail.com> * Implement map_cat function Signed-off-by: shamb0 <r.raajey@gmail.com> * feat: implement map functions using MapType * merge to upstream main updates * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Make CI HappyOF Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> * Refactor map.rs function module based on code review feedback Signed-off-by: shamb0 <r.raajey@gmail.com> --------- Signed-off-by: shamb0 <r.raajey@gmail.com>
1 parent 43863f4 commit f68c97e

File tree

5 files changed

+348
-0
lines changed

5 files changed

+348
-0
lines changed

src/query/functions/src/scalars/map.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::collections::HashSet;
1516
use std::hash::Hash;
1617

1718
use databend_common_expression::types::nullable::NullableDomain;
19+
use databend_common_expression::types::ArgType;
1820
use databend_common_expression::types::ArrayType;
1921
use databend_common_expression::types::EmptyArrayType;
2022
use databend_common_expression::types::EmptyMapType;
@@ -159,6 +161,61 @@ pub fn register(registry: &mut FunctionRegistry) {
159161
),
160162
);
161163

164+
// `map_cat` over two empty maps: there is nothing to merge, so the result is
// the empty map as well.
registry.register_2_arg::<EmptyMapType, EmptyMapType, EmptyMapType, _, _>(
    "map_cat",
    |_, _, _| FunctionDomain::Full,
    |_, _, _| (),
);

// `map_cat(lhs, rhs)` merges two maps of the same key/value types.
//
// Output layout for each row:
//   1. every entry of `lhs`, in `lhs` order; when the key also exists in
//      `rhs`, the value is taken from `rhs`;
//   2. every entry of `rhs` whose key does not occur in `lhs`, in `rhs` order.
registry.register_passthrough_nullable_2_arg(
    "map_cat",
    |_, domain1, domain2| {
        // The result may contain keys/values from either side, so the output
        // domain is the merge of both input domains; a missing domain on one
        // side leaves the other side's domain unchanged.
        FunctionDomain::Domain(match (domain1, domain2) {
            (Some((key_domain1, val_domain1)), Some((key_domain2, val_domain2))) => Some((
                key_domain1.merge(key_domain2),
                val_domain1.merge(val_domain2),
            )),
            (Some(domain1), None) => Some(domain1).cloned(),
            (None, Some(domain2)) => Some(domain2).cloned(),
            (None, None) => None,
        })
    },
    vectorize_with_builder_2_arg::<
        MapType<GenericType<0>, GenericType<1>>,
        MapType<GenericType<0>, GenericType<1>>,
        MapType<GenericType<0>, GenericType<1>>,
    >(|lhs, rhs, output_map, ctx| {
        // Rows masked out by the validity bitmap only get a default entry;
        // no merge work is done for them.
        if let Some(validity) = &ctx.validity {
            if !validity.get_bit(output_map.len()) {
                output_map.push_default();
                return;
            }
        }

        let mut concatenated_map_builder =
            ArrayType::create_builder(lhs.len() + rhs.len(), ctx.generics);

        // Index `rhs` once so each `lhs` key is resolved with a hash lookup
        // instead of a linear scan over `rhs`. `or_insert` keeps the first
        // occurrence of a key, which is what a front-to-back `find` returns.
        let mut rhs_lookup = std::collections::HashMap::with_capacity(rhs.len());
        for (rhs_key, rhs_value) in rhs.iter() {
            rhs_lookup.entry(rhs_key).or_insert(rhs_value);
        }

        // Keys already emitted from `lhs`; used to skip them in the `rhs` pass.
        let mut lhs_keys = HashSet::with_capacity(lhs.len());

        for (lhs_key, lhs_value) in lhs.iter() {
            // On a key collision the right-hand value overrides the left one.
            let merged_value = match rhs_lookup.get(&lhs_key) {
                Some(rhs_value) => rhs_value.clone(),
                None => lhs_value,
            };
            concatenated_map_builder.put_item((lhs_key.clone(), merged_value));
            lhs_keys.insert(lhs_key);
        }

        for (rhs_key, rhs_value) in rhs.iter() {
            if !lhs_keys.contains(&rhs_key) {
                concatenated_map_builder.put_item((rhs_key, rhs_value));
            }
        }

        concatenated_map_builder.commit_row();
        output_map.append_column(&concatenated_map_builder.build());
    }),
);
218+
162219
registry.register_1_arg_core::<EmptyMapType, NumberType<u8>, _, _>(
163220
"map_size",
164221
|_, _| FunctionDomain::Domain(SimpleDomain { min: 0, max: 0 }),

src/query/functions/tests/it/scalars/map.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,72 @@ fn test_map() {
3030
test_map_keys(file);
3131
test_map_values(file);
3232
test_map_size(file);
33+
test_map_cat(file);
34+
}
35+
36+
fn test_map_cat(file: &mut impl Write) {
37+
// Empty Inputs:: tests behavior with empty input maps
38+
run_ast(file, "map_cat({}, {})", &[]);
39+
run_ast(file, "map_cat({}, {'k1': 'v1'})", &[]);
40+
run_ast(file, "map_cat({'k1': 'v1'}, {})", &[]);
41+
42+
// Basic Functionality:: evaluates core functionality
43+
let columns = [
44+
("a_col", StringType::from_data(vec!["a_k1", "a_k2", "a_k3"])),
45+
("b_col", StringType::from_data(vec!["b_k1", "b_k2", "b_k3"])),
46+
("c_col", StringType::from_data(vec!["c_k1", "c_k2", "c_k3"])),
47+
("d_col", StringType::from_data(vec!["aaa1", "aaa2", "aaa3"])),
48+
("e_col", StringType::from_data(vec!["bbb1", "bbb2", "bbb3"])),
49+
("f_col", StringType::from_data(vec!["ccc1", "ccc2", "ccc3"])),
50+
];
51+
52+
run_ast(
53+
file,
54+
"map_cat(map([a_col, b_col], [d_col, e_col]), map([c_col], [f_col]))",
55+
&columns,
56+
);
57+
58+
run_ast(file, "map_cat({'k1':'v1','k2':'v2'}, {'k1':'abc'})", &[]);
59+
60+
// Duplicate Keys:: assesses handling of duplicate keys
61+
let columns = [
62+
("a_col", StringType::from_data(vec!["a_k1", "a_k2", "c_k3"])),
63+
("b_col", StringType::from_data(vec!["b_k1", "c_k2", "b_k3"])),
64+
("c_col", StringType::from_data(vec!["c_k1", "c_k2", "c_k3"])),
65+
("d_col", StringType::from_data(vec!["aaa1", "aaa2", "aaa3"])),
66+
("e_col", StringType::from_data(vec!["bbb1", "bbb2", "bbb3"])),
67+
("f_col", StringType::from_data(vec!["ccc1", "ccc2", "ccc3"])),
68+
];
69+
70+
run_ast(
71+
file,
72+
"map_cat(map([a_col, b_col], [d_col, e_col]), map([c_col], [f_col]))",
73+
&columns,
74+
);
75+
76+
// Map Size Variation:: tests behavior with different map sizes
77+
run_ast(file, "map_cat({'k1': 'v1', 'k2': 'v2'}, {'k3': 'v3'})", &[]);
78+
run_ast(file, "map_cat({'k1': 'v1'}, {'k2': 'v2', 'k3': 'v3'})", &[]);
79+
80+
// Null Values:: validates behavior for null values
81+
run_ast(
82+
file,
83+
"map_cat({'k1': 'v1', 'k2': NULL}, {'k2': 'v2', 'k3': NULL})",
84+
&[],
85+
);
86+
87+
// Nested Maps:: examines recursive merging capabilities
88+
run_ast(
89+
file,
90+
"map_cat({'k1': {'nk1': 'nv1'}, 'k2': {'nk2': 'nv2'}}, {'k2': {'nk3': 'nv3'}, 'k3': {'nk4': 'nv4'}})",
91+
&[],
92+
);
93+
94+
run_ast(
95+
file,
96+
"map_cat({'k1': {'nk1': 'nv1'}, 'k2': {'nk2': 'nv2'}}, {'k1': {'nk1': 'new_nv1'}, 'k2': {'nk3': 'nv3'}})",
97+
&[],
98+
);
3399
}
34100

35101
fn test_create(file: &mut impl Write) {

src/query/functions/tests/it/scalars/testdata/function_list.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2436,6 +2436,10 @@ Functions overloads:
24362436
1 map(Array(Nothing) NULL, Array(Nothing) NULL) :: Map(Nothing) NULL
24372437
2 map(Array(T0), Array(T1)) :: Map(T0, T1)
24382438
3 map(Array(T0) NULL, Array(T1) NULL) :: Map(T0, T1) NULL
2439+
0 map_cat(Map(Nothing), Map(Nothing)) :: Map(Nothing)
2440+
1 map_cat(Map(Nothing) NULL, Map(Nothing) NULL) :: Map(Nothing) NULL
2441+
2 map_cat(Map(T0, T1), Map(T0, T1)) :: Map(T0, T1)
2442+
3 map_cat(Map(T0, T1) NULL, Map(T0, T1) NULL) :: Map(T0, T1) NULL
24392443
0 map_keys(Map(Nothing)) :: Array(Nothing)
24402444
1 map_keys(Map(T0, T1)) :: Array(T0)
24412445
2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL

0 commit comments

Comments
 (0)