Skip to content

Commit c83af2f

Browse files
authored
Merge pull request #7581 from TCeason/ISSUE-7580
feat(query): eliminate the allocation
2 parents a156c2b + 3831883 commit c83af2f

File tree

2 files changed

+89
-79
lines changed

2 files changed

+89
-79
lines changed

src/query/functions-v2/src/scalars/string.rs

Lines changed: 70 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ use common_expression::types::NumberType;
2525
use common_expression::types::StringType;
2626
use common_expression::vectorize_with_builder_1_arg;
2727
use common_expression::vectorize_with_builder_2_arg;
28+
use common_expression::vectorize_with_builder_3_arg;
29+
use common_expression::vectorize_with_builder_4_arg;
2830
use common_expression::FunctionProperty;
2931
use common_expression::FunctionRegistry;
3032
use common_expression::Value;
@@ -110,103 +112,111 @@ pub fn register(registry: &mut FunctionRegistry) {
110112
);
111113
registry.register_aliases("char_length", &["character_length"]);
112114

113-
registry.register_3_arg::<StringType, NumberType<u64>, StringType, StringType, _, _>(
115+
registry.register_passthrough_nullable_3_arg::<StringType, NumberType<u64>, StringType, StringType, _, _>(
114116
"lpad",
115117
FunctionProperty::default(),
116118
|_, _, _| None,
117-
|str: &[u8], l: u64, pad: &[u8]| {
118-
let mut buff: Vec<u8> = vec![];
119-
if l != 0 {
120-
if l > str.len() as u64 {
121-
let l = l - str.len() as u64;
122-
while buff.len() < l as usize {
123-
if buff.len() + pad.len() <= l as usize {
124-
buff.extend_from_slice(pad);
119+
vectorize_with_builder_3_arg::<StringType, NumberType<u64>, StringType, StringType>(
120+
|s, pad_len, pad, output| {
121+
let pad_len = pad_len as usize;
122+
if pad_len <= s.len() {
123+
output.put_slice(&s[..pad_len])
124+
} else {
125+
let mut remain_pad_len = pad_len - s.len();
126+
while remain_pad_len > 0 {
127+
if remain_pad_len < pad.len() {
128+
output.put_slice(&pad[..remain_pad_len]);
129+
remain_pad_len = 0;
125130
} else {
126-
buff.extend_from_slice(&pad[0..l as usize - buff.len()])
131+
output.put_slice(pad);
132+
remain_pad_len -= pad.len();
127133
}
128134
}
129-
buff.extend_from_slice(str);
130-
} else {
131-
buff.extend_from_slice(&str[0..l as usize]);
135+
output.put_slice(s);
132136
}
137+
output.commit_row();
138+
Ok(())
133139
}
134-
buff
135-
},
140+
),
136141
);
137142

138-
registry.register_4_arg::<StringType, NumberType<i64>, NumberType<i64>, StringType, StringType, _, _>(
139-
"insert",
140-
FunctionProperty::default(),
141-
|_, _, _, _| None,
142-
|srcstr, pos, len, substr| {
143-
let mut values: Vec<u8> = vec![];
144-
145-
let sl = srcstr.len() as i64;
146-
if pos < 1 || pos > sl {
147-
values.extend_from_slice(srcstr);
143+
registry.register_passthrough_nullable_4_arg::<StringType, NumberType<i64>, NumberType<i64>, StringType, StringType, _, _>(
144+
"insert",
145+
FunctionProperty::default(),
146+
|_, _, _, _| None,
147+
vectorize_with_builder_4_arg::<StringType, NumberType<i64>, NumberType<i64>, StringType, StringType>(
148+
|srcstr, pos, len, substr, output| {
149+
let pos = pos as usize;
150+
let len = len as usize;
151+
if pos < 1 || pos > srcstr.len() {
152+
output.put_slice(srcstr);
148153
} else {
149-
let p = pos as usize - 1;
150-
values.extend_from_slice(&srcstr[0..p]);
151-
values.extend_from_slice(substr);
152-
if len >= 0 && pos + len < sl {
153-
let l = len as usize;
154-
values.extend_from_slice(&srcstr[p + l..]);
154+
let pos = pos - 1;
155+
output.put_slice(&srcstr[0..pos]);
156+
output.put_slice(substr);
157+
if pos + len < srcstr.len() {
158+
output.put_slice(&srcstr[(pos + len)..]);
155159
}
156160
}
157-
values
158-
}
159-
);
161+
output.commit_row();
162+
Ok(())
163+
}),
164+
);
160165

161-
registry.register_3_arg::<StringType, NumberType<u64>, StringType, StringType, _, _>(
166+
registry.register_passthrough_nullable_3_arg::<StringType, NumberType<u64>, StringType, StringType, _, _>(
162167
"rpad",
163168
FunctionProperty::default(),
164169
|_, _, _| None,
165-
|str: &[u8], l: u64, pad: &[u8]| {
166-
let mut buff: Vec<u8> = vec![];
167-
if l != 0 {
168-
if l > str.len() as u64 {
169-
buff.extend_from_slice(str);
170-
while buff.len() < l as usize {
171-
if buff.len() + pad.len() <= l as usize {
172-
buff.extend_from_slice(pad);
173-
} else {
174-
buff.extend_from_slice(&pad[0..l as usize - buff.len()])
175-
}
170+
vectorize_with_builder_3_arg::<StringType, NumberType<u64>, StringType, StringType>(
171+
|s: &[u8], pad_len: u64, pad: &[u8], output| {
172+
let pad_len = pad_len as usize;
173+
if pad_len <= s.len() {
174+
output.put_slice(&s[..pad_len])
175+
} else {
176+
output.put_slice(s);
177+
let mut remain_pad_len = pad_len - s.len();
178+
while remain_pad_len > 0 {
179+
if remain_pad_len < pad.len() {
180+
output.put_slice(&pad[..remain_pad_len]);
181+
remain_pad_len = 0;
182+
} else {
183+
output.put_slice(pad);
184+
remain_pad_len -= pad.len();
176185
}
177-
} else {
178-
buff.extend_from_slice(&str[0..l as usize]);
179186
}
180187
}
181-
buff
182-
},
188+
output.commit_row();
189+
Ok(())
190+
}),
183191
);
184192

185-
registry.register_3_arg::<StringType, StringType, StringType, StringType, _, _>(
193+
registry.register_passthrough_nullable_3_arg::<StringType, StringType, StringType, StringType, _, _>(
186194
"replace",
187195
FunctionProperty::default(),
188196
|_, _, _| None,
189-
|str, from, to| {
190-
let mut buf: Vec<u8> = vec![];
197+
vectorize_with_builder_3_arg::<StringType, StringType, StringType, StringType>(
198+
|str, from, to, output| {
191199
if from.is_empty() || from == to {
192-
buf.extend_from_slice(str);
193-
return buf;
200+
output.put_slice(str);
201+
output.commit_row();
202+
return Ok(());
194203
}
195204
let mut skip = 0;
196205
for (p, w) in str.windows(from.len()).enumerate() {
197206
if w == from {
198-
buf.extend_from_slice(to);
207+
output.put_slice(to);
199208
skip = from.len();
200209
} else if p + w.len() == str.len() {
201-
buf.extend_from_slice(w);
210+
output.put_slice(w);
202211
} else if skip > 1 {
203212
skip -= 1;
204213
} else {
205-
buf.extend_from_slice(&w[0..1]);
214+
output.put_slice(&w[0..1]);
206215
}
207216
}
208-
buf
209-
},
217+
output.commit_row();
218+
Ok(())
219+
}),
210220
);
211221

212222
registry.register_2_arg::<StringType, StringType, NumberType<i8>, _, _>(

src/query/functions-v2/tests/it/scalars/testdata/string.txt

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2862,19 +2862,19 @@ evaluation:
28622862
| Domain | {"cc"..="test"} | {1..=4} | {1..=5} | {"12"..="zc"} | Unknown |
28632863
| Row 0 | "hi" | 1 | 3 | "xx" | "xx" |
28642864
| Row 1 | "test" | 4 | 5 | "zc" | "teszc" |
2865-
| Row 2 | "cc" | 1 | 1 | "12" | "12" |
2865+
| Row 2 | "cc" | 1 | 1 | "12" | "12c" |
28662866
| Row 3 | "q" | 1 | 1 | "56" | "56" |
28672867
+--------+-----------------+---------+---------+---------------+---------+
28682868
evaluation (internal):
2869-
+--------+----------------------------------------------------------------------------+
2870-
| Column | Data |
2871-
+--------+----------------------------------------------------------------------------+
2872-
| a | StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] } |
2873-
| b | Int32([1, 4, 1, 1]) |
2874-
| c | Int32([3, 5, 1, 1]) |
2875-
| d | StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] } |
2876-
| Output | StringColumn { data: 0x78787465737a6331323536, offsets: [0, 2, 7, 9, 11] } |
2877-
+--------+----------------------------------------------------------------------------+
2869+
+--------+-------------------------------------------------------------------------------+
2870+
| Column | Data |
2871+
+--------+-------------------------------------------------------------------------------+
2872+
| a | StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] } |
2873+
| b | Int32([1, 4, 1, 1]) |
2874+
| c | Int32([3, 5, 1, 1]) |
2875+
| d | StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] } |
2876+
| Output | StringColumn { data: 0x78787465737a633132633536, offsets: [0, 2, 7, 10, 12] } |
2877+
+--------+-------------------------------------------------------------------------------+
28782878

28792879

28802880
ast : insert(x, y, z, u)
@@ -2892,14 +2892,14 @@ evaluation:
28922892
| Row 3 | "q" | 1 | 1 | "56" | "56" |
28932893
+--------+--------------------------+------------------+------------------+------------------------+-------------+
28942894
evaluation (internal):
2895-
+--------+-------------------------------------------------------------------------------------------------------------------------------+
2896-
| Column | Data |
2897-
+--------+-------------------------------------------------------------------------------------------------------------------------------+
2898-
| x | NullableColumn { column: StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] }, validity: [0b____1110] } |
2899-
| y | NullableColumn { column: Int32([1, 4, 1, 1]), validity: [0b____1011] } |
2900-
| z | NullableColumn { column: Int32([3, 5, 1, 1]), validity: [0b____1101] } |
2901-
| u | NullableColumn { column: StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1110] } |
2902-
| Output | NullableColumn { column: StringColumn { data: 0x78787465737a6331323536, offsets: [0, 2, 7, 9, 11] }, validity: [0b____1000] } |
2903-
+--------+-------------------------------------------------------------------------------------------------------------------------------+
2895+
+--------+----------------------------------------------------------------------------------------------------------------------------------+
2896+
| Column | Data |
2897+
+--------+----------------------------------------------------------------------------------------------------------------------------------+
2898+
| x | NullableColumn { column: StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] }, validity: [0b____1110] } |
2899+
| y | NullableColumn { column: Int32([1, 4, 1, 1]), validity: [0b____1011] } |
2900+
| z | NullableColumn { column: Int32([3, 5, 1, 1]), validity: [0b____1101] } |
2901+
| u | NullableColumn { column: StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1110] } |
2902+
| Output | NullableColumn { column: StringColumn { data: 0x78787465737a633132633536, offsets: [0, 2, 7, 10, 12] }, validity: [0b____1000] } |
2903+
+--------+----------------------------------------------------------------------------------------------------------------------------------+
29042904

29052905

0 commit comments

Comments
 (0)