Skip to content

Commit 8d1d931

Browse files
authored
Merge pull request #7583 from andylokandy/substr
chore(function): migrate left, right, substr and space to new expression framework
2 parents 14ca4e7 + 5500a72 commit 8d1d931

File tree

13 files changed

+494
-99
lines changed

13 files changed

+494
-99
lines changed

docs/doc/30-reference/20-functions/40-string-functions/substring.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ SUBSTRING(str FROM pos FOR len)
2424
| Arguments | Description |
2525
| ----------- | ----------- |
2626
| str | The main string from which the characters are extracted |
27-
| pos | The one-indexed position expression to start at. If negative, counts from the end |
28-
| len | The number expression of characters to extract |
27+
| pos | The position (starting from 1) at which the substring starts. If negative, counts from the end |
28+
| len | The maximum length of the substring to extract |
2929

3030
## Return Type
3131

src/query/ast/src/ast/expr.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ pub enum Expr<'a> {
111111
Substring {
112112
span: &'a [Token<'a>],
113113
expr: Box<Expr<'a>>,
114-
substring_from: Option<Box<Expr<'a>>>,
114+
substring_from: Box<Expr<'a>>,
115115
substring_for: Option<Box<Expr<'a>>>,
116116
},
117117
/// TRIM([[BOTH | LEADING | TRAILING] <expr> FROM] <expr>)
@@ -728,10 +728,7 @@ impl<'a> Display for Expr<'a> {
728728
substring_for,
729729
..
730730
} => {
731-
write!(f, "SUBSTRING({expr}")?;
732-
if let Some(substring_from) = substring_from {
733-
write!(f, " FROM {substring_from}")?;
734-
}
731+
write!(f, "SUBSTRING({expr} FROM {substring_from}")?;
735732
if let Some(substring_for) = substring_for {
736733
write!(f, " FOR {substring_for}")?;
737734
}

src/query/ast/src/ast/format/ast_format.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -387,16 +387,14 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
387387
&mut self,
388388
_span: &'ast [Token<'ast>],
389389
expr: &'ast Expr<'ast>,
390-
substring_from: &'ast Option<Box<Expr<'ast>>>,
390+
substring_from: &'ast Expr<'ast>,
391391
substring_for: &'ast Option<Box<Expr<'ast>>>,
392392
) {
393393
let mut children = Vec::with_capacity(1);
394394
self.visit_expr(expr);
395395
children.push(self.children.pop().unwrap());
396-
if let Some(substring_from) = substring_from {
397-
self.visit_expr(substring_from);
398-
children.push(self.children.pop().unwrap());
399-
}
396+
self.visit_expr(substring_from);
397+
children.push(self.children.pop().unwrap());
400398
if let Some(substring_for) = substring_for {
401399
self.visit_expr(substring_for);
402400
children.push(self.children.pop().unwrap());

src/query/ast/src/ast/format/syntax/expr.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,14 +183,10 @@ pub(crate) fn pretty_expr(expr: Expr) -> RcDoc {
183183
..
184184
} => RcDoc::text("SUBSTRING(")
185185
.append(pretty_expr(*expr))
186-
.append(if let Some(substring_from) = substring_from {
187-
RcDoc::space()
188-
.append(RcDoc::text("FROM"))
189-
.append(RcDoc::space())
190-
.append(pretty_expr(*substring_from))
191-
} else {
192-
RcDoc::nil()
193-
})
186+
.append(RcDoc::space())
187+
.append(RcDoc::text("FROM"))
188+
.append(RcDoc::space())
189+
.append(pretty_expr(*substring_from))
194190
.append(if let Some(substring_for) = substring_for {
195191
RcDoc::space()
196192
.append(RcDoc::text("FOR"))

src/query/ast/src/parser/expr.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ pub enum ExprElement<'a> {
206206
/// SUBSTRING(<expr> FROM <expr> [FOR <expr>])
207207
SubString {
208208
expr: Box<Expr<'a>>,
209-
substring_from: Option<Box<Expr<'a>>>,
209+
substring_from: Box<Expr<'a>>,
210210
substring_for: Option<Box<Expr<'a>>>,
211211
},
212212
/// TRIM([[BOTH | LEADING | TRAILING] <expr> FROM] <expr>)
@@ -667,13 +667,14 @@ pub fn expr_element(i: Input) -> IResult<WithSpan<ExprElement>> {
667667
SUBSTRING
668668
~ ^"("
669669
~ ^#subexpr(0)
670-
~ ( ( FROM | "," ) ~ ^#subexpr(0) )?
670+
~ ( FROM | "," )
671+
~ ^#subexpr(0)
671672
~ ( ( FOR | "," ) ~ ^#subexpr(0) )?
672673
~ ^")"
673674
},
674-
|(_, _, expr, opt_substring_from, opt_substring_for, _)| ExprElement::SubString {
675+
|(_, _, expr, _, substring_from, opt_substring_for, _)| ExprElement::SubString {
675676
expr: Box::new(expr),
676-
substring_from: opt_substring_from.map(|(_, expr)| Box::new(expr)),
677+
substring_from: Box::new(substring_from),
677678
substring_for: opt_substring_for.map(|(_, expr)| Box::new(expr)),
678679
},
679680
);

src/query/ast/src/visitors/visitor.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,13 +197,11 @@ pub trait Visitor<'ast>: Sized {
197197
&mut self,
198198
_span: &'ast [Token<'ast>],
199199
expr: &'ast Expr<'ast>,
200-
substring_from: &'ast Option<Box<Expr<'ast>>>,
200+
substring_from: &'ast Expr<'ast>,
201201
substring_for: &'ast Option<Box<Expr<'ast>>>,
202202
) {
203203
walk_expr(self, expr);
204-
if let Some(substring_from) = substring_from {
205-
walk_expr(self, substring_from);
206-
}
204+
walk_expr(self, substring_from);
207205
if let Some(substring_for) = substring_for {
208206
walk_expr(self, substring_for);
209207
}

src/query/ast/src/visitors/visitor_mut.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,11 @@ pub trait VisitorMut: Sized {
197197
&mut self,
198198
_span: &mut &[Token<'_>],
199199
expr: &mut Expr<'_>,
200-
substring_from: &mut Option<Box<Expr<'_>>>,
200+
substring_from: &mut Box<Expr<'_>>,
201201
substring_for: &mut Option<Box<Expr<'_>>>,
202202
) {
203203
walk_expr_mut(self, expr);
204-
205-
if let Some(substring_from) = substring_from {
206-
walk_expr_mut(self, substring_from);
207-
}
204+
walk_expr_mut(self, substring_from);
208205

209206
if let Some(substring_for) = substring_for {
210207
walk_expr_mut(self, substring_for);

src/query/ast/tests/it/testdata/expr.txt

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,20 +1582,18 @@ Substring {
15821582
span: Ident(10..11),
15831583
},
15841584
},
1585-
substring_from: Some(
1586-
ColumnRef {
1587-
span: [
1588-
Ident(17..18),
1589-
],
1590-
database: None,
1591-
table: None,
1592-
column: Identifier {
1593-
name: "b",
1594-
quote: None,
1595-
span: Ident(17..18),
1596-
},
1585+
substring_from: ColumnRef {
1586+
span: [
1587+
Ident(17..18),
1588+
],
1589+
database: None,
1590+
table: None,
1591+
column: Identifier {
1592+
name: "b",
1593+
quote: None,
1594+
span: Ident(17..18),
15971595
},
1598-
),
1596+
},
15991597
substring_for: Some(
16001598
ColumnRef {
16011599
span: [
@@ -1641,20 +1639,18 @@ Substring {
16411639
span: Ident(10..11),
16421640
},
16431641
},
1644-
substring_from: Some(
1645-
ColumnRef {
1646-
span: [
1647-
Ident(13..14),
1648-
],
1649-
database: None,
1650-
table: None,
1651-
column: Identifier {
1652-
name: "b",
1653-
quote: None,
1654-
span: Ident(13..14),
1655-
},
1642+
substring_from: ColumnRef {
1643+
span: [
1644+
Ident(13..14),
1645+
],
1646+
database: None,
1647+
table: None,
1648+
column: Identifier {
1649+
name: "b",
1650+
quote: None,
1651+
span: Ident(13..14),
16561652
},
1657-
),
1653+
},
16581654
substring_for: Some(
16591655
ColumnRef {
16601656
span: [

src/query/functions-v2/src/scalars/string.rs

Lines changed: 118 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use common_expression::types::number::SimpleDomain;
2020
use common_expression::types::number::UInt64Type;
2121
use common_expression::types::string::StringColumn;
2222
use common_expression::types::string::StringColumnBuilder;
23+
use common_expression::types::string::StringDomain;
2324
use common_expression::types::GenericMap;
2425
use common_expression::types::NumberType;
2526
use common_expression::types::StringType;
@@ -34,6 +35,12 @@ use common_expression::ValueRef;
3435
use itertools::izip;
3536

3637
pub fn register(registry: &mut FunctionRegistry) {
38+
registry.register_aliases("upper", &["ucase"]);
39+
registry.register_aliases("lower", &["lcase"]);
40+
registry.register_aliases("octet_length", &["length"]);
41+
registry.register_aliases("char_length", &["character_length"]);
42+
registry.register_aliases("substr", &["substring", "mid"]);
43+
3744
registry.register_passthrough_nullable_1_arg::<StringType, StringType, _, _>(
3845
"upper",
3946
FunctionProperty::default(),
@@ -58,7 +65,6 @@ pub fn register(registry: &mut FunctionRegistry) {
5865
},
5966
),
6067
);
61-
registry.register_aliases("upper", &["ucase"]);
6268

6369
registry.register_passthrough_nullable_1_arg::<StringType, StringType, _, _>(
6470
"lower",
@@ -84,7 +90,6 @@ pub fn register(registry: &mut FunctionRegistry) {
8490
},
8591
),
8692
);
87-
registry.register_aliases("lower", &["lcase"]);
8893

8994
registry.register_1_arg::<StringType, NumberType<u64>, _, _>(
9095
"bit_length",
@@ -99,7 +104,6 @@ pub fn register(registry: &mut FunctionRegistry) {
99104
|_| None,
100105
|val| val.len() as u64,
101106
);
102-
registry.register_aliases("octet_length", &["length"]);
103107

104108
registry.register_1_arg::<StringType, NumberType<u64>, _, _>(
105109
"char_length",
@@ -110,7 +114,6 @@ pub fn register(registry: &mut FunctionRegistry) {
110114
Err(_) => val.len() as u64,
111115
},
112116
);
113-
registry.register_aliases("char_length", &["character_length"]);
114117

115118
registry.register_passthrough_nullable_3_arg::<StringType, NumberType<u64>, StringType, StringType, _, _>(
116119
"lpad",
@@ -657,6 +660,92 @@ pub fn register(registry: &mut FunctionRegistry) {
657660
},
658661
),
659662
);
663+
664+
const SPACE: u8 = 0x20;
665+
registry.register_passthrough_nullable_1_arg::<NumberType<u64>, StringType, _, _>(
666+
"space",
667+
FunctionProperty::default(),
668+
|domain| {
669+
Some(StringDomain {
670+
min: vec![SPACE; domain.min as usize],
671+
max: Some(vec![SPACE; domain.max as usize]),
672+
})
673+
},
674+
|times, _| match times {
675+
ValueRef::Scalar(times) => Ok(Value::Scalar(vec![SPACE; times as usize])),
676+
ValueRef::Column(col) => {
677+
let mut total_space: u64 = 0;
678+
let mut offsets: Vec<u64> = Vec::with_capacity(col.len() + 1);
679+
offsets.push(0);
680+
for times in col.iter() {
681+
total_space += times;
682+
offsets.push(total_space);
683+
}
684+
let col = StringColumnBuilder {
685+
data: vec![SPACE; total_space as usize],
686+
offsets,
687+
}
688+
.build();
689+
Ok(Value::Column(col))
690+
}
691+
},
692+
);
693+
694+
registry.register_passthrough_nullable_2_arg::<StringType, NumberType<u64>, StringType, _, _>(
695+
"left",
696+
FunctionProperty::default(),
697+
|_, _| None,
698+
vectorize_with_builder_2_arg::<StringType, NumberType<u64>, StringType>(|s, n, output| {
699+
let n = n as usize;
700+
if n < s.len() {
701+
output.put_slice(&s[0..n]);
702+
} else {
703+
output.put_slice(s);
704+
}
705+
output.commit_row();
706+
Ok(())
707+
}),
708+
);
709+
710+
registry.register_passthrough_nullable_2_arg::<StringType, NumberType<u64>, StringType, _, _>(
711+
"right",
712+
FunctionProperty::default(),
713+
|_, _| None,
714+
vectorize_with_builder_2_arg::<StringType, NumberType<u64>, StringType>(|s, n, output| {
715+
let n = n as usize;
716+
if n < s.len() {
717+
output.put_slice(&s[s.len() - n..]);
718+
} else {
719+
output.put_slice(s);
720+
}
721+
output.commit_row();
722+
Ok(())
723+
}),
724+
);
725+
726+
registry.register_passthrough_nullable_2_arg::<StringType, NumberType<i64>, StringType, _, _>(
727+
"substr",
728+
FunctionProperty::default(),
729+
|_, _| None,
730+
vectorize_with_builder_2_arg::<StringType, NumberType<i64>, StringType>(
731+
|s, pos, output| {
732+
output.put_slice(substr(s, pos, s.len() as u64));
733+
output.commit_row();
734+
Ok(())
735+
},
736+
),
737+
);
738+
739+
registry.register_passthrough_nullable_3_arg::<StringType, NumberType<i64>, NumberType<u64>, StringType, _, _>(
740+
"substr",
741+
FunctionProperty::default(),
742+
|_, _, _| None,
743+
vectorize_with_builder_3_arg::<StringType, NumberType<i64>, NumberType<u64>, StringType>(|s, pos, len, output| {
744+
output.put_slice(substr(s, pos, len));
745+
output.commit_row();
746+
Ok(())
747+
}),
748+
);
660749
}
661750

662751
mod soundex {
@@ -688,7 +777,30 @@ mod soundex {
688777
}
689778
}
690779

691-
// Vectorize string to string function with customer estimate_bytes.
780+
#[inline]
781+
fn substr(str: &[u8], pos: i64, len: u64) -> &[u8] {
782+
if pos > 0 && pos <= str.len() as i64 {
783+
let l = str.len() as usize;
784+
let s = (pos - 1) as usize;
785+
let mut e = len as usize + s;
786+
if e > l {
787+
e = l;
788+
}
789+
return &str[s..e];
790+
}
791+
if pos < 0 && -(pos) <= str.len() as i64 {
792+
let l = str.len() as usize;
793+
let s = l - -pos as usize;
794+
let mut e = len as usize + s;
795+
if e > l {
796+
e = l;
797+
}
798+
return &str[s..e];
799+
}
800+
&str[0..0]
801+
}
802+
803+
/// String to String scalar function with estimated output column capacity.
692804
fn vectorize_string_to_string(
693805
estimate_bytes: impl Fn(&StringColumn) -> usize + Copy,
694806
func: impl Fn(&[u8], &mut StringColumnBuilder) -> Result<(), String> + Copy,
@@ -711,7 +823,7 @@ fn vectorize_string_to_string(
711823
}
712824
}
713825

714-
// Vectorize (string, string) -> string function with customer estimate_bytes.
826+
/// (String, String) to String scalar function with estimated output column capacity.
715827
fn vectorize_string_to_string_2_arg(
716828
estimate_bytes: impl Fn(&StringColumn, &StringColumn) -> usize + Copy,
717829
func: impl Fn(&[u8], &[u8], &mut StringColumnBuilder) -> Result<(), String> + Copy,

0 commit comments

Comments
 (0)