Skip to content

Commit 1bc00ce

Browse files
authored
Models SQL function expressions as functions with named arguments (#122)
- Add named parameter parsing for functions - Streaming translate SQL function expressions into functions with named arguments This translates calls like: - `substring(a from 2 for 3)` - `trim(leading 't' from 'ttthere')` - `cast(a as VARCHAR)` Into: - `substring(a, "from":2, "for":3)` - `trim("leading": 't', "from": 'ttthere')` - `cast(a, "as":VARCHAR)`
1 parent 710d341 commit 1bc00ce

File tree

9 files changed

+1271
-134
lines changed

9 files changed

+1271
-134
lines changed

partiql-ast/src/ast.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ pub type BagAst = AstBytePos<Bag>;
229229
pub type BetweenAst = AstBytePos<Between>;
230230
pub type BinOpAst = AstBytePos<BinOp>;
231231
pub type CallAggAst = AstBytePos<CallAgg>;
232+
pub type CallArgAst = AstBytePos<CallArg>;
232233
pub type CallAst = AstBytePos<Call>;
233234
pub type CaseAst = AstBytePos<Case>;
234235
pub type FromClauseAst = AstBytePos<FromClause>;
@@ -507,8 +508,20 @@ pub struct Path {
507508
#[derive(Clone, Debug, PartialEq)]
508509
pub struct Call {
509510
pub func_name: SymbolPrimitive,
510-
pub setq: Option<SetQuantifier>,
511-
pub args: Vec<Box<Expr>>,
511+
pub args: Vec<CallArgAst>,
512+
}
513+
514+
#[derive(Clone, Debug, PartialEq)]
515+
pub enum CallArg {
516+
/// `*` used as an argument to a function call (e.g., in `count(*)`)
517+
Star(),
518+
/// positional argument to a function call (e.g., all arguments in `foo(1, 'a', 3)`)
519+
Positional(Box<Expr>),
520+
/// named argument to a function call (e.g., the `"from" : 2` in `substring(a, "from":2)`
521+
Named {
522+
name: SymbolPrimitive,
523+
value: Option<Box<Expr>>,
524+
},
512525
}
513526

514527
#[derive(Clone, Debug, PartialEq)]

partiql-parser/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ logos = "~0.12.0"
3737

3838
itertools = "~0.10.3"
3939

40+
regex = "~1.5.5"
41+
lazy_static = "~1.4.0"
42+
4043
[dev-dependencies]
4144
criterion = "0.3"
4245

partiql-parser/benches/bench_parse.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,20 @@ const Q_COMPLEX: &str = r#"
1313
SELECT numRec, data
1414
FROM delta_full_transactions.deltas delta,
1515
(
16-
SELECT u.id, review, rindex
16+
SELECT foo(u.id), bar(review), rindex
17+
FROM delta.data as u CROSS JOIN UNPIVOT u.reviews as review AT rindex
18+
) as data,
19+
delta.numRec as numRec
20+
)
21+
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
22+
"#;
23+
24+
const Q_COMPLEX_FEXPR: &str = r#"
25+
SELECT (
26+
SELECT numRec, data
27+
FROM delta_full_transactions.deltas delta,
28+
(
29+
SELECT cast(trim(u.id) as VARCHAR(20)), substring(review from 2 for 5), rindex
1730
FROM delta.data as u CROSS JOIN UNPIVOT u.reviews as review AT rindex
1831
) as data,
1932
delta.numRec as numRec
@@ -27,11 +40,14 @@ fn parse_bench(c: &mut Criterion) {
2740
c.bench_function("parse-ion", |b| b.iter(|| parse(black_box(Q_ION))));
2841
c.bench_function("parse-group", |b| b.iter(|| parse(black_box(Q_GROUP))));
2942
c.bench_function("parse-complex", |b| b.iter(|| parse(black_box(Q_COMPLEX))));
43+
c.bench_function("parse-complex-fexpr", |b| {
44+
b.iter(|| parse(black_box(Q_COMPLEX_FEXPR)))
45+
});
3046
}
3147

3248
criterion_group! {
3349
name = parse;
34-
config = Criterion::default().measurement_time(Duration::new(10, 0));
50+
config = Criterion::default().measurement_time(Duration::new(5, 0));
3551
targets = parse_bench
3652
}
3753

partiql-parser/src/lexer.rs

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ use partiql_source_map::location::{ByteOffset, BytePosition, ToLocated};
22
use std::borrow::Cow;
33

44
use logos::{Logos, Span};
5+
56
use std::cmp::max;
7+
68
use std::fmt;
79
use std::fmt::Formatter;
810

@@ -67,7 +69,7 @@ impl<'input, 'tracker> CommentLexer<'input, 'tracker> {
6769
}
6870

6971
/// Parses a single (possibly nested) block comment and returns it
70-
fn next(&mut self) -> Option<CommentStringResult<'input>> {
72+
fn next_internal(&mut self) -> Option<CommentStringResult<'input>> {
7173
let Span { start, .. } = self.lexer.span();
7274
let mut nesting = 0;
7375
let nesting_inc = if self.comment_nesting { 1 } else { 0 };
@@ -114,7 +116,7 @@ impl<'input, 'tracker> Iterator for CommentLexer<'input, 'tracker> {
114116

115117
#[inline(always)]
116118
fn next(&mut self) -> Option<Self::Item> {
117-
self.next()
119+
self.next_internal()
118120
}
119121
}
120122

@@ -177,7 +179,7 @@ impl<'input, 'tracker> EmbeddedIonLexer<'input, 'tracker> {
177179
}
178180

179181
/// Parses a single embedded ion value, quoted between backticks (`), and returns it
180-
fn next(&mut self) -> Option<EmbeddedIonStringResult<'input>> {
182+
fn next_internal(&mut self) -> Option<EmbeddedIonStringResult<'input>> {
181183
let next_token = self.lexer.next();
182184
match next_token {
183185
Some(EmbeddedIonToken::Embed) => {
@@ -197,7 +199,7 @@ impl<'input, 'tracker> EmbeddedIonLexer<'input, 'tracker> {
197199
let mut comment_tracker = LineOffsetTracker::default();
198200
let mut comment_lexer =
199201
CommentLexer::new(remaining, &mut comment_tracker);
200-
match comment_lexer.next() {
202+
match comment_lexer.next_internal() {
201203
Some(Ok((s, _c, e))) => {
202204
self.tracker.append(&comment_tracker, embed.start.into());
203205
self.lexer.bump((e - s).to_usize() - embed.len())
@@ -248,18 +250,19 @@ impl<'input, 'tracker> Iterator for EmbeddedIonLexer<'input, 'tracker> {
248250

249251
#[inline(always)]
250252
fn next(&mut self) -> Option<Self::Item> {
251-
self.next()
253+
self.next_internal()
252254
}
253255
}
254256

255-
/// A lexer from PartiQL text strings to [`LexicalToken`]s
257+
/// A lexer from PartiQL text strings to [`Token`]s
256258
pub(crate) struct PartiqlLexer<'input, 'tracker> {
257259
/// Wrap a logos-generated lexer
258260
lexer: logos::Lexer<'input, Token<'input>>,
259261
tracker: &'tracker mut LineOffsetTracker,
260262
}
261263

262-
type InternalLexResult<'input> = SpannedResult<Token<'input>, ByteOffset, LexError<'input>>;
264+
pub(crate) type InternalLexResult<'input> =
265+
SpannedResult<Token<'input>, ByteOffset, LexError<'input>>;
263266
pub(crate) type LexResult<'input> =
264267
Result<Spanned<Token<'input>, ByteOffset>, ParseError<'input, BytePosition>>;
265268

@@ -293,15 +296,20 @@ impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> {
293296
Err((start.into(), err_ctor(region.into()), end.into()))
294297
}
295298

296-
/// Wraps a [`Token`] into a [`LexicalToken`] at the current position of the lexer.
299+
pub fn slice(&self) -> &'input str {
300+
self.lexer.slice()
301+
}
302+
303+
/// Wraps a [`Token`] into a [`Token`] at the current position of the lexer.
297304
#[inline(always)]
298305
fn wrap(&mut self, token: Token<'input>) -> InternalLexResult<'input> {
299306
let Span { start, end } = self.lexer.span();
300307
Ok((start.into(), token, end.into()))
301308
}
302309

303-
/// Advances the iterator and returns the next [`LexicalToken`] or [`None`] when input is exhausted.
304-
fn next(&mut self) -> Option<InternalLexResult<'input>> {
310+
/// Advances the iterator and returns the next [`Token`] or [`None`] when input is exhausted.
311+
#[inline]
312+
pub(crate) fn next_internal(&mut self) -> Option<InternalLexResult<'input>> {
305313
'next_tok: loop {
306314
return match self.lexer.next() {
307315
None => None,
@@ -330,7 +338,7 @@ impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> {
330338
let remaining = &self.lexer.source()[embed.start..];
331339
let mut comment_tracker = LineOffsetTracker::default();
332340
let mut comment_lexer = CommentLexer::new(remaining, &mut comment_tracker).with_nesting();
333-
comment_lexer.next().map(|res| match res {
341+
comment_lexer.next_internal().map(|res| match res {
334342
Ok((s, comment, e)) => {
335343
let val_len = e - s;
336344
let val_start = embed.start.into(); // embed end is 1 past the starting '/*'
@@ -352,7 +360,7 @@ impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> {
352360
let remaining = &self.lexer.source()[embed.start..];
353361
let mut ion_tracker = LineOffsetTracker::default();
354362
let mut ion_lexer = EmbeddedIonLexer::new(remaining, &mut ion_tracker);
355-
ion_lexer.next().map(|res| match res {
363+
ion_lexer.next_internal().map(|res| match res {
356364
Ok((s, ion, e)) => {
357365
let val_len = e - s;
358366
let val_start = embed.end.into(); // embed end is 1 past the starting '`'
@@ -374,7 +382,7 @@ impl<'input, 'tracker> Iterator for PartiqlLexer<'input, 'tracker> {
374382

375383
#[inline(always)]
376384
fn next(&mut self) -> Option<Self::Item> {
377-
self.next().map(|res| res.map_err(|e| e.into()))
385+
self.next_internal().map(|res| res.map_err(|e| e.into()))
378386
}
379387
}
380388

@@ -534,6 +542,8 @@ pub enum Token<'input> {
534542
False,
535543
#[regex("(?i:First)")]
536544
First,
545+
#[regex("(?i:For)")]
546+
For,
537547
#[regex("(?i:Full)")]
538548
Full,
539549
#[regex("(?i:From)")]
@@ -614,6 +624,65 @@ pub enum Token<'input> {
614624
With,
615625
}
616626

627+
impl<'input> Token<'input> {
628+
pub fn is_keyword(&self) -> bool {
629+
matches!(
630+
self,
631+
Token::All
632+
| Token::Asc
633+
| Token::And
634+
| Token::As
635+
| Token::At
636+
| Token::Between
637+
| Token::By
638+
| Token::Cross
639+
| Token::Desc
640+
| Token::Distinct
641+
| Token::Escape
642+
| Token::Except
643+
| Token::First
644+
| Token::For
645+
| Token::Full
646+
| Token::From
647+
| Token::Group
648+
| Token::Having
649+
| Token::In
650+
| Token::Inner
651+
| Token::Is
652+
| Token::Intersect
653+
| Token::Join
654+
| Token::Last
655+
| Token::Lateral
656+
| Token::Left
657+
| Token::Like
658+
| Token::Limit
659+
| Token::Missing
660+
| Token::Natural
661+
| Token::Not
662+
| Token::Null
663+
| Token::Nulls
664+
| Token::Offset
665+
| Token::On
666+
| Token::Or
667+
| Token::Order
668+
| Token::Outer
669+
| Token::Partial
670+
| Token::Pivot
671+
| Token::Preserve
672+
| Token::Right
673+
| Token::Select
674+
| Token::Then
675+
| Token::Union
676+
| Token::Unpivot
677+
| Token::Using
678+
| Token::Value
679+
| Token::Values
680+
| Token::Where
681+
| Token::With
682+
)
683+
}
684+
}
685+
617686
impl<'input> fmt::Display for Token<'input> {
618687
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
619688
match self {
@@ -677,6 +746,7 @@ impl<'input> fmt::Display for Token<'input> {
677746
| Token::Except
678747
| Token::False
679748
| Token::First
749+
| Token::For
680750
| Token::Full
681751
| Token::From
682752
| Token::Group
@@ -740,7 +810,7 @@ mod tests {
740810
let keywords =
741811
"WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
742812
On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
743-
Intersect Is Inner In Having Group From Full First False Except Escape Desc \
813+
Intersect Is Inner In Having Group From For Full First False Except Escape Desc \
744814
Cross By Between At As And Asc All Values Case When Then Else End";
745815
let symbols = symbols.split(' ').chain(primitives.split(' '));
746816
let keywords = keywords.split(' ');
@@ -761,7 +831,7 @@ mod tests {
761831
"LIMIT", "/", "LIKE", "^", "LEFT", ".", "LATERAL", "||", "LAST", ":", "JOIN",
762832
"--", "INTERSECT", "/**/","IS", "<unquoted_ident:UNQUOTED_IDENT>", "INNER",
763833
"<quoted_ident:QUOTED_IDENT>", "IN", "<unquoted_atident:UNQUOTED_ATIDENT>", "HAVING",
764-
"<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FULL", "FIRST", "FALSE", "EXCEPT",
834+
"<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT",
765835
"ESCAPE", "DESC", "CROSS", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
766836
"CASE", "WHEN", "THEN", "ELSE", "END",
767837
];
@@ -854,18 +924,30 @@ mod tests {
854924
let lexer = PartiqlLexer::new(query, &mut offset_tracker);
855925
let toks: Vec<_> = lexer.collect::<Result<_, _>>()?;
856926

927+
let mut pre_offset_tracker = LineOffsetTracker::default();
928+
let pre_lexer = PartiqlLexer::new(query, &mut pre_offset_tracker);
929+
let pre_toks: Vec<_> = pre_lexer.collect::<Result<_, _>>()?;
930+
931+
let expected_toks = vec![
932+
Token::Select,
933+
Token::UnquotedIdent("g"),
934+
Token::From,
935+
Token::QuotedIdent("data"),
936+
Token::Group,
937+
Token::By,
938+
Token::UnquotedIdent("a"),
939+
];
857940
assert_eq!(
858-
vec![
859-
Token::Select,
860-
Token::UnquotedIdent("g"),
861-
Token::From,
862-
Token::QuotedIdent("data"),
863-
Token::Group,
864-
Token::By,
865-
Token::UnquotedIdent("a")
866-
],
941+
expected_toks,
867942
toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
868943
);
944+
assert_eq!(
945+
expected_toks,
946+
pre_toks
947+
.into_iter()
948+
.map(|(_s, t, _e)| t)
949+
.collect::<Vec<_>>()
950+
);
869951

870952
assert_eq!(offset_tracker.num_lines(), 3);
871953
assert_eq!(

partiql-parser/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
2222
mod lexer;
2323
mod parse;
24+
mod preprocessor;
2425
mod result;
26+
mod token_parser;
2527

2628
pub use result::LexError;
2729
pub use result::LexicalError;

0 commit comments

Comments
 (0)