Skip to content

Commit d257e3e

Browse files
authored
Adds the scanner rules for punctuation (#30)
Adds the Pest grammar rules for these tokens and particularly adds the lookahead assertions for the various edge cases around the punctuation tokens that interact with each other, comments, and decimals. Adds special `Content` enum variants for `.`/`*`/`?` and a basic variant for `Operator` and `Delimiter`. This should be the final prerequisite for all of the terminal parse rules in the PEG and allows us to start adding the parser rules for expressions. An explicit TODO remains around modeling the various operators as their own enum or enum variants; right now they are returned as normalized string content.
1 parent d309746 commit d257e3e

File tree

2 files changed

+208
-7
lines changed

2 files changed

+208
-7
lines changed

partiql-parser/src/partiql.pest

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,88 @@ Token = _{
2121
| String
2222
| Number
2323
| Identifier
24+
| Punctuation
2425
}
2526

27+
//
28+
// Punctuation
29+
//
30+
31+
Punctuation = _{
32+
Dot_
33+
| Star_
34+
| Parameter
35+
| Operator
36+
| Delimiter
37+
}
38+
39+
// pathing operator that has some special meaning beyond a normal operator
40+
// (e.g. wildcard paths)
41+
Dot_ = { "." }
42+
43+
// multiplication operator and wildcard
44+
Star_ = { "*" }
45+
46+
// parameter variable
47+
Parameter = @{ QuestionMark_ }
48+
QuestionMark_ = { "?" }
49+
50+
// punctuation that acts as an operator--keyword operators are not included
51+
Operator = @{
52+
LexicalScope_
53+
| Plus_
54+
| Minus_
55+
| Divide_
56+
| Modulus_
57+
| Less_
58+
| LessEq_
59+
| Greater_
60+
| GreaterEq_
61+
| Eq_
62+
| NotEq_
63+
| Concat_
64+
}
65+
66+
LexicalScope_ = { "@" }
67+
Plus_ = { "+" }
68+
Minus_ = @{ "-" ~ !"-" }
69+
Divide_ = @{ "/" ~ !"*" }
70+
Modulus_ = { "%" }
71+
Less_ = @{ "<" ~ !("<" | "=" | ">") }
72+
LessEq_ = { "<=" }
73+
Greater_ = @{ ">" ~ !(">" | "=") }
74+
GreaterEq_ = { ">=" }
75+
Eq_ = { "=" }
76+
NotEq_ = { ("<>" | "!=") }
77+
Concat_ = { "||" }
78+
79+
// punctuation that delimit things in the grammar
80+
Delimiter = @ {
81+
Comma_
82+
| Colon_
83+
| SemiColon_
84+
| LeftParen_
85+
| RightParen_
86+
| LeftBracket_
87+
| RightBracket_
88+
| LeftCurly_
89+
| RightCurly_
90+
| LeftDoubleAngle_
91+
| RightDoubleAngle_
92+
}
93+
94+
Comma_ = { "," }
95+
Colon_ = { ":" }
96+
SemiColon_ = { ";" }
97+
LeftParen_ = { "(" }
98+
RightParen_ = { ")" }
99+
LeftBracket_ = { "[" }
100+
RightBracket_ = { "]" }
101+
LeftCurly_ = { "{" }
102+
RightCurly_ = { "}" }
103+
LeftDoubleAngle_ = { "<<" }
104+
RightDoubleAngle_ = { ">>" }
105+
26106
//
27107
// Numeric Literals
28108
//
@@ -37,12 +117,14 @@ DecimalExp = {
37117
Decimal ~ ("e" | "E") ~ Integer
38118
}
39119

120+
// XXX `.nnn` and `nnn.` are okay, but `.` on its own definitely is not
40121
Decimal = {
41-
Integer? ~ "." ~ Fraction
122+
Integer? ~ "." ~ Fraction
123+
| Integer ~ "."
42124
}
43125

44126
Fraction = {
45-
Digit*
127+
Digit+
46128
}
47129

48130
// XXX this explicitly supports arbitrary zero prefixing in various places

partiql-parser/src/scanner.rs

Lines changed: 124 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ use pest::iterators::Pair;
1616
use pest::{Parser, RuleType};
1717
use std::borrow::Cow;
1818

19+
// TODO turn operator/delimiter into enums of their own (nested or otherwise)
20+
1921
/// The parsed content associated with a [`Token`] that has been scanned.
2022
#[derive(Clone, Debug, Eq, PartialEq)]
2123
pub enum Content<'val> {
@@ -36,7 +38,22 @@ pub enum Content<'val> {
3638

3739
/// A string literal. Contains the slice for the content of the literal.
3840
StringLiteral(Cow<'val, str>),
39-
// TODO things like literals, punctuation, etc.
41+
42+
/// The `.` punctuation
43+
Dot,
44+
45+
/// The `*` operator and wildcard.
46+
Star,
47+
48+
/// The `?` placeholder for a query parameter.
49+
Parameter,
50+
51+
/// An operator represented by punctuation (as opposed to a keyword based operator).
52+
/// Contains the slice for the operator.
53+
Operator(Cow<'val, str>),
54+
55+
/// A delimiter token (e.g. `,` or `<<`). Contains the slice for the delimiter text.
56+
Delimiter(Cow<'val, str>),
4057
}
4158

4259
/// Convenience constructor for a [`Content::Keyword`].
@@ -64,6 +81,16 @@ pub fn string_literal<'val, S: Into<Cow<'val, str>>>(text: S) -> Content<'val> {
6481
Content::StringLiteral(text.into())
6582
}
6683

84+
/// Convenience constructor for a [`Content::Operator`].
///
/// Accepts any `text` that converts into a `Cow<'val, str>` and wraps it as the
/// operator's content without further processing.
85+
pub fn operator<'val, S: Into<Cow<'val, str>>>(text: S) -> Content<'val> {
86+
Content::Operator(text.into())
87+
}
88+
89+
/// Convenience constructor for a [`Content::Delimiter`].
///
/// Accepts any `text` that converts into a `Cow<'val, str>` and wraps it as the
/// delimiter's content without further processing.
90+
pub fn delimiter<'val, S: Into<Cow<'val, str>>>(text: S) -> Content<'val> {
91+
Content::Delimiter(text.into())
92+
}
93+
6794
/// Internal type to keep track of remaining input and relative line/column information.
6895
///
6996
/// This is used to leverage the PEG to do continuation parsing and calculating the line/offset
@@ -174,6 +201,14 @@ where
174201
}
175202
}
176203

204+
/// Normalizes the raw text of a punctuation operator to a single canonical spelling.
///
/// The alternate inequality spelling `!=` is rewritten to `<>`; any other text is
/// returned unchanged. The result is built from a string slice via `.into()`, so it is
/// a borrowed `Cow` rather than a newly allocated string.
fn normalize_operator(raw_text: &str) -> Cow<str> {
205+
match raw_text {
206+
"!=" => "<>",
207+
_ => raw_text,
208+
}
209+
.into()
210+
}
211+
177212
impl<'val> PartiQLScanner<'val> {
178213
fn do_next_token(&mut self) -> ParserResult<Token<'val>> {
179214
// the scanner rule is expected to return a single node
@@ -186,14 +221,14 @@ impl<'val> PartiQLScanner<'val> {
186221
self.remainder = self.remainder.consume(start_off + text.len(), pair.end()?);
187222

188223
let content = match pair.as_rule() {
189-
Rule::Keyword => Content::Keyword(text.to_uppercase().into()),
190-
Rule::String => Content::StringLiteral(normalize_string_lit(pair.as_str())),
224+
Rule::Keyword => keyword(text.to_uppercase()),
225+
Rule::String => string_literal(normalize_string_lit(pair.as_str())),
191226
Rule::Identifier => {
192227
let ident_pair = pair.into_inner().exactly_one()?;
193228
match ident_pair.as_rule() {
194-
Rule::NonQuotedIdentifier => Content::Identifier(ident_pair.as_str().into()),
229+
Rule::NonQuotedIdentifier => identifier(ident_pair.as_str()),
195230
Rule::QuotedIdentifier => {
196-
Content::Identifier(normalize_quoted_ident(ident_pair.as_str()))
231+
identifier(normalize_quoted_ident(ident_pair.as_str()))
197232
}
198233
_ => return ident_pair.unexpected(),
199234
}
@@ -208,6 +243,11 @@ impl<'val> PartiQLScanner<'val> {
208243
_ => return number_pair.unexpected(),
209244
}
210245
}
246+
Rule::Dot_ => Content::Dot,
247+
Rule::Star_ => Content::Star,
248+
Rule::Parameter => Content::Parameter,
249+
Rule::Operator => operator(normalize_operator(text)),
250+
Rule::Delimiter => delimiter(text),
211251
_ => return pair.unexpected(),
212252
};
213253

@@ -533,6 +573,85 @@ mod test {
533573
"0.0e000" => decimal_literal_from_str("0.0")
534574
]
535575
)]
576+
#[case::no_trailing_zeros(scanner_test_case!["1231231." => decimal_literal_from_str("1231231")])]
577+
#[case::delimiters(
578+
scanner_test_case![
579+
"[" => delimiter("["),
580+
"]" => delimiter("]"),
581+
"(" => delimiter("("),
582+
")" => delimiter(")"),
583+
"{" => delimiter("{"),
584+
"}" => delimiter("}"),
585+
"<<" => delimiter("<<"),
586+
">>" => delimiter(">>"),
587+
"," => delimiter(","),
588+
":" => delimiter(":"),
589+
";" => delimiter(";"),
590+
]
591+
)]
592+
#[case::operators(
593+
scanner_test_case![
594+
"@" => operator("@"),
595+
"+" => operator("+"),
596+
"-" => operator("-"),
597+
"/" => operator("/"),
598+
"%" => operator("%"),
599+
"<" => operator("<"),
600+
" ",
601+
"<=" => operator("<="),
602+
">" => operator(">"),
603+
" ",
604+
">=" => operator(">="),
605+
"=" => operator("="),
606+
"<>" => operator("<>"),
607+
"!=" => operator("<>"),
608+
]
609+
)]
610+
#[case::left_angles(
611+
scanner_test_case![
612+
"<<" => delimiter("<<"),
613+
"<<" => delimiter("<<"),
614+
"<" => operator("<"),
615+
]
616+
)]
617+
#[case::right_angles(
618+
scanner_test_case![
619+
">>" => delimiter(">>"),
620+
">>" => delimiter(">>"),
621+
">" => operator(">"),
622+
]
623+
)]
624+
#[case::balanced_angles(
625+
scanner_test_case![
626+
"<<" => delimiter("<<"),
627+
"<<" => delimiter("<<"),
628+
"<>" => operator("<>"),
629+
">>" => delimiter(">>"),
630+
">>" => delimiter(">>"),
631+
" ",
632+
"<<" => delimiter("<<"),
633+
"<=" => operator("<="),
634+
">>" => delimiter(">>"),
635+
">" => operator(">"),
636+
]
637+
)]
638+
#[case::dot(scanner_test_case!["." => Content::Dot])]
639+
#[case::star(scanner_test_case!["*" => Content::Star])]
640+
#[case::parameter(scanner_test_case!["?" => Content::Parameter])]
641+
#[case::comment_no_minus(
642+
scanner_test_case![
643+
"-------- a line comment with no minus...\n"
644+
]
645+
)]
646+
#[case::divide_block_comment(
647+
scanner_test_case![
648+
"/" => operator("/"),
649+
"/" => operator("/"),
650+
"/**/",
651+
"/" => operator("/"),
652+
"/" => operator("/"),
653+
]
654+
)]
536655
#[case::select_from(
537656
scanner_test_case![
538657
"SelEct" => keyword("SELECT"),

0 commit comments

Comments
 (0)