@@ -2,7 +2,9 @@ use partiql_source_map::location::{ByteOffset, BytePosition, ToLocated};
 use std::borrow::Cow;
 
 use logos::{Logos, Span};
+
 use std::cmp::max;
+
 use std::fmt;
 use std::fmt::Formatter;
 
@@ -67,7 +69,7 @@ impl<'input, 'tracker> CommentLexer<'input, 'tracker> {
     }
 
     /// Parses a single (possibly nested) block comment and returns it
-    fn next(&mut self) -> Option<CommentStringResult<'input>> {
+    fn next_internal(&mut self) -> Option<CommentStringResult<'input>> {
         let Span { start, .. } = self.lexer.span();
         let mut nesting = 0;
         let nesting_inc = if self.comment_nesting { 1 } else { 0 };
@@ -114,7 +116,7 @@ impl<'input, 'tracker> Iterator for CommentLexer<'input, 'tracker> {
 
     #[inline(always)]
     fn next(&mut self) -> Option<Self::Item> {
-        self.next()
+        self.next_internal()
     }
 }
 
@@ -177,7 +179,7 @@ impl<'input, 'tracker> EmbeddedIonLexer<'input, 'tracker> {
     }
 
     /// Parses a single embedded ion value, quoted between backticks (`), and returns it
-    fn next(&mut self) -> Option<EmbeddedIonStringResult<'input>> {
+    fn next_internal(&mut self) -> Option<EmbeddedIonStringResult<'input>> {
         let next_token = self.lexer.next();
         match next_token {
             Some(EmbeddedIonToken::Embed) => {
@@ -197,7 +199,7 @@ impl<'input, 'tracker> EmbeddedIonLexer<'input, 'tracker> {
                         let mut comment_tracker = LineOffsetTracker::default();
                         let mut comment_lexer =
                             CommentLexer::new(remaining, &mut comment_tracker);
-                        match comment_lexer.next() {
+                        match comment_lexer.next_internal() {
                             Some(Ok((s, _c, e))) => {
                                 self.tracker.append(&comment_tracker, embed.start.into());
                                 self.lexer.bump((e - s).to_usize() - embed.len())
@@ -248,18 +250,19 @@ impl<'input, 'tracker> Iterator for EmbeddedIonLexer<'input, 'tracker> {
 
     #[inline(always)]
     fn next(&mut self) -> Option<Self::Item> {
-        self.next()
+        self.next_internal()
     }
 }
 
-/// A lexer from PartiQL text strings to [`LexicalToken`]s
+/// A lexer from PartiQL text strings to [`Token`]s
 pub(crate) struct PartiqlLexer<'input, 'tracker> {
     /// Wrap a logos-generated lexer
     lexer: logos::Lexer<'input, Token<'input>>,
     tracker: &'tracker mut LineOffsetTracker,
 }
 
-type InternalLexResult<'input> = SpannedResult<Token<'input>, ByteOffset, LexError<'input>>;
+pub(crate) type InternalLexResult<'input> =
+    SpannedResult<Token<'input>, ByteOffset, LexError<'input>>;
 pub(crate) type LexResult<'input> =
     Result<Spanned<Token<'input>, ByteOffset>, ParseError<'input, BytePosition>>;
 
@@ -293,15 +296,20 @@ impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> {
         Err((start.into(), err_ctor(region.into()), end.into()))
     }
 
-    /// Wraps a [`Token`] into a [`LexicalToken`] at the current position of the lexer.
+    pub fn slice(&self) -> &'input str {
+        self.lexer.slice()
+    }
+
+    /// Wraps a [`Token`] into a [`Token`] at the current position of the lexer.
     #[inline(always)]
     fn wrap(&mut self, token: Token<'input>) -> InternalLexResult<'input> {
         let Span { start, end } = self.lexer.span();
         Ok((start.into(), token, end.into()))
     }
 
-    /// Advances the iterator and returns the next [`LexicalToken`] or [`None`] when input is exhausted.
-    fn next(&mut self) -> Option<InternalLexResult<'input>> {
+    /// Advances the iterator and returns the next [`Token`] or [`None`] when input is exhausted.
+    #[inline]
+    pub(crate) fn next_internal(&mut self) -> Option<InternalLexResult<'input>> {
         'next_tok: loop {
             return match self.lexer.next() {
                 None => None,
@@ -330,7 +338,7 @@ impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> {
         let remaining = &self.lexer.source()[embed.start..];
         let mut comment_tracker = LineOffsetTracker::default();
         let mut comment_lexer = CommentLexer::new(remaining, &mut comment_tracker).with_nesting();
-        comment_lexer.next().map(|res| match res {
+        comment_lexer.next_internal().map(|res| match res {
             Ok((s, comment, e)) => {
                 let val_len = e - s;
                 let val_start = embed.start.into(); // embed end is 1 past the starting '/*'
@@ -352,7 +360,7 @@ impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> {
         let remaining = &self.lexer.source()[embed.start..];
         let mut ion_tracker = LineOffsetTracker::default();
         let mut ion_lexer = EmbeddedIonLexer::new(remaining, &mut ion_tracker);
-        ion_lexer.next().map(|res| match res {
+        ion_lexer.next_internal().map(|res| match res {
             Ok((s, ion, e)) => {
                 let val_len = e - s;
                 let val_start = embed.end.into(); // embed end is 1 past the starting '`'
@@ -374,7 +382,7 @@ impl<'input, 'tracker> Iterator for PartiqlLexer<'input, 'tracker> {
 
     #[inline(always)]
     fn next(&mut self) -> Option<Self::Item> {
-        self.next().map(|res| res.map_err(|e| e.into()))
+        self.next_internal().map(|res| res.map_err(|e| e.into()))
     }
 }
 
@@ -534,6 +542,8 @@ pub enum Token<'input> {
     False,
     #[regex("(?i:First)")]
     First,
+    #[regex("(?i:For)")]
+    For,
     #[regex("(?i:Full)")]
     Full,
     #[regex("(?i:From)")]
@@ -614,6 +624,65 @@ pub enum Token<'input> {
     With,
 }
 
+impl<'input> Token<'input> {
+    pub fn is_keyword(&self) -> bool {
+        matches!(
+            self,
+            Token::All
+                | Token::Asc
+                | Token::And
+                | Token::As
+                | Token::At
+                | Token::Between
+                | Token::By
+                | Token::Cross
+                | Token::Desc
+                | Token::Distinct
+                | Token::Escape
+                | Token::Except
+                | Token::First
+                | Token::For
+                | Token::Full
+                | Token::From
+                | Token::Group
+                | Token::Having
+                | Token::In
+                | Token::Inner
+                | Token::Is
+                | Token::Intersect
+                | Token::Join
+                | Token::Last
+                | Token::Lateral
+                | Token::Left
+                | Token::Like
+                | Token::Limit
+                | Token::Missing
+                | Token::Natural
+                | Token::Not
+                | Token::Null
+                | Token::Nulls
+                | Token::Offset
+                | Token::On
+                | Token::Or
+                | Token::Order
+                | Token::Outer
+                | Token::Partial
+                | Token::Pivot
+                | Token::Preserve
+                | Token::Right
+                | Token::Select
+                | Token::Then
+                | Token::Union
+                | Token::Unpivot
+                | Token::Using
+                | Token::Value
+                | Token::Values
+                | Token::Where
+                | Token::With
+        )
+    }
+}
+
 impl<'input> fmt::Display for Token<'input> {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         match self {
@@ -677,6 +746,7 @@ impl<'input> fmt::Display for Token<'input> {
             | Token::Except
             | Token::False
             | Token::First
+            | Token::For
             | Token::Full
             | Token::From
             | Token::Group
@@ -740,7 +810,7 @@ mod tests {
         let keywords =
             "WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
              On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
-             Intersect Is Inner In Having Group From Full First False Except Escape Desc \
+             Intersect Is Inner In Having Group From For Full First False Except Escape Desc \
              Cross By Between At As And Asc All Values Case When Then Else End";
         let symbols = symbols.split(' ').chain(primitives.split(' '));
         let keywords = keywords.split(' ');
@@ -761,7 +831,7 @@ mod tests {
             "LIMIT", "/", "LIKE", "^", "LEFT", ".", "LATERAL", "||", "LAST", ":", "JOIN",
             "--", "INTERSECT", "/**/", "IS", "<unquoted_ident:UNQUOTED_IDENT>", "INNER",
             "<quoted_ident:QUOTED_IDENT>", "IN", "<unquoted_atident:UNQUOTED_ATIDENT>", "HAVING",
-            "<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FULL", "FIRST", "FALSE", "EXCEPT",
+            "<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT",
             "ESCAPE", "DESC", "CROSS", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
             "CASE", "WHEN", "THEN", "ELSE", "END",
         ];
@@ -854,18 +924,30 @@ mod tests {
         let lexer = PartiqlLexer::new(query, &mut offset_tracker);
         let toks: Vec<_> = lexer.collect::<Result<_, _>>()?;
 
+        let mut pre_offset_tracker = LineOffsetTracker::default();
+        let pre_lexer = PartiqlLexer::new(query, &mut pre_offset_tracker);
+        let pre_toks: Vec<_> = pre_lexer.collect::<Result<_, _>>()?;
+
+        let expected_toks = vec![
+            Token::Select,
+            Token::UnquotedIdent("g"),
+            Token::From,
+            Token::QuotedIdent("data"),
+            Token::Group,
+            Token::By,
+            Token::UnquotedIdent("a"),
+        ];
         assert_eq!(
-            vec![
-                Token::Select,
-                Token::UnquotedIdent("g"),
-                Token::From,
-                Token::QuotedIdent("data"),
-                Token::Group,
-                Token::By,
-                Token::UnquotedIdent("a")
-            ],
+            expected_toks,
             toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
         );
+        assert_eq!(
+            expected_toks,
+            pre_toks
+                .into_iter()
+                .map(|(_s, t, _e)| t)
+                .collect::<Vec<_>>()
+        );
 
         assert_eq!(offset_tracker.num_lines(), 3);
         assert_eq!(