9
9
10
10
use crate :: peg:: { PairExt , PairsExt , PartiQLParser , Rule } ;
11
11
use crate :: prelude:: * ;
12
+ use bigdecimal:: BigDecimal ;
13
+ use num_bigint:: BigInt ;
14
+ use num_traits:: Num ;
12
15
use pest:: iterators:: Pair ;
13
- use pest:: Parser ;
16
+ use pest:: { Parser , RuleType } ;
14
17
use std:: borrow:: Cow ;
15
18
16
19
/// The parsed content associated with a [`Token`] that has been scanned.
17
20
#[ derive( Clone , Debug , Eq , PartialEq ) ]
18
21
pub enum Content < ' val > {
19
22
/// A PartiQL keyword. Contains the slice for the keyword case folded to upper case.
20
23
Keyword ( Cow < ' val , str > ) ,
24
+
21
25
/// An identifier. Contains the slice for the text of the identifier.
22
26
Identifier ( Cow < ' val , str > ) ,
27
+
28
+ /// An integer literal. Stores this as a [`BigInt`].
29
+ ///
30
+ /// Users will likely deal with smaller integers and encode this in execution/compilation
31
+ /// as `i64` or the like, but the parser need not deal with that detail.
32
+ IntegerLiteral ( BigInt ) ,
33
+
34
+ /// A decimal literal. Contains the parsed [`BigDecimal`] for the literal.
35
+ DecimalLiteral ( BigDecimal ) ,
36
+
23
37
/// A string literal. Contains the slice for the content of the literal.
24
38
StringLiteral ( Cow < ' val , str > ) ,
25
39
// TODO things like literals, punctuation, etc.
@@ -123,6 +137,18 @@ fn normalize_quoted_ident(raw_text: &str) -> Cow<str> {
123
137
. into ( )
124
138
}
125
139
140
+ fn parse_num < T , R , E > ( pair : Pair < R > ) -> ParserResult < T >
141
+ where
142
+ T : Num < FromStrRadixErr = E > ,
143
+ R : RuleType ,
144
+ E : std:: fmt:: Display ,
145
+ {
146
+ match T :: from_str_radix ( pair. as_str ( ) , 10 ) {
147
+ Ok ( value) => Ok ( value) ,
148
+ Err ( e) => pair. syntax_error ( format ! ( "Could not parse number {}: {}" , pair. as_str( ) , e) ) ,
149
+ }
150
+ }
151
+
126
152
impl < ' val > PartiQLScanner < ' val > {
127
153
fn do_next_token ( & mut self ) -> ParserResult < Token < ' val > > {
128
154
// the scanner rule is expected to return a single node
@@ -144,10 +170,20 @@ impl<'val> PartiQLScanner<'val> {
144
170
Rule :: QuotedIdentifier => {
145
171
Content :: Identifier ( normalize_quoted_ident ( ident_pair. as_str ( ) ) )
146
172
}
147
- _ => return ident_pair. syntax_error ( ) ,
173
+ _ => return ident_pair. unexpected ( ) ,
174
+ }
175
+ }
176
+ Rule :: Number => {
177
+ let number_pair = pair. into_inner ( ) . exactly_one ( ) ?;
178
+ match number_pair. as_rule ( ) {
179
+ Rule :: Integer => Content :: IntegerLiteral ( parse_num ( number_pair) ?) ,
180
+ Rule :: Decimal | Rule :: DecimalExp => {
181
+ Content :: DecimalLiteral ( parse_num ( number_pair) ?)
182
+ }
183
+ _ => return number_pair. unexpected ( ) ,
148
184
}
149
185
}
150
- _ => return pair. syntax_error ( ) ,
186
+ _ => return pair. unexpected ( ) ,
151
187
} ;
152
188
153
189
Ok ( Token {
@@ -292,7 +328,7 @@ mod test {
292
328
]
293
329
) ]
294
330
#[ case:: bad_identifier(
295
- " 99ranch " ,
331
+ " 💩 " ,
296
332
vec![
297
333
syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 9 ) ) ,
298
334
]
@@ -349,6 +385,174 @@ mod test {
349
385
syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 32 ) ) ,
350
386
]
351
387
) ]
388
+ #[ case:: numeric_literals(
389
+ "1 -0099 1.1 +00055.023100 99.1234e0010" ,
390
+ vec![
391
+ Ok ( Token {
392
+ content: Content :: IntegerLiteral ( 1 . into( ) ) ,
393
+ start: LineAndColumn :: at( 1 , 1 ) ,
394
+ end: LineAndColumn :: at( 1 , 2 ) ,
395
+ text: "1" ,
396
+ remainder: Remainder {
397
+ input: " -0099 1.1 +00055.023100 99.1234e0010" ,
398
+ offset: LineAndColumn :: at( 1 , 2 )
399
+ }
400
+ } ) ,
401
+ Ok ( Token {
402
+ content: Content :: IntegerLiteral ( BigInt :: from( -99 ) ) ,
403
+ start: LineAndColumn :: at( 1 , 3 ) ,
404
+ end: LineAndColumn :: at( 1 , 8 ) ,
405
+ text: "-0099" ,
406
+ remainder: Remainder {
407
+ input: " 1.1 +00055.023100 99.1234e0010" ,
408
+ offset: LineAndColumn :: at( 1 , 8 )
409
+ }
410
+ } ) ,
411
+ Ok ( Token {
412
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "1.1" , 10 ) . unwrap( ) ) ,
413
+ start: LineAndColumn :: at( 1 , 9 ) ,
414
+ end: LineAndColumn :: at( 1 , 12 ) ,
415
+ text: "1.1" ,
416
+ remainder: Remainder {
417
+ input: " +00055.023100 99.1234e0010" ,
418
+ offset: LineAndColumn :: at( 1 , 12 )
419
+ }
420
+ } ) ,
421
+ Ok ( Token {
422
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "55.023100" , 10 ) . unwrap( ) ) ,
423
+ start: LineAndColumn :: at( 1 , 13 ) ,
424
+ end: LineAndColumn :: at( 1 , 26 ) ,
425
+ text: "+00055.023100" ,
426
+ remainder: Remainder {
427
+ input: " 99.1234e0010" ,
428
+ offset: LineAndColumn :: at( 1 , 26 )
429
+ }
430
+ } ) ,
431
+ Ok ( Token {
432
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "99.1234e10" , 10 ) . unwrap( ) ) ,
433
+ start: LineAndColumn :: at( 1 , 27 ) ,
434
+ end: LineAndColumn :: at( 1 , 39 ) ,
435
+ text: "99.1234e0010" ,
436
+ remainder: Remainder {
437
+ input: "" ,
438
+ offset: LineAndColumn :: at( 1 , 39 )
439
+ }
440
+ } ) ,
441
+ syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 39 ) ) ,
442
+ ]
443
+ ) ]
444
+ #[ case:: numeric_literals_with_pads(
445
+ "+0005 .0001 -00.0002 000003.004E+001" ,
446
+ vec![
447
+ Ok ( Token {
448
+ content: Content :: IntegerLiteral ( 5 . into( ) ) ,
449
+ start: LineAndColumn :: at( 1 , 1 ) ,
450
+ end: LineAndColumn :: at( 1 , 6 ) ,
451
+ text: "+0005" ,
452
+ remainder: Remainder {
453
+ input: " .0001 -00.0002 000003.004E+001" ,
454
+ offset: LineAndColumn :: at( 1 , 6 )
455
+ }
456
+ } ) ,
457
+ Ok ( Token {
458
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0001" , 10 ) . unwrap( ) ) ,
459
+ start: LineAndColumn :: at( 1 , 7 ) ,
460
+ end: LineAndColumn :: at( 1 , 12 ) ,
461
+ text: ".0001" ,
462
+ remainder: Remainder {
463
+ input: " -00.0002 000003.004E+001" ,
464
+ offset: LineAndColumn :: at( 1 , 12 )
465
+ }
466
+ } ) ,
467
+ Ok ( Token {
468
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "-0.0002" , 10 ) . unwrap( ) ) ,
469
+ start: LineAndColumn :: at( 1 , 13 ) ,
470
+ end: LineAndColumn :: at( 1 , 21 ) ,
471
+ text: "-00.0002" ,
472
+ remainder: Remainder {
473
+ input: " 000003.004E+001" ,
474
+ offset: LineAndColumn :: at( 1 , 21 )
475
+ }
476
+ } ) ,
477
+ Ok ( Token {
478
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "3.004e1" , 10 ) . unwrap( ) ) ,
479
+ start: LineAndColumn :: at( 1 , 22 ) ,
480
+ end: LineAndColumn :: at( 1 , 37 ) ,
481
+ text: "000003.004E+001" ,
482
+ remainder: Remainder {
483
+ input: "" ,
484
+ offset: LineAndColumn :: at( 1 , 37 )
485
+ }
486
+ } ) ,
487
+ syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 37 ) ) ,
488
+ ]
489
+ ) ]
490
+ #[ case:: zeroes(
491
+ "0 000 .0 000.000 .0e0 0.0e000" ,
492
+ vec![
493
+ Ok ( Token {
494
+ content: Content :: IntegerLiteral ( 0 . into( ) ) ,
495
+ start: LineAndColumn :: at( 1 , 1 ) ,
496
+ end: LineAndColumn :: at( 1 , 2 ) ,
497
+ text: "0" ,
498
+ remainder: Remainder {
499
+ input: " 000 .0 000.000 .0e0 0.0e000" ,
500
+ offset: LineAndColumn :: at( 1 , 2 )
501
+ }
502
+ } ) ,
503
+ Ok ( Token {
504
+ content: Content :: IntegerLiteral ( 0 . into( ) ) ,
505
+ start: LineAndColumn :: at( 1 , 3 ) ,
506
+ end: LineAndColumn :: at( 1 , 6 ) ,
507
+ text: "000" ,
508
+ remainder: Remainder {
509
+ input: " .0 000.000 .0e0 0.0e000" ,
510
+ offset: LineAndColumn :: at( 1 , 6 )
511
+ }
512
+ } ) ,
513
+ Ok ( Token {
514
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0" , 10 ) . unwrap( ) ) ,
515
+ start: LineAndColumn :: at( 1 , 7 ) ,
516
+ end: LineAndColumn :: at( 1 , 9 ) ,
517
+ text: ".0" ,
518
+ remainder: Remainder {
519
+ input: " 000.000 .0e0 0.0e000" ,
520
+ offset: LineAndColumn :: at( 1 , 9 )
521
+ }
522
+ } ) ,
523
+ Ok ( Token {
524
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.000" , 10 ) . unwrap( ) ) ,
525
+ start: LineAndColumn :: at( 1 , 10 ) ,
526
+ end: LineAndColumn :: at( 1 , 17 ) ,
527
+ text: "000.000" ,
528
+ remainder: Remainder {
529
+ input: " .0e0 0.0e000" ,
530
+ offset: LineAndColumn :: at( 1 , 17 )
531
+ }
532
+ } ) ,
533
+ Ok ( Token {
534
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0" , 10 ) . unwrap( ) ) ,
535
+ start: LineAndColumn :: at( 1 , 18 ) ,
536
+ end: LineAndColumn :: at( 1 , 22 ) ,
537
+ text: ".0e0" ,
538
+ remainder: Remainder {
539
+ input: " 0.0e000" ,
540
+ offset: LineAndColumn :: at( 1 , 22 )
541
+ }
542
+ } ) ,
543
+ Ok ( Token {
544
+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0" , 10 ) . unwrap( ) ) ,
545
+ start: LineAndColumn :: at( 1 , 23 ) ,
546
+ end: LineAndColumn :: at( 1 , 30 ) ,
547
+ text: "0.0e000" ,
548
+ remainder: Remainder {
549
+ input: "" ,
550
+ offset: LineAndColumn :: at( 1 , 30 )
551
+ }
552
+ } ) ,
553
+ syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 30 ) ) ,
554
+ ]
555
+ ) ]
352
556
#[ case:: select_from(
353
557
r#"SelEct '✨✨✨' fROM "┬─┬" "# ,
354
558
vec![
0 commit comments