@@ -67,6 +67,34 @@ impl From<ProtobufFloatParseError> for LexerError {
67
67
}
68
68
}
69
69
70
/// The raw bytes for a single char or escape sequence in a string literal.
///
/// A byte-valued escape (for example `\n` or `\x41`) is stored as exactly one
/// byte; any other char is stored as its UTF-8 encoding. The bytes are
/// available via [`DecodedBytes::bytes`].
#[derive(Debug, Clone, Copy)]
pub(crate) struct DecodedBytes {
    // A single char can be up to 4 bytes when encoded in UTF-8.
    buf: [u8; 4],
    // Number of leading bytes of `buf` that hold data; the rest is zero padding.
    len: usize,
}

impl DecodedBytes {
    /// Wraps one raw byte, such as the value produced by an escape sequence.
    fn byte(b: u8) -> Self {
        DecodedBytes {
            buf: [b, 0, 0, 0],
            len: 1,
        }
    }

    /// Stores the UTF-8 encoding of `value` (one to four bytes).
    fn char(value: char) -> Self {
        let mut buf = [0; 4];
        // `encode_utf8` writes into `buf` and returns the encoded sub-slice,
        // whose length is the number of bytes actually used.
        let len = value.encode_utf8(&mut buf).len();
        DecodedBytes { buf, len }
    }

    /// Returns the decoded bytes, without the unused tail of the buffer.
    pub(crate) fn bytes(&self) -> &[u8] {
        &self.buf[..self.len]
    }
}
97
+
70
98
#[ derive( Copy , Clone ) ]
71
99
pub struct Lexer < ' a > {
72
100
language : ParserLanguage ,
@@ -440,24 +468,24 @@ impl<'a> Lexer<'a> {
440
468
// octEscape = '\' octalDigit octalDigit octalDigit
441
469
// charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
442
470
// quote = "'" | '"'
443
- pub fn next_byte_value ( & mut self ) -> LexerResult < u8 > {
471
+ pub ( crate ) fn next_str_lit_bytes ( & mut self ) -> LexerResult < DecodedBytes > {
444
472
match self . next_char ( ) ? {
445
473
'\\' => {
446
474
match self . next_char ( ) ? {
447
- '\'' => Ok ( b'\'' ) ,
448
- '"' => Ok ( b'"' ) ,
449
- '\\' => Ok ( b'\\' ) ,
450
- 'a' => Ok ( b'\x07' ) ,
451
- 'b' => Ok ( b'\x08' ) ,
452
- 'f' => Ok ( b'\x0c' ) ,
453
- 'n' => Ok ( b'\n' ) ,
454
- 'r' => Ok ( b'\r' ) ,
455
- 't' => Ok ( b'\t' ) ,
456
- 'v' => Ok ( b'\x0b' ) ,
475
+ '\'' => Ok ( DecodedBytes :: byte ( b'\'' ) ) ,
476
+ '"' => Ok ( DecodedBytes :: byte ( b'"' ) ) ,
477
+ '\\' => Ok ( DecodedBytes :: byte ( b'\\' ) ) ,
478
+ 'a' => Ok ( DecodedBytes :: byte ( b'\x07' ) ) ,
479
+ 'b' => Ok ( DecodedBytes :: byte ( b'\x08' ) ) ,
480
+ 'f' => Ok ( DecodedBytes :: byte ( b'\x0c' ) ) ,
481
+ 'n' => Ok ( DecodedBytes :: byte ( b'\n' ) ) ,
482
+ 'r' => Ok ( DecodedBytes :: byte ( b'\r' ) ) ,
483
+ 't' => Ok ( DecodedBytes :: byte ( b'\t' ) ) ,
484
+ 'v' => Ok ( DecodedBytes :: byte ( b'\x0b' ) ) ,
457
485
'x' => {
458
486
let d1 = self . next_hex_digit ( ) ? as u8 ;
459
487
let d2 = self . next_hex_digit ( ) ? as u8 ;
460
- Ok ( ( ( d1 << 4 ) | d2) as u8 )
488
+ Ok ( DecodedBytes :: byte ( ( d1 << 4 ) | d2) )
461
489
}
462
490
d if d >= '0' && d <= '7' => {
463
491
let mut r = d as u8 - b'0' ;
@@ -467,16 +495,14 @@ impl<'a> Lexer<'a> {
467
495
Ok ( d) => r = ( r << 3 ) + d as u8 ,
468
496
}
469
497
}
470
- Ok ( r )
498
+ Ok ( DecodedBytes :: byte ( r ) )
471
499
}
472
500
// https://github.com/google/protobuf/issues/4562
473
- // TODO: overflow
474
- c => Ok ( c as u8 ) ,
501
+ c => Ok ( DecodedBytes :: char ( c) ) ,
475
502
}
476
503
}
477
504
'\n' | '\0' => Err ( LexerError :: IncorrectInput ) ,
478
- // TODO: check overflow
479
- c => Ok ( c as u8 ) ,
505
+ c => Ok ( DecodedBytes :: char ( c) ) ,
480
506
}
481
507
}
482
508
@@ -530,7 +556,7 @@ impl<'a> Lexer<'a> {
530
556
} ;
531
557
first = false ;
532
558
while self . lookahead_char ( ) != Some ( q) {
533
- self . next_byte_value ( ) ?;
559
+ self . next_str_lit_bytes ( ) ?;
534
560
}
535
561
self . next_char_expect_eq ( q) ?;
536
562
0 commit comments