17
17
// This makes for some ugly code, but it is faster. Hopefully in the future
18
18
// with MIR support the compiler will get smarter about this.
19
19
20
- use std:: { str, slice, char } ;
20
+ use std:: { str, slice } ;
21
21
use object:: Object ;
22
22
use number:: Number ;
23
23
use { JsonValue , Error , Result } ;
@@ -31,16 +31,6 @@ const MAX_PRECISION: u64 = 576460752303423500;
31
31
const DEPTH_LIMIT : usize = 512 ;
32
32
33
33
34
- // Position is only used when we stumble upon an unexpected character. We don't
35
- // track lines during parsing, as that would mean doing unnecessary work.
36
- // Instead, if an error occurs, we figure out the line and column from the
37
- // current index position of the parser.
38
- struct Position {
39
- pub line : usize ,
40
- pub column : usize ,
41
- }
42
-
43
-
44
34
// The `Parser` struct keeps track of indexing over our buffer. All niceness
45
35
// has been abandoned in favor of raw pointer magic. Does that make you feel
46
36
// dirty? _Good._
@@ -91,7 +81,7 @@ macro_rules! expect_sequence {
91
81
$(
92
82
match expect_byte!( $parser) {
93
83
$ch => { } ,
94
- ch => return $parser. unexpected_character( ch ) ,
84
+ _ => return $parser. unexpected_character( ) ,
95
85
}
96
86
) *
97
87
}
@@ -131,9 +121,9 @@ macro_rules! expect_eof {
131
121
while !$parser. is_eof( ) {
132
122
match $parser. read_byte( ) {
133
123
9 ... 13 | 32 => $parser. bump( ) ,
134
- ch => {
124
+ _ => {
135
125
$parser. bump( ) ;
136
- return $parser. unexpected_character( ch ) ;
126
+ return $parser. unexpected_character( ) ;
137
127
}
138
128
}
139
129
}
@@ -147,7 +137,7 @@ macro_rules! expect {
147
137
let ch = expect_byte_ignore_whitespace!( $parser) ;
148
138
149
139
if ch != $byte {
150
- return $parser. unexpected_character( ch )
140
+ return $parser. unexpected_character( )
151
141
}
152
142
} ) ;
153
143
@@ -158,7 +148,7 @@ macro_rules! expect {
158
148
$(
159
149
$byte => $then,
160
150
) *
161
- _ => return $parser. unexpected_character( ch )
151
+ _ => return $parser. unexpected_character( )
162
152
}
163
153
164
154
} )
@@ -222,7 +212,7 @@ macro_rules! expect_string {
222
212
break ;
223
213
}
224
214
225
- return $parser. unexpected_character( ch ) ;
215
+ return $parser. unexpected_character( ) ;
226
216
}
227
217
228
218
result
@@ -324,7 +314,7 @@ macro_rules! expect_fraction {
324
314
}
325
315
}
326
316
} ,
327
- _ => return $parser. unexpected_character( ch )
317
+ _ => return $parser. unexpected_character( )
328
318
}
329
319
330
320
loop {
@@ -403,66 +393,28 @@ impl<'a> Parser<'a> {
403
393
self . index = self . index . wrapping_add ( 1 ) ;
404
394
}
405
395
406
- // Figure out the `Position` in the source. This doesn't look like it's
407
- // very fast - it probably isn't, and it doesn't really have to be.
408
- // This method is only called when an unexpected character error occurs.
409
- fn source_position_from_index ( & self , index : usize ) -> Position {
410
- let ( bytes, _) = self . source . split_at ( index-1 ) ;
411
-
412
- Position {
413
- line : bytes. lines ( ) . count ( ) ,
414
- column : bytes. lines ( ) . last ( ) . map_or ( 1 , |line| {
415
- line. chars ( ) . count ( ) + 1
416
- } )
417
- }
418
- }
419
-
420
396
// So we got an unexpected character, now what? Well, figure out where
421
397
// it is, and throw an error!
422
- fn unexpected_character < T : Sized > ( & mut self , byte : u8 ) -> Result < T > {
423
- let pos = self . source_position_from_index ( self . index ) ;
424
-
425
- // If the first byte is non ASCII (> 127), attempt to read the
426
- // codepoint from the following UTF-8 sequence. This can lead
427
- // to a fun scenario where an unexpected character error can
428
- // produce an end of json or UTF-8 failure error first :).
429
- let ch = if byte & 0x80 != 0 {
430
- let mut buf = [ byte, 0 , 0 , 0 ] ;
431
- let mut len = 0usize ;
432
-
433
- if byte & 0xE0 == 0xCE {
434
- // 2 bytes, 11 bits
435
- len = 2 ;
436
- buf[ 1 ] = expect_byte ! ( self ) ;
437
- } else if byte & 0xF0 == 0xE0 {
438
- // 3 bytes, 16 bits
439
- len = 3 ;
440
- buf[ 1 ] = expect_byte ! ( self ) ;
441
- buf[ 2 ] = expect_byte ! ( self ) ;
442
- } else if byte & 0xF8 == 0xF0 {
443
- // 4 bytes, 21 bits
444
- len = 4 ;
445
- buf[ 1 ] = expect_byte ! ( self ) ;
446
- buf[ 2 ] = expect_byte ! ( self ) ;
447
- buf[ 3 ] = expect_byte ! ( self ) ;
448
- }
398
+ fn unexpected_character < T : Sized > ( & mut self ) -> Result < T > {
399
+ let at = self . index - 1 ;
449
400
450
- let slice = try! (
451
- str :: from_utf8 ( & buf [ 0 ..len ] )
452
- . map_err ( |_| Error :: FailedUtf8Parsing )
453
- ) ;
401
+ let ch = self . source [ at.. ]
402
+ . chars ( )
403
+ . next ( )
404
+ . expect ( "Must have a character" ) ;
454
405
455
- slice. chars ( ) . next ( ) . unwrap ( )
456
- } else {
406
+ let ( lineno, col) = self . source [ ..at]
407
+ . lines ( )
408
+ . enumerate ( )
409
+ . last ( )
410
+ . unwrap_or ( ( 0 , "" ) ) ;
457
411
458
- // codepoints < 128 are safe ASCII compatibles
459
- unsafe { char:: from_u32_unchecked ( byte as u32 ) }
460
- } ;
412
+ let colno = col. chars ( ) . count ( ) ;
461
413
462
414
Err ( Error :: UnexpectedCharacter {
463
415
ch : ch,
464
- line : pos . line ,
465
- column : pos . column ,
416
+ line : lineno + 1 ,
417
+ column : colno + 1 ,
466
418
} )
467
419
}
468
420
@@ -473,7 +425,7 @@ impl<'a> Parser<'a> {
473
425
b'0' ... b'9' => ( ch - b'0' ) ,
474
426
b'a' ... b'f' => ( ch + 10 - b'a' ) ,
475
427
b'A' ... b'F' => ( ch + 10 - b'A' ) ,
476
- ch => return self . unexpected_character ( ch ) ,
428
+ _ => return self . unexpected_character ( ) ,
477
429
} as u32 )
478
430
}
479
431
@@ -575,11 +527,11 @@ impl<'a> Parser<'a> {
575
527
b't' => b'\t' ,
576
528
b'r' => b'\r' ,
577
529
b'n' => b'\n' ,
578
- _ => return self . unexpected_character ( escaped )
530
+ _ => return self . unexpected_character ( )
579
531
} ;
580
532
self . buffer . push ( escaped) ;
581
533
} ,
582
- _ => return self . unexpected_character ( ch )
534
+ _ => return self . unexpected_character ( )
583
535
}
584
536
ch = expect_byte ! ( self ) ;
585
537
}
@@ -656,7 +608,7 @@ impl<'a> Parser<'a> {
656
608
657
609
let mut e = match ch {
658
610
b'0' ... b'9' => ( ch - b'0' ) as i16 ,
659
- _ => return self . unexpected_character ( ch ) ,
611
+ _ => return self . unexpected_character ( ) ,
660
612
} ;
661
613
662
614
loop {
@@ -708,7 +660,7 @@ impl<'a> Parser<'a> {
708
660
let mut object = Object :: with_capacity ( 3 ) ;
709
661
710
662
if ch != b'"' {
711
- return self . unexpected_character ( ch )
663
+ return self . unexpected_character ( )
712
664
}
713
665
714
666
object. insert ( expect_string ! ( self ) , JsonValue :: Null ) ;
@@ -733,7 +685,7 @@ impl<'a> Parser<'a> {
733
685
JsonValue :: Number ( - match ch {
734
686
b'0' => allow_number_extensions ! ( self ) ,
735
687
b'1' ... b'9' => expect_number ! ( self , ch) ,
736
- _ => return self . unexpected_character ( ch )
688
+ _ => return self . unexpected_character ( )
737
689
} )
738
690
}
739
691
b't' => {
@@ -748,7 +700,7 @@ impl<'a> Parser<'a> {
748
700
expect_sequence ! ( self , b'u' , b'l' , b'l' ) ;
749
701
JsonValue :: Null
750
702
} ,
751
- _ => return self . unexpected_character ( ch )
703
+ _ => return self . unexpected_character ( )
752
704
} ;
753
705
754
706
' popping: loop {
@@ -776,7 +728,7 @@ impl<'a> Parser<'a> {
776
728
value = JsonValue :: Array ( array) ;
777
729
continue ' popping;
778
730
} ,
779
- _ => return self . unexpected_character ( ch )
731
+ _ => return self . unexpected_character ( )
780
732
}
781
733
} ,
782
734
@@ -802,7 +754,7 @@ impl<'a> Parser<'a> {
802
754
803
755
continue ' popping;
804
756
} ,
805
- _ => return self . unexpected_character ( ch )
757
+ _ => return self . unexpected_character ( )
806
758
}
807
759
} ,
808
760
}
0 commit comments