Skip to content

Commit 01f1ed5

Browse files
committed
0.8.6 predictive parsing, no rollbacks on index
1 parent 1bf0881 commit 01f1ed5

File tree

1 file changed

+107
-86
lines changed

1 file changed

+107
-86
lines changed

src/parser.rs

Lines changed: 107 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,22 @@ struct Parser<'a> {
1818
length: usize,
1919
}
2020

21-
macro_rules! next_byte {
22-
($parser:ident || $alt:expr) => {
23-
if $parser.index < $parser.length {
24-
let ch = unsafe { *$parser.byte_ptr.offset($parser.index as isize) };
25-
$parser.index += 1;
26-
ch
27-
} else {
28-
$alt
21+
macro_rules! expect_byte {
22+
($parser:ident) => ({
23+
if $parser.is_eof() {
24+
return Err(JsonError::UnexpectedEndOfJson);
2925
}
30-
};
3126

32-
($parser:ident) => {
33-
next_byte!($parser || return Err(JsonError::UnexpectedEndOfJson))
34-
}
27+
let ch = $parser.read_byte();
28+
$parser.bump();
29+
ch
30+
})
3531
}
3632

3733
macro_rules! sequence {
3834
($parser:ident, $( $ch:pat ),*) => {
3935
$(
40-
match next_byte!($parser) {
36+
match expect_byte!($parser) {
4137
$ch => {},
4238
ch => return $parser.unexpected_character(ch),
4339
}
@@ -48,16 +44,15 @@ macro_rules! sequence {
4844
macro_rules! read_num {
4945
($parser:ident, $num:ident, $then:expr) => {
5046
loop {
51-
let ch = next_byte!($parser || break);
47+
if $parser.is_eof() { break; }
48+
let ch = $parser.read_byte();
5249
match ch {
5350
b'0' ... b'9' => {
51+
$parser.bump();
5452
let $num = ch - b'0';
5553
$then;
5654
},
57-
_ => {
58-
$parser.index -= 1;
59-
break;
60-
}
55+
_ => break
6156
}
6257
}
6358
}
@@ -69,7 +64,7 @@ macro_rules! consume_whitespace {
6964
// whitespace
7065
9 ... 13 | 32 => {
7166
loop {
72-
match next_byte!($parser) {
67+
match expect_byte!($parser) {
7368
9 ... 13 | 32 => {},
7469
ch => { $ch = ch; break }
7570
}
@@ -82,7 +77,7 @@ macro_rules! consume_whitespace {
8277

8378
macro_rules! expect {
8479
($parser:ident, $byte:expr) => ({
85-
let mut ch = next_byte!($parser);
80+
let mut ch = expect_byte!($parser);
8681

8782
consume_whitespace!($parser, ch);
8883

@@ -92,7 +87,7 @@ macro_rules! expect {
9287
});
9388

9489
{$parser:ident $(, $byte:pat => $then:expr )*} => ({
95-
let mut ch = next_byte!($parser);
90+
let mut ch = expect_byte!($parser);
9691

9792
consume_whitespace!($parser, ch);
9893

@@ -136,7 +131,7 @@ macro_rules! expect_string {
136131
let start = $parser.index;
137132

138133
loop {
139-
let ch = next_byte!($parser);
134+
let ch = expect_byte!($parser);
140135
if CHARCODES[ch as usize] == 0 {
141136
continue;
142137
}
@@ -192,37 +187,35 @@ fn make_float(num: u64, e: i32) -> f64 {
192187
macro_rules! expect_number {
193188
($parser:ident, $first:ident) => ({
194189
let mut num = ($first - b'0') as u64;
195-
let mut digits = 0u8;
196190

197191
let result: f64;
198192

199193
// Cap on how many iterations we do while reading to u64
200194
// in order to avoid an overflow.
201195
loop {
202-
if digits == 18 {
196+
if num >= 576460752303423500 {
203197
result = try!($parser.read_big_number(num));
204198
break;
205199
}
206200

207-
digits += 1;
208-
209-
let ch = next_byte!($parser || {
201+
if $parser.is_eof() {
210202
result = num as f64;
211203
break;
212-
});
204+
}
205+
206+
let ch = $parser.read_byte();
213207

214208
match ch {
215209
b'0' ... b'9' => {
210+
$parser.bump();
216211
// Avoid multiplication with bitshifts and addition
217212
num = (num << 1) + (num << 3) + (ch - b'0') as u64;
218213
},
219214
b'.' | b'e' | b'E' => {
220-
$parser.index -= 1;
221215
result = try!($parser.read_number_with_fraction(num, 0));
222216
break;
223217
},
224218
_ => {
225-
$parser.index -= 1;
226219
result = num as f64;
227220
break;
228221
}
@@ -235,7 +228,7 @@ macro_rules! expect_number {
235228

236229
macro_rules! expect_value {
237230
{$parser:ident $(, $byte:pat => $then:expr )*} => ({
238-
let mut ch = next_byte!($parser);
231+
let mut ch = expect_byte!($parser);
239232

240233
consume_whitespace!($parser, ch);
241234

@@ -255,7 +248,7 @@ macro_rules! expect_value {
255248
JsonValue::Number(num)
256249
},
257250
b'-' => {
258-
let ch = next_byte!($parser);
251+
let ch = expect_byte!($parser);
259252
let num = match ch {
260253
b'0' => try!($parser.read_number_with_fraction(0, 0)),
261254
b'1' ... b'9' => expect_number!($parser, ch),
@@ -290,7 +283,22 @@ impl<'a> Parser<'a> {
290283
}
291284
}
292285

293-
pub fn source_position_from_index(&self, index: usize) -> Position {
286+
#[inline(always)]
287+
fn is_eof(&mut self) -> bool {
288+
self.index == self.length
289+
}
290+
291+
#[inline(always)]
292+
fn read_byte(&mut self) -> u8 {
293+
unsafe { *self.byte_ptr.offset(self.index as isize) }
294+
}
295+
296+
#[inline(always)]
297+
fn bump(&mut self) {
298+
self.index += 1;
299+
}
300+
301+
fn source_position_from_index(&self, index: usize) -> Position {
294302
let (bytes, _) = self.source.split_at(index-1);
295303

296304
Position {
@@ -311,18 +319,18 @@ impl<'a> Parser<'a> {
311319
if byte & 0xE0 == 0xCE {
312320
// 2 bytes, 11 bits
313321
len = 2;
314-
buf[1] = next_byte!(self);
322+
buf[1] = expect_byte!(self);
315323
} else if byte & 0xF0 == 0xE0 {
316324
// 3 bytes, 16 bits
317325
len = 3;
318-
buf[1] = next_byte!(self);
319-
buf[2] = next_byte!(self);
326+
buf[1] = expect_byte!(self);
327+
buf[2] = expect_byte!(self);
320328
} else if byte & 0xF8 == 0xF0 {
321329
// 4 bytes, 21 bits
322330
len = 4;
323-
buf[1] = next_byte!(self);
324-
buf[2] = next_byte!(self);
325-
buf[3] = next_byte!(self);
331+
buf[1] = expect_byte!(self);
332+
buf[2] = expect_byte!(self);
333+
buf[3] = expect_byte!(self);
326334
}
327335

328336
let slice = try!(
@@ -345,7 +353,7 @@ impl<'a> Parser<'a> {
345353
}
346354

347355
fn read_hexdec_digit(&mut self) -> JsonResult<u32> {
348-
let ch = next_byte!(self);
356+
let ch = expect_byte!(self);
349357
Ok(match ch {
350358
b'0' ... b'9' => (ch - b'0'),
351359
b'a' ... b'f' => (ch + 10 - b'a'),
@@ -418,17 +426,17 @@ impl<'a> Parser<'a> {
418426
loop {
419427
if CHARCODES[ch as usize] == 0 {
420428
buffer.push(ch);
421-
ch = next_byte!(self);
429+
ch = expect_byte!(self);
422430
continue;
423431
}
424432
match ch {
425433
b'"' => break,
426434
b'\\' => {
427-
let escaped = next_byte!(self);
435+
let escaped = expect_byte!(self);
428436
let escaped = match escaped {
429437
b'u' => {
430438
try!(self.read_codepoint(&mut buffer));
431-
ch = next_byte!(self);
439+
ch = expect_byte!(self);
432440
continue;
433441
},
434442
b'"' |
@@ -445,7 +453,7 @@ impl<'a> Parser<'a> {
445453
},
446454
_ => return self.unexpected_character(ch)
447455
}
448-
ch = next_byte!(self);
456+
ch = expect_byte!(self);
449457
}
450458

451459
// Since the original source is already valid UTF-8, and `\`
@@ -459,64 +467,75 @@ impl<'a> Parser<'a> {
459467

460468
let mut e = 0i32;
461469
loop {
462-
match next_byte!(self || break) {
463-
b'0' ... b'9' => e += 1,
464-
_ => {
465-
self.index -= 1;
466-
break;
467-
}
470+
if self.is_eof() {
471+
return Ok(make_float(num, e));
472+
}
473+
match self.read_byte() {
474+
b'0' ... b'9' => {
475+
self.bump();
476+
e += 1;
477+
},
478+
_ => break
468479
}
469480
}
470481

471482
self.read_number_with_fraction(num, e)
472483
}
473484

474485
fn read_number_with_fraction(&mut self, mut num: u64, mut e: i32) -> JsonResult<f64> {
475-
if next_byte!(self || return Ok(make_float(num, e))) == b'.' {
486+
if self.is_eof() {
487+
return Ok(make_float(num, e));
488+
}
489+
490+
let mut ch = self.read_byte();
491+
492+
if ch == b'.' {
493+
self.bump();
494+
476495
loop {
477-
let ch = next_byte!(self || break);
496+
if self.is_eof() {
497+
return Ok(make_float(num, e));
498+
}
499+
ch = self.read_byte();
478500

479501
match ch {
480502
b'0' ... b'9' => {
503+
self.bump();
481504
if num < MAX_FLOAT_PRECISION {
482-
num = num * 10 + (ch - b'0') as u64;
505+
num = (num << 3) + (num << 1) + (ch - b'0') as u64;
483506
e -= 1;
484507
}
485508
},
486-
_ => {
487-
self.index -= 1;
488-
break;
489-
}
509+
_ => break
490510
}
491511
}
492-
} else {
493-
self.index -= 1;
494512
}
495513

496-
match next_byte!(self || return Ok(make_float(num, e))) {
497-
b'e' | b'E' => {
498-
let sign = match next_byte!(self) {
499-
b'-' => -1,
500-
b'+' => 1,
501-
_ => {
502-
self.index -= 1;
503-
1
504-
},
505-
};
514+
if ch == b'e' || ch == b'E' {
515+
self.bump();
516+
ch = expect_byte!(self);
517+
let sign = match ch {
518+
b'-' => {
519+
ch = expect_byte!(self);
520+
-1
521+
},
522+
b'+' => {
523+
ch = expect_byte!(self);
524+
1
525+
},
526+
_ => 1
527+
};
506528

507-
let num = make_float(num, e);
529+
let num = make_float(num, e);
508530

509-
let ch = next_byte!(self);
510-
let mut e = match ch {
511-
b'0' ... b'9' => (ch - b'0') as i32,
512-
_ => return self.unexpected_character(ch),
513-
};
531+
let mut e = match ch {
532+
b'0' ... b'9' => (ch - b'0') as i32,
533+
_ => return self.unexpected_character(ch),
534+
};
514535

515-
read_num!(self, digit, e = (e << 3) + (e << 1) + digit as i32);
536+
read_num!(self, digit, e = (e << 3) + (e << 1) + digit as i32);
516537

517-
return Ok(num * exponent_to_power(e * sign));
518-
},
519-
_ => self.index -= 1
538+
return Ok(num * exponent_to_power(e * sign));
520539
}
521540

522541
Ok(make_float(num, e))
@@ -571,15 +590,17 @@ impl<'a> Parser<'a> {
571590
}
572591

573592
fn ensure_end(&mut self) -> JsonResult<()> {
574-
let mut ch = next_byte!(self || return Ok(()));
575-
loop {
576-
match ch {
577-
// whitespace
578-
9 ... 13 | 32 => {},
579-
_ => return self.unexpected_character(ch)
593+
while !self.is_eof() {
594+
match self.read_byte() {
595+
9 ... 13 | 32 => self.bump(),
596+
ch => {
597+
self.bump();
598+
return self.unexpected_character(ch);
599+
}
580600
}
581-
ch = next_byte!(self || return Ok(()));
582601
}
602+
603+
Ok(())
583604
}
584605

585606
fn value(&mut self) -> JsonResult<JsonValue> {

0 commit comments

Comments
 (0)