
Commit 538af0c

chore: Add tokenizing for the JJU parser and an AST parser to benchmarks
1 parent: 08adb12

File tree: 7 files changed, +873 -83 lines


benchmarks/jju/extended.js

Lines changed: 0 additions & 2 deletions
@@ -92,9 +92,7 @@ function parse (input, options) {
   var length = input.length

   var lineNumber = 0
-
   var lineStart = 0
-
   var position = 0

   var stack = []

benchmarks/jju/original.js

Lines changed: 55 additions & 64 deletions
@@ -97,38 +97,29 @@ function parse (input, options) {

   var stack = []

-  var tokenStart = function () {}
-  var tokenEnd = function (v) { return v }
-
-  /* tokenize({
-       raw: '...',
-       type: 'whitespace'|'comment'|'key'|'literal'|'separator'|'newline',
-       value: 'number'|'string'|'whatever',
-       path: [...],
-     })
-  */
-  if (options._tokenize) {
-    ;(function () {
-      var start = null
-      tokenStart = function () {
-        if (start !== null) throw Error('internal error, token overlap')
-        start = position
-      }
-
-      tokenEnd = function (v, type) {
-        if (start !== position) {
-          var hash = {
-            raw: input.substr(start, position - start),
-            type: type,
-            stack: stack.slice(0)
-          }
-          if (v !== undefined) hash.value = v
-          options._tokenize.call(null, hash)
+  var startToken = function () {}
+  var endToken = function (v) { return v }
+
+  var tokenize = options.tokenize
+  if (tokenize) {
+    var tokenStart = null
+    startToken = function () {
+      if (tokenStart !== null) throw Error('internal error, token overlap')
+      tokenStart = position
+    }
+    endToken = function (v, type) {
+      if (tokenStart !== position) {
+        var hash = {
+          raw: input.substr(tokenStart, position - tokenStart),
+          type: type,
+          stack: stack.slice(0)
         }
-        start = null
-        return v
+        if (v !== undefined) hash.value = v
+        tokenize(hash)
       }
-    })()
+      tokenStart = null
+      return v
+    }
   }

   function fail (message) {
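
Note: the rewritten hook hands the consumer one plain object per token. The sketch below is illustrative only; the require path and the assumption that the exported parse() forwards the options object unchanged are mine, while the token fields mirror the hash assembled in endToken() above.

  // Hypothetical consumer of the new options.tokenize hook.
  // Assumption: the benchmark module is required from this path and its
  // exported parse() passes the options object through to the inner parser.
  var jju = require('./benchmarks/jju/original')

  var tokens = []
  jju.parse('{"a": 1}', {
    tokenize: function (token) {
      // token.raw   - the exact source slice, e.g. '"a"' or '1'
      // token.type  - 'whitespace' | 'comment' | 'key' | 'literal' | 'separator' | 'newline'
      // token.stack - the key/index path at the point the token was read
      // token.value - the parsed value, set for keys and literals only
      tokens.push(token)
    }
  })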
@@ -165,36 +156,36 @@ function parse (input, options) {

   function parseGeneric () {
     while (position < length) {
-      tokenStart()
+      startToken()
       var chr = input[position++]

       if (chr === '"' || (chr === '\'' && json5)) {
-        return tokenEnd(parseString(chr), 'literal')
+        return endToken(parseString(chr), 'literal')
       } else if (chr === '{') {
-        tokenEnd(undefined, 'separator')
+        endToken(undefined, 'separator')
         return parseObject()
       } else if (chr === '[') {
-        tokenEnd(undefined, 'separator')
+        endToken(undefined, 'separator')
         return parseArray()
       } else if (chr === '-' ||
                  chr === '.' ||
                  isDecDigit(chr) ||
                  // + number Infinity NaN
                  (json5 && (chr === '+' || chr === 'I' || chr === 'N'))
       ) {
-        return tokenEnd(parseNumber(), 'literal')
+        return endToken(parseNumber(), 'literal')
       } else if (chr === 'n') {
         parseKeyword('null')
-        return tokenEnd(null, 'literal')
+        return endToken(null, 'literal')
       } else if (chr === 't') {
         parseKeyword('true')
-        return tokenEnd(true, 'literal')
+        return endToken(true, 'literal')
       } else if (chr === 'f') {
         parseKeyword('false')
-        return tokenEnd(false, 'literal')
+        return endToken(false, 'literal')
       } else {
         position--
-        return tokenEnd(undefined)
+        return endToken(undefined)
       }
     }
   }
@@ -203,20 +194,20 @@ function parse (input, options) {
     var result

     while (position < length) {
-      tokenStart()
+      startToken()
       var chr = input[position++]

       if (chr === '"' || (chr === '\'' && json5)) {
-        return tokenEnd(parseString(chr), 'key')
+        return endToken(parseString(chr), 'key')
       } else if (chr === '{') {
-        tokenEnd(undefined, 'separator')
+        endToken(undefined, 'separator')
         return parseObject()
       } else if (chr === '[') {
-        tokenEnd(undefined, 'separator')
+        endToken(undefined, 'separator')
         return parseArray()
       } else if (chr === '.' || isDecDigit(chr)
       ) {
-        return tokenEnd(parseNumber(true), 'key')
+        return endToken(parseNumber(true), 'key')
       } else if ((json5 && Uni.isIdentifierStart(chr)) ||
                  (chr === '\\' && input[position] === 'u')) {
         // unicode char or a unicode sequence
@@ -225,30 +216,30 @@ function parse (input, options) {

         if (result === undefined) {
           position = rollback
-          return tokenEnd(undefined)
+          return endToken(undefined)
         } else {
-          return tokenEnd(result, 'key')
+          return endToken(result, 'key')
         }
       } else {
         position--
-        return tokenEnd(undefined)
+        return endToken(undefined)
       }
     }
   }

   function skipWhiteSpace () {
-    tokenStart()
+    startToken()
     while (position < length) {
       var chr = input[position++]

       if (isLineTerminator(chr)) {
         position--
-        tokenEnd(undefined, 'whitespace')
-        tokenStart()
+        endToken(undefined, 'whitespace')
+        startToken()
         position++
         newLine(chr)
-        tokenEnd(undefined, 'newline')
-        tokenStart()
+        endToken(undefined, 'newline')
+        startToken()
       } else if (isWhiteSpace(chr)) {
         // nothing

@@ -257,18 +248,18 @@ function parse (input, options) {
                  (input[position] === '/' || input[position] === '*')
       ) {
         position--
-        tokenEnd(undefined, 'whitespace')
-        tokenStart()
+        endToken(undefined, 'whitespace')
+        startToken()
         position++
         skipComment(input[position++] === '*')
-        tokenEnd(undefined, 'comment')
-        tokenStart()
+        endToken(undefined, 'comment')
+        startToken()
       } else {
         position--
         break
       }
     }
-    return tokenEnd(undefined, 'whitespace')
+    return endToken(undefined, 'whitespace')
   }

   function skipComment (multi) {
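
Note: these skipWhiteSpace changes keep emitting plain whitespace, line terminators and comments as separate tokens, so a consumer can rework formatting without re-lexing. A hedged sketch of one such consumer follows, stripping comments by filtering the token stream and re-joining the raw slices. It assumes tokenizeJSON returns the collected token array (as the upstream jju API does), that the default mode accepts comments, and that the raw slices concatenate back to the original input.

  // Hedged sketch: drop 'comment' tokens and re-join the remaining raw slices.
  // The require path and the behaviours listed above are assumptions.
  var jju = require('./benchmarks/jju/original')

  function stripComments (text) {
    return jju.tokenize(text)
      .filter(function (token) { return token.type !== 'comment' })
      .map(function (token) { return token.raw })
      .join('')
  }

  console.log(stripComments('{ // temporary\n  "a": 1\n}'))
  // -> roughly '{ \n  "a": 1\n}' (comment text gone, layout kept)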
@@ -322,9 +313,9 @@ function parse (input, options) {
       skipWhiteSpace()
       var key = parseKey()
       skipWhiteSpace()
-      tokenStart()
+      startToken()
       var chr = input[position++]
-      tokenEnd(undefined, 'separator')
+      endToken(undefined, 'separator')

      if (chr === '}' && key === undefined) {
        if (!json5 && isNotEmpty) {
@@ -369,9 +360,9 @@ function parse (input, options) {

       skipWhiteSpace()

-      tokenStart()
+      startToken()
       chr = input[position++]
-      tokenEnd(undefined, 'separator')
+      endToken(undefined, 'separator')

      if (chr === ',') {
        continue
@@ -398,9 +389,9 @@ function parse (input, options) {
       var item = parseGeneric()
       stack.pop()
       skipWhiteSpace()
-      tokenStart()
+      startToken()
       var chr = input[position++]
-      tokenEnd(undefined, 'separator')
+      endToken(undefined, 'separator')

      if (item !== undefined) {
        if (typeof (options.reviver) === 'function') {
@@ -713,7 +704,7 @@ exports.parse = function parseJSON (input, options) {
 exports.tokenize = function tokenizeJSON (input, options) {
   if (options == null) options = {}

-  options._tokenize = function (smth) {
+  options.tokenize = function (smth) {
     if (options._addstack) smth.stack.unshift.apply(smth.stack, options._addstack)
     tokens.push(smth)
   }
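
Note: the exported tokenizer now feeds its collected-token callback through the plain options.tokenize property instead of the underscore-prefixed one. A short, hedged usage sketch: the require path is an assumption, and the return value is assumed to be the tokens array populated above (declared outside this hunk), matching the upstream jju API.

  // Hedged usage sketch for the exported tokenizer.
  var jju = require('./benchmarks/jju/original')

  var tokens = jju.tokenize('[1, 2]')
  tokens.forEach(function (token) {
    console.log(token.type, JSON.stringify(token.raw))
  })
  // e.g. separator "[", literal "1", separator ",", whitespace " ", ...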
