Skip to content

Partial JSON parser #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 35 additions & 35 deletions src/json/JsonDecoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ const isUndefined = (u8: Uint8Array, x: number) =>

const fromCharCode = String.fromCharCode;

const readShortUtf8StrAndUnescape = (reader: Reader): string => {
export const readKey = (reader: Reader): string => {
const buf = reader.uint8;
const len = buf.length;
const points: number[] = [];
Expand Down Expand Up @@ -202,10 +202,8 @@ export class JsonDecoder implements BinaryJsonDecoder {
const uint8 = reader.uint8;
const char = uint8[x];
switch (char) {
case 34: {
// "
if (uint8[x + 1] === 0x64) {
// d
case 34 /* " */: {
if (uint8[x + 1] === 0x64 /* d */) {
const bin = this.tryReadBin();
if (bin) return bin;
if (isUndefined(uint8, x + 2)) {
Expand All @@ -215,18 +213,18 @@ export class JsonDecoder implements BinaryJsonDecoder {
}
return this.readStr();
}
case 91: // [
case 91 /* [ */:
return this.readArr();
case 102: // f
case 102 /* f */:
return this.readFalse();
case 110: // n
case 110 /* n */:
return this.readNull();
case 116: // t
case 116 /* t */:
return this.readTrue();
case 123: // {
case 123 /* { */:
return this.readObj();
default:
if ((char >= 48 && char <= 57) || char === 45) return this.readNum();
if ((char >= 48 /* 0 */ && char <= 57) /* 9 */ || char === 45 /* - */) return this.readNum();
throw new Error('Invalid JSON');
}
}
Expand All @@ -239,10 +237,10 @@ export class JsonDecoder implements BinaryJsonDecoder {
while (true) {
char = uint8[x];
switch (char) {
case 32: // space
case 9: // tab
case 10: // line feed
case 13: // carriage return
case 32 /* <space> */:
case 9 /* <tab> */:
case 10 /* <line feed> */:
case 13 /* <carriage return> */:
x++;
continue;
default:
Expand All @@ -253,27 +251,27 @@ export class JsonDecoder implements BinaryJsonDecoder {
}

public readNull(): null {
if (this.reader.u32() !== 0x6e756c6c) throw new Error('Invalid JSON');
if (this.reader.u32() !== 0x6e756c6c /* null */) throw new Error('Invalid JSON');
return null;
}

public readTrue(): true {
if (this.reader.u32() !== 0x74727565) throw new Error('Invalid JSON');
if (this.reader.u32() !== 0x74727565 /* true */) throw new Error('Invalid JSON');
return true;
}

public readFalse(): false {
const reader = this.reader;
if (reader.u8() !== 0x66 || reader.u32() !== 0x616c7365) throw new Error('Invalid JSON');
if (reader.u8() !== 0x66 /* f */ || reader.u32() !== 0x616c7365 /* alse */) throw new Error('Invalid JSON');
return false;
}

public readBool(): unknown {
const reader = this.reader;
switch (reader.uint8[reader.x]) {
case 102: // f
case 102 /* f */:
return this.readFalse();
case 116: // t
case 116 /* t */:
return this.readTrue();
default:
throw new Error('Invalid JSON');
Expand Down Expand Up @@ -642,42 +640,44 @@ export class JsonDecoder implements BinaryJsonDecoder {

public readArr(): unknown[] {
const reader = this.reader;
if (reader.u8() !== 0x5b) throw new Error('Invalid JSON');
if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON');
const arr: unknown[] = [];
const uint8 = reader.uint8;
let first = true;
while (true) {
this.skipWhitespace();
const char = uint8[reader.x];
if (char === 0x5d) return reader.x++, arr; // ]
if (char === 0x2c) {
reader.x++;
continue;
} // ,
if (char === 0x5d /* ] */) return reader.x++, arr;
if (char === 0x2c /* , */) reader.x++;
else if (!first) throw new Error('Invalid JSON');
this.skipWhitespace();
arr.push(this.readAny());
first = false;
}
}

public readObj(): PackValue | Record<string, unknown> | unknown {
const reader = this.reader;
if (reader.u8() !== 0x7b) throw new Error('Invalid JSON');
if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON');
const obj: Record<string, unknown> = {};
const uint8 = reader.uint8;
let first = true;
while (true) {
this.skipWhitespace();
let char = uint8[reader.x];
if (char === 0x7d) return reader.x++, obj; // }
if (char === 0x2c) {
reader.x++;
continue;
} // ,
if (char === 0x7d /* } */) return reader.x++, obj;
if (char === 0x2c /* , */) reader.x++;
else if (!first) throw new Error('Invalid JSON');
this.skipWhitespace();
char = uint8[reader.x++];
if (char !== 0x22) throw new Error('Invalid JSON');
const key = readShortUtf8StrAndUnescape(reader);
if (char !== 0x22 /* " */) throw new Error('Invalid JSON');
const key = readKey(reader);
if (key === '__proto__') throw new Error('Invalid JSON');
this.skipWhitespace();
if (reader.u8() !== 0x3a) throw new Error('Invalid JSON');
if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON');
this.skipWhitespace();
obj[key] = this.readAny();
first = false;
}
}
}
103 changes: 103 additions & 0 deletions src/json/JsonDecoderPartial.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import {JsonDecoder, readKey} from './JsonDecoder';
import type {PackValue} from '../types';

/**
 * Control-flow error used by the partial decoder to abort parsing of a
 * container while still handing back whatever was decoded so far.
 *
 * `JsonDecoderPartial.readArr()` and `JsonDecoderPartial.readObj()` throw it
 * with the partially built array/object, and `JsonDecoderPartial.readAny()`
 * catches it and returns {@link DecodeFinishError.value}.
 */
export class DecodeFinishError extends Error {
  /**
   * @param value The partially decoded value collected before decoding stopped.
   */
  constructor(public readonly value: unknown) {
    super('DECODE_FINISH');
    // When this class is down-levelled to ES5, `super(...)` on a built-in
    // `Error` yields an object whose prototype is `Error.prototype`, which
    // makes `instanceof DecodeFinishError` checks fail. Restoring the
    // prototype explicitly keeps those checks reliable on every target.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'DecodeFinishError';
  }
}

/**
 * A lenient variant of {@link JsonDecoder} for JSON that is correct up to some
 * point but is truncated or has a broken tail, for example JSON that arrives
 * in chunks over a stream or JSON produced by an LLM.
 *
 * When a nested value (array or object) cannot be parsed to its end, the part
 * that was successfully decoded before the first invalid position is returned.
 * Only errors whose message is `'Invalid JSON'` are treated as "end of the
 * parseable input"; any other error is re-thrown unchanged.
 *
 * Examples (input text → decoded value):
 *
 * ```js
 * // Missing closing bracket
 * '[1, 2, 3'             // → [1, 2, 3]
 *
 * // Trailing comma and missing closing bracket
 * '[1, 2, '              // → [1, 2]
 *
 * // Second entry has no value and the closing brace is missing
 * '{"foo": 1, "bar":'    // → {"foo": 1}
 * ```
 */
export class JsonDecoderPartial extends JsonDecoder {
  /**
   * Decodes the next value using the base decoder. If decoding is aborted with
   * a {@link DecodeFinishError}, the partial value it carries is returned
   * instead.
   *
   * @returns The decoded value, or the partially decoded value.
   * @throws Any error other than `DecodeFinishError` raised while decoding.
   */
  public readAny(): unknown {
    try {
      return super.readAny();
    } catch (err) {
      if (!(err instanceof DecodeFinishError)) throw err;
      return err.value;
    }
  }

  /**
   * Decodes an array, tolerating a missing tail.
   *
   * @returns The array, or the elements read so far when an element is not
   *     preceded by a comma.
   * @throws Error `'Invalid JSON'` when the current byte is not `[`.
   * @throws DecodeFinishError carrying the elements read so far when reading
   *     an element fails with `'Invalid JSON'`.
   */
  public readArr(): unknown[] {
    const reader = this.reader;
    if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON');
    const items: unknown[] = [];
    const bytes = reader.uint8;
    // `isFirst` is cleared only after an element was pushed successfully.
    for (let isFirst = true; ; isFirst = false) {
      this.skipWhitespace();
      const byte = bytes[reader.x];
      if (byte === 0x5d /* ] */) {
        reader.x++;
        return items;
      }
      if (byte === 0x2c /* , */) {
        reader.x++;
      } else if (!isFirst) {
        // Any other byte after an element ends the array here.
        return items;
      }
      this.skipWhitespace();
      try {
        items.push(this.readAny());
      } catch (err) {
        // Defensive: `readAny()` of this class already unwraps
        // `DecodeFinishError`, so this only fires if that changes.
        if (err instanceof DecodeFinishError) {
          items.push(err.value);
          return items;
        }
        const isInvalidJson = err instanceof Error && err.message === 'Invalid JSON';
        if (isInvalidJson) throw new DecodeFinishError(items);
        throw err;
      }
    }
  }

  /**
   * Decodes an object, tolerating a missing tail. Commas between entries are
   * skipped wherever they appear and are not required.
   *
   * @returns The object with all entries that were read.
   * @throws Error `'Invalid JSON'` when the current byte is not `{`.
   * @throws DecodeFinishError carrying the entries read so far when a key, the
   *     `:` separator, or a value fails with `'Invalid JSON'` (this includes a
   *     `__proto__` key).
   */
  public readObj(): PackValue | Record<string, unknown> | unknown {
    const reader = this.reader;
    if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON');
    const fields: Record<string, unknown> = {};
    const bytes = reader.uint8;
    for (;;) {
      this.skipWhitespace();
      const next = bytes[reader.x];
      if (next === 0x7d /* } */) {
        reader.x++;
        return fields;
      }
      if (next === 0x2c /* , */) {
        reader.x++;
        continue;
      }
      try {
        // Consume the opening quote of the key.
        if (bytes[reader.x++] !== 0x22 /* " */) throw new Error('Invalid JSON');
        const key = readKey(reader);
        if (key === '__proto__') throw new Error('Invalid JSON');
        this.skipWhitespace();
        if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON');
        this.skipWhitespace();
        try {
          fields[key] = this.readAny();
        } catch (err) {
          if (!(err instanceof DecodeFinishError)) throw err;
          // Keep the partially decoded value under its key and stop.
          fields[key] = err.value;
          return fields;
        }
      } catch (err) {
        if (err instanceof DecodeFinishError) return fields;
        const isInvalidJson = err instanceof Error && err.message === 'Invalid JSON';
        if (isInvalidJson) throw new DecodeFinishError(fields);
        throw err;
      }
    }
  }
}
26 changes: 26 additions & 0 deletions src/json/__tests__/JsonDecoder.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,19 @@ describe('array', () => {
expect(value).toEqual([1, 2.2, -3.3]);
});

test('simple array', () => {
const data = Buffer.from('[1, 2, 3]', 'utf-8');
decoder.reader.reset(data);
const value = decoder.readAny();
expect(value).toEqual([1, 2, 3]);
});

test('missing comma', () => {
const data = Buffer.from('[1, 2 3]', 'utf-8');
decoder.reader.reset(data);
expect(() => decoder.readAny()).toThrow(new Error('Invalid JSON'));
});

test('nested arrays', () => {
const data = Buffer.from(' \n \r \t [[],\n[ 4,\t5] , [null]] \n \r \t ', 'utf-8');
decoder.reader.reset(data);
Expand Down Expand Up @@ -366,6 +379,19 @@ describe('object', () => {
expect(value).toEqual({foo: 'bar'});
});

test('simple object', () => {
const data = Buffer.from('{"foo": 1, "bar": 2}', 'utf-8');
decoder.reader.reset(data);
const value = decoder.readAny();
expect(value).toEqual({foo: 1, bar: 2});
});

test('missing comma', () => {
const data = Buffer.from('{"foo": 1 "bar": 2}', 'utf-8');
decoder.reader.reset(data);
expect(() => decoder.readAny()).toThrow(new Error('Invalid JSON'));
});

test('nested object', () => {
const data = Buffer.from('{"":{}}', 'utf-8');
decoder.reader.reset(data);
Expand Down
39 changes: 39 additions & 0 deletions src/json/__tests__/JsonDecoderPartial.automated.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import {Writer} from '@jsonjoy.com/util/lib/buffers/Writer';
import {JsonValue} from '../../types';
import {JsonEncoder} from '../JsonEncoder';
import {JsonEncoderStable} from '../JsonEncoderStable';
import {JsonDecoderPartial} from '../JsonDecoderPartial';
import {documents} from '../../__tests__/json-documents';
import {binaryDocuments} from '../../__tests__/binary-documents';

// Fixtures shared by every test in this file. Both encoders are constructed
// with the same `Writer` instance; the `8` passed to `Writer` is presumably
// its initial buffer size — confirm against the `Writer` implementation.
const writer = new Writer(8);
// Regular JSON encoder.
const encoder = new JsonEncoder(writer);
// "Stable" JSON encoder variant (see `JsonEncoderStable` for what it guarantees).
const encoderStable = new JsonEncoderStable(writer);
// Decoder under test; here it is only fed complete documents.
const decoder = new JsonDecoderPartial();

/**
 * Round-trip check: encodes `value` with both the regular and the stable
 * encoder, decodes each result with the partial decoder, and expects both
 * decoded values to equal the original `value`.
 *
 * Both encodings are produced before anything is decoded, and both are
 * decoded before any expectation is evaluated.
 */
const assertEncoder = (value: JsonValue) => {
  const encodings = [encoder.encode(value), encoderStable.encode(value)];
  const roundTripped = encodings.map((bytes) => decoder.decode(bytes));
  for (const result of roundTripped) expect(result).toEqual(value);
};

// Registers one round-trip test per sample JSON document. A fixture flagged
// with `only` is registered through `test.only`.
describe('Sample JSON documents', () => {
  for (const doc of documents) {
    const register = doc.only ? test.only : test;
    register(doc.name, () => {
      assertEncoder(doc.json as any);
    });
  }
});

// Registers one round-trip test per sample binary document. A fixture flagged
// with `only` is registered through `test.only`.
describe('Sample binary documents', () => {
  for (const doc of binaryDocuments) {
    const register = doc.only ? test.only : test;
    register(doc.name, () => {
      assertEncoder(doc.json as any);
    });
  }
});
Loading