Skip to content

Commit 3e367e7

Browse files
author
ochafik
committed
json: revert default of additionalProperties to false
1 parent 29a2d4f commit 3e367e7

File tree

6 files changed

+35
-45
lines changed

6 files changed

+35
-45
lines changed

common/json-schema-to-grammar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,7 @@ class SchemaConverter {
720720
}
721721
prop_names.push_back(prop_name);
722722
}
723-
if (!(additional_properties.is_boolean() && !additional_properties.get<bool>())) {
723+
if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
724724
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
725725
std::string value_rule =
726726
additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")

examples/json_schema_to_grammar.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ def add_component(comp_schema, is_required):
602602
else:
603603
add_component(t, is_required=True)
604604

605-
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[]))
605+
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=None))
606606

607607
elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
608608
items = schema.get('items') or schema['prefixItems']
@@ -691,7 +691,7 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st
691691
required_props = [k for k in sorted_props if k in required]
692692
optional_props = [k for k in sorted_props if k not in required]
693693

694-
if additional_properties != False:
694+
if additional_properties is not None and additional_properties != False:
695695
sub_name = f'{name}{"-" if name else ""}additional'
696696
value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
697697
self._add_primitive('value', PRIMITIVE_RULES['value'])

examples/server/public/json-schema-to-grammar.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,7 @@ export class SchemaConverter {
751751
const requiredProps = sortedProps.filter(k => required.has(k));
752752
const optionalProps = sortedProps.filter(k => !required.has(k));
753753

754-
if (additionalProperties !== false) {
754+
if (additionalProperties) {
755755
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
756756
const valueRule =
757757
additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)

grammars/README.md

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ space ::= | " " | "\n" [ \t]{0,20}
182182

183183
Here is also a list of known limitations (contributions welcome):
184184

185+
- `additionalProperties` defaults to `false` (faster grammars + reduces hallucinations)
185186
- Unsupported features are skipped silently. It is currently advised to use the command-line Python converter (see above) to see any warnings, and to inspect the resulting grammar / test it w/ [llama-gbnf-validator](../examples/gbnf-validator/gbnf-validator.cpp).
186187
- Can't mix `properties` w/ `anyOf` / `oneOf` in the same type (https://github.com/ggerganov/llama.cpp/issues/7703)
187188
- [prefixItems](https://json-schema.org/draft/2020-12/json-schema-core#name-prefixitems) is broken (but [items](https://json-schema.org/draft/2020-12/json-schema-core#name-items) works)
@@ -203,10 +204,11 @@ And a non-exhaustive list of other unsupported features that are unlikely to be
203204
### A word about additionalProperties
204205

205206
> [!WARNING]
206-
> By default, `object`s accept [additional properties](https://json-schema.org/understanding-json-schema/reference/object#additionalproperties), which you might not want / not expect, and which will make sampling slower (not just because of the extra tokens, but also generates a slower grammar).
207-
> You can set `"additionalProperties": false` on the schema of any object to ensure only properties listed in `properties` are generated (not needed for non-`object` types, e.g. `array` or `string`).
207+
> The JSON Schema spec states that `object`s accept [additional properties](https://json-schema.org/understanding-json-schema/reference/object#additionalproperties) by default.
208+
> Since this is slow and seems prone to hallucinations, we default to no additional properties.
209+
> You can set `"additionalProperties": true` in the schema of any object to explicitly allow additional properties.
208210
209-
If you're using [Pydantic](https://pydantic.dev/) to generate schemas, you can disable additional properties with the `extra` config on each model class:
211+
If you're using [Pydantic](https://pydantic.dev/) to generate schemas, you can enable additional properties with the `extra` config on each model class:
210212

211213
```python
212214
# pip install pydantic
@@ -215,14 +217,14 @@ from typing import Annotated, List
215217
from pydantic import BaseModel, Extra, Field
216218
class QAPair(BaseModel):
217219
class Config:
218-
extra = 'forbid' # triggers additionalProperties: false in the JSON schema
220+
extra = 'allow' # triggers additionalProperties: true in the JSON schema
219221
question: str
220222
concise_answer: str
221223
justification: str
222224

223225
class Summary(BaseModel):
224226
class Config:
225-
extra = 'forbid'
227+
extra = 'allow'
226228
key_facts: List[Annotated[str, Field(pattern='- .{5,}')]]
227229
question_answers: List[Annotated[List[QAPair], Field(min_items=5)]]
228230

@@ -236,7 +238,7 @@ print(json.dumps(Summary.model_json_schema(), indent=2))
236238
{
237239
"$defs": {
238240
"QAPair": {
239-
"additionalProperties": false,
241+
"additionalProperties": true,
240242
"properties": {
241243
"question": {
242244
"title": "Question",
@@ -260,7 +262,7 @@ print(json.dumps(Summary.model_json_schema(), indent=2))
260262
"type": "object"
261263
}
262264
},
263-
"additionalProperties": false,
265+
"additionalProperties": true,
264266
"properties": {
265267
"key_facts": {
266268
"items": {
@@ -292,30 +294,40 @@ print(json.dumps(Summary.model_json_schema(), indent=2))
292294
```
293295

294296
```
295-
QAPair ::= "{" space QAPair-question-kv "," space QAPair-concise-answer-kv "," space QAPair-justification-kv "}" space
297+
QAPair ::= "{" space QAPair-question-kv "," space QAPair-concise-answer-kv "," space QAPair-justification-kv ( "," space ( QAPair-additional-kv ( "," space QAPair-additional-kv )* ) )? "}" space
298+
QAPair-additional-k ::= ["] ( [c] ([o] ([n] ([c] ([i] ([s] ([e] ([_] ([a] ([n] ([s] ([w] ([e] ([r] char+ | [^"r] char*) | [^"e] char*) | [^"w] char*) | [^"s] char*) | [^"n] char*) | [^"a] char*) | [^"_] char*) | [^"e] char*) | [^"s] char*) | [^"i] char*) | [^"c] char*) | [^"n] char*) | [^"o] char*) | [j] ([u] ([s] ([t] ([i] ([f] ([i] ([c] ([a] ([t] ([i] ([o] ([n] char+ | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"a] char*) | [^"c] char*) | [^"i] char*) | [^"f] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"u] char*) | [q] ([u] ([e] ([s] ([t] ([i] ([o] ([n] char+ | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"e] char*) | [^"u] char*) | [^"cjq] char* )? ["] space
299+
QAPair-additional-kv ::= QAPair-additional-k ":" space value
296300
QAPair-concise-answer-kv ::= "\"concise_answer\"" space ":" space string
297301
QAPair-justification-kv ::= "\"justification\"" space ":" space string
298302
QAPair-question-kv ::= "\"question\"" space ":" space string
303+
additional-k ::= ["] ( [k] ([e] ([y] ([_] ([f] ([a] ([c] ([t] ([s] char+ | [^"s] char*) | [^"t] char*) | [^"c] char*) | [^"a] char*) | [^"f] char*) | [^"_] char*) | [^"y] char*) | [^"e] char*) | [q] ([u] ([e] ([s] ([t] ([i] ([o] ([n] ([_] ([a] ([n] ([s] ([w] ([e] ([r] ([s] char+ | [^"s] char*) | [^"r] char*) | [^"e] char*) | [^"w] char*) | [^"s] char*) | [^"n] char*) | [^"a] char*) | [^"_] char*) | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"e] char*) | [^"u] char*) | [^"kq] char* )? ["] space
304+
additional-kv ::= additional-k ":" space value
305+
array ::= "[" space ( value ("," space value)* )? "]" space
306+
boolean ::= ("true" | "false") space
299307
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
308+
decimal-part ::= [0-9]{1,16}
300309
dot ::= [^\x0A\x0D]
310+
integral-part ::= [0] | [1-9] [0-9]{0,15}
301311
key-facts ::= "[" space (key-facts-item ("," space key-facts-item)*)? "]" space
302312
key-facts-item ::= "\"" "- " key-facts-item-1{5,} "\"" space
303313
key-facts-item-1 ::= dot
304314
key-facts-kv ::= "\"key_facts\"" space ":" space key-facts
315+
null ::= "null" space
316+
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
317+
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
305318
question-answers ::= "[" space (question-answers-item ("," space question-answers-item)*)? "]" space
306319
question-answers-item ::= "[" space question-answers-item-item ("," space question-answers-item-item){4,} "]" space
307320
question-answers-item-item ::= QAPair
308321
question-answers-kv ::= "\"question_answers\"" space ":" space question-answers
309-
root ::= "{" space key-facts-kv "," space question-answers-kv "}" space
322+
root ::= "{" space key-facts-kv "," space question-answers-kv ( "," space ( additional-kv ( "," space additional-kv )* ) )? "}" space
310323
space ::= | " " | "\n" [ \t]{0,20}
311324
string ::= "\"" char* "\"" space
325+
value ::= object | array | string | number | boolean | null
312326
```
313327

314328
</details>
315329

316-
If you're using [Zod](https://zod.dev/), you can make your objects explicitly strict w/ `z.object(...).strict()` or `z.strictObject(...)`.
317-
318-
Note however that [zod-to-json-schema](https://github.com/StefanTerdell/zod-to-json-schema) currently always seems to set `"additionalProperties": false` anyway (even w/ zod schemas on which `nonstrict()` / `passthrough()` was called).
330+
If you're using [Zod](https://zod.dev/), you can make your objects explicitly allow extra properties w/ `nonstrict()` / `passthrough()` (or explicitly forbid extra properties w/ `z.object(...).strict()` or `z.strictObject(...)`), but note that [zod-to-json-schema](https://github.com/StefanTerdell/zod-to-json-schema) currently always sets `"additionalProperties": false` anyway.
319331

320332
```js
321333
import { z } from 'zod';

tests/test-grammar-integration.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,8 +1096,6 @@ static void test_json_schema() {
10961096
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
10971097
// "By extension, even an empty object is valid"
10981098
R"""({})""",
1099-
// "By default, providing additional properties is valid"
1100-
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
11011099
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
11021100
},
11031101
// Failing strings
@@ -1108,6 +1106,9 @@ static void test_json_schema() {
11081106
R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
11091107
// Reorder properties
11101108
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1109+
// Additional properties default to false for generation, even though the spec says true.
1110+
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1111+
11111112
}
11121113
);
11131114

tests/test-json-schema-to-grammar.cpp

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,28 +1120,15 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
11201120
R"""(
11211121
alternative-0 ::= foo
11221122
alternative-1 ::= bar
1123-
array ::= "[" space ( value ("," space value)* )? "]" space
1124-
bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space
1125-
bar-additional-k ::= ["] ( [b] char+ | [^"b] char* )? ["] space
1126-
bar-additional-kv ::= bar-additional-k ":" space value
1123+
bar ::= "{" space (bar-b-kv )? "}" space
11271124
bar-b-kv ::= "\"b\"" space ":" space number
1128-
bar-b-rest ::= ( "," space bar-additional-kv )*
1129-
boolean ::= ("true" | "false") space
1130-
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
11311125
decimal-part ::= [0-9]{1,16}
1132-
foo ::= "{" space (foo-a-kv foo-a-rest | foo-additional-kv ( "," space foo-additional-kv )* )? "}" space
1126+
foo ::= "{" space (foo-a-kv )? "}" space
11331127
foo-a-kv ::= "\"a\"" space ":" space number
1134-
foo-a-rest ::= ( "," space foo-additional-kv )*
1135-
foo-additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
1136-
foo-additional-kv ::= foo-additional-k ":" space value
11371128
integral-part ::= [0] | [1-9] [0-9]{0,15}
1138-
null ::= "null" space
11391129
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
1140-
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
11411130
root ::= alternative-0 | alternative-1
11421131
space ::= | " " | "\n" [ \t]{0,20}
1143-
string ::= "\"" char* "\"" space
1144-
value ::= object | array | string | number | boolean | null
11451132
)"""
11461133
});
11471134

@@ -1177,25 +1164,15 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
11771164
})""",
11781165
R"""(
11791166
a-kv ::= "\"a\"" space ":" space number
1180-
additional-k ::= ["] ( [a] char+ | [b] char+ | [c] char+ | [d] char+ | [^"abcd] char* )? ["] space
1181-
additional-kv ::= additional-k ":" space value
1182-
array ::= "[" space ( value ("," space value)* )? "]" space
11831167
b-kv ::= "\"b\"" space ":" space number
1184-
boolean ::= ("true" | "false") space
11851168
c-kv ::= "\"c\"" space ":" space number
1186-
c-rest ::= ( "," space additional-kv )*
1187-
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
11881169
d-kv ::= "\"d\"" space ":" space number
1189-
d-rest ::= ( "," space c-kv )? c-rest
1170+
d-rest ::= ( "," space c-kv )?
11901171
decimal-part ::= [0-9]{1,16}
11911172
integral-part ::= [0] | [1-9] [0-9]{0,15}
1192-
null ::= "null" space
11931173
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
1194-
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
1195-
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv c-rest | additional-kv ( "," space additional-kv )* ) )? "}" space
1174+
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
11961175
space ::= | " " | "\n" [ \t]{0,20}
1197-
string ::= "\"" char* "\"" space
1198-
value ::= object | array | string | number | boolean | null
11991176
)"""
12001177
});
12011178

0 commit comments

Comments
 (0)