From 12a061c8ebab92c1a606a0f55d5ab1dc7fd96a34 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sun, 9 Jun 2024 21:03:52 +0100 Subject: [PATCH 01/14] json: default additionalProperty to true --- common/json-schema-to-grammar.cpp | 2 +- examples/json_schema_to_grammar.py | 5 ++++- .../server/public/json-schema-to-grammar.mjs | 6 +++++- tests/test-json-schema-to-grammar.cpp | 20 +++++++++++++++++-- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 737bae27c7206..58f3c2ae908d8 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -625,7 +625,7 @@ class SchemaConverter { return _add_rule(rule_name, _build_object_rule( properties, required, name, - schema.contains("additionalProperties") ? schema["additionalProperties"] : json())); + schema.contains("additionalProperties") ? schema["additionalProperties"] : json::object())); } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 7d889c3fe1287..15631d53c26bd 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -372,7 +372,10 @@ def visit(self, schema, name): ('additionalProperties' in schema and schema['additionalProperties'] is not True)): required = set(schema.get('required', [])) properties = list(schema.get('properties', {}).items()) - return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties'))) + additional_properties = schema.get('additionalProperties', True) + if additional_properties is None: + additional_properties = True + return self._add_rule(rule_name, self._build_object_rule(properties, required, name, additional_properties)) elif schema_type in (None, 'object') and 'allOf' in schema: required = set() diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index cef11eab83a46..ded37c9813a55 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -374,7 +374,11 @@ export class SchemaConverter { ('additionalProperties' in schema && schema.additionalProperties !== true))) { const required = new Set(schema.required || []); const properties = Object.entries(schema.properties ?? {}); - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties)); + let additionalProperties = schema.additionalProperties; + if (additionalProperties === undefined) { + additionalProperties = true; + } + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, additionalProperties)); } else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) { const required = new Set(); const properties = []; diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 052c0807310ce..780e1932aa273 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -751,15 +751,31 @@ static void test_all(const std::string & lang, std::function Date: Sun, 9 Jun 2024 22:38:58 +0100 Subject: [PATCH 02/14] json: don't force additional props after normal properties! --- common/json-schema-to-grammar.cpp | 12 +++------ examples/json_schema_to_grammar.py | 12 +++------ .../server/public/json-schema-to-grammar.mjs | 12 +++------ tests/test-json-schema-to-grammar.cpp | 27 +++++++------------ 4 files changed, 22 insertions(+), 41 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 58f3c2ae908d8..716801b5c6740 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -452,15 +452,11 @@ class SchemaConverter { } std::string k = ks[0]; std::string kv_rule_name = prop_kv_rule_names[k]; - if (k == "*") { - res = _add_rule( - name + (name.empty() ? "" : "-") + "additional-kvs", - kv_rule_name + " ( \",\" space " + kv_rule_name + " )*" - ); - } else if (first_is_optional) { - res = "( \",\" space " + kv_rule_name + " )?"; + std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; + if (first_is_optional) { + res = comma_ref + (k == "*" ? "*" : "?"); } else { - res = kv_rule_name; + res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : ""); } if (ks.size() > 1) { res += " " + _add_rule( diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 15631d53c26bd..65ce2d364f334 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -489,15 +489,11 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st def get_recursive_refs(ks, first_is_optional): [k, *rest] = ks kv_rule_name = prop_kv_rule_names[k] - if k == '*': - res = self._add_rule( - f'{name}{"-" if name else ""}additional-kvs', - f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*' - ) - elif first_is_optional: - res = f'( "," space {kv_rule_name} )?' + comma_ref = f'( "," space {kv_rule_name} )' + if first_is_optional: + res = comma_ref + ('*' if k == '*' else '?') else: - res = kv_rule_name + res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '') if len(rest) > 0: res += ' ' + self._add_rule( f'{name}{"-" if name else ""}{k}-rest', diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index ded37c9813a55..842659bf8b9bc 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -506,15 +506,11 @@ export class SchemaConverter { const [k, ...rest] = ks; const kvRuleName = propKvRuleNames[k]; let res; - if (k === '*') { - res = this._addRule( - `${name ?? ''}${name ? '-' : ''}additional-kvs`, - `${kvRuleName} ( "," space ` + kvRuleName + ` )*` - ) - } else if (firstIsOptional) { - res = `( "," space ${kvRuleName} )?`; + const commaRef = `( "," space ${kvRuleName} )`; + if (firstIsOptional) { + res = commaRef + (k === '*' ? '*' : '?'); } else { - res = kvRuleName; + res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : ''); } if (rest.length > 0) { res += ' ' + this._addRule( diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 780e1932aa273..478a2e2e9f8f4 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -552,13 +552,12 @@ static void test_all(const std::string & lang, std::function Date: Sun, 9 Jun 2024 22:45:43 +0100 Subject: [PATCH 03/14] json: allow space after enum/const --- common/json-schema-to-grammar.cpp | 4 ++-- examples/json_schema_to_grammar.py | 4 ++-- examples/server/public/json-schema-to-grammar.mjs | 4 ++-- tests/test-json-schema-to-grammar.cpp | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 716801b5c6740..5b6e4f327266a 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -594,13 +594,13 @@ class SchemaConverter { } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); } else if (schema.contains("const")) { - return _add_rule(rule_name, _generate_constant_rule(schema["const"])); + return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); } else if (schema.contains("enum")) { std::vector enum_values; for (const auto & v : schema["enum"]) { enum_values.push_back(_generate_constant_rule(v)); } - return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | ")); + return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space"); } else if ((schema_type.is_null() || schema_type == "object") && (schema.contains("properties") || (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 65ce2d364f334..f5c2b3e6b8a7d 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -361,10 +361,10 @@ def visit(self, schema, name): return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type])) elif 'const' in schema: - return self._add_rule(rule_name, self._generate_constant_rule(schema['const'])) + return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space') elif 'enum' in schema: - rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space' return self._add_rule(rule_name, rule) elif schema_type in (None, 'object') and \ diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 842659bf8b9bc..d08db261d5bd3 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -365,9 +365,9 @@ export class SchemaConverter { } else if (Array.isArray(schemaType)) { return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t })))); } else if ('const' in schema) { - return this._addRule(ruleName, this._generateConstantRule(schema.const)); + return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space'); } else if ('enum' in schema) { - const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | '); + const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space'; return this._addRule(ruleName, rule); } else if ((schemaType === undefined || schemaType === 'object') && ('properties' in schema || diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 478a2e2e9f8f4..3b1c0758956e3 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -247,7 +247,7 @@ static void test_all(const std::string & lang, std::function Date: Sun, 9 Jun 2024 23:14:18 +0100 Subject: [PATCH 04/14] json: update pydantic example to set additionalProperties: false --- examples/json-schema-pydantic-example.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py index 69ebfd4093824..ca43278c75b51 100644 --- a/examples/json-schema-pydantic-example.py +++ b/examples/json-schema-pydantic-example.py @@ -3,7 +3,7 @@ #! pip install pydantic #! python json-schema-pydantic-example.py -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel, Extra, TypeAdapter from annotated_types import MinLen from typing import Annotated, List, Optional import json, requests @@ -50,11 +50,15 @@ def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1 if __name__ == '__main__': class QAPair(BaseModel): + class Config: + extra = 'forbid' # triggers additionalProperties: false in the JSON schema question: str concise_answer: str justification: str class PyramidalSummary(BaseModel): + class Config: + extra = 'forbid' # triggers additionalProperties: false in the JSON schema title: str summary: str question_answers: Annotated[List[QAPair], MinLen(2)] From adca9af2f6de293200d7194e674dae807b2c0843 Mon Sep 17 00:00:00 2001 From: ochafik Date: Tue, 11 Jun 2024 00:16:12 +0100 Subject: [PATCH 05/14] json: prevent additional props to redefine a typed prop --- common/json-schema-to-grammar.cpp | 65 ++++++++++++++++++++++++++- tests/test-json-schema-to-grammar.cpp | 33 +++++++------- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 5b6e4f327266a..13bf409d45819 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -160,6 +160,63 @@ static std::string format_literal(const std::string & literal) { return "\"" + escaped + "\""; } +/* + Returns a rule that matches a JSON string that is none of the provided strings + + not_strings({"and", "also"}) + -> ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] +*/ +std::string not_strings(const std::vector & strings) { + + struct TrieNode { + std::map children; + bool is_end_of_string; + + void insert(const std::string & string) { + auto node = this; + for (char c : string) { + node = &node->children[c]; + } + node->is_end_of_string = true; + } + }; + + TrieNode trie; + for (const auto & s : strings) { + trie.insert(s); + } + + std::ostringstream out; + out << "[\"] ( "; + std::function visit = [&](const TrieNode & node) { + std::ostringstream rejects; + auto first = true; + for (const auto & kv : node.children) { + rejects << kv.first; + if (kv.second.is_end_of_string) { + continue; + } + if (first) { + first = false; + } else { + out << " | "; + } + out << "[" << kv.first << "] ("; + visit(kv.second); + out << ")"; + } + if (!node.children.empty()) { + if (!first) { + out << " | "; + } + out << "[^\"" << rejects.str() << "]"; + } + }; + visit(trie); + + out << " ) char* [\"]"; + return out.str(); +} class SchemaConverter { private: @@ -408,6 +465,7 @@ class SchemaConverter { std::vector required_props; std::vector optional_props; std::unordered_map prop_kv_rule_names; + std::vector prop_names; for (const auto & kv : properties) { const auto &prop_name = kv.first; const auto &prop_schema = kv.second; @@ -422,11 +480,16 @@ class SchemaConverter { } else { optional_props.push_back(prop_name); } + prop_names.push_back(prop_name); } if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); - std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule); + + auto key_rule = + prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string")) + : _add_rule(sub_name + "-k", not_strings(prop_names)); + std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); prop_kv_rule_names["*"] = kv_rule; optional_props.push_back("*"); } diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 3b1c0758956e3..8858acd7e831a 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -634,7 +634,8 @@ static void test_all(const std::string & lang, std::function Date: Tue, 11 Jun 2024 02:54:07 +0100 Subject: [PATCH 06/14] port not_strings to python, add trailing space --- common/json-schema-to-grammar.cpp | 2 +- examples/json_schema_to_grammar.py | 46 ++++++++++++++++++++++++++- tests/test-json-schema-to-grammar.cpp | 10 +++--- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 4d2c36d8836af..fe41c7295f5cc 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -214,7 +214,7 @@ std::string not_strings(const std::vector & strings) { }; visit(trie); - out << " ) char* [\"]"; + out << " ) char* [\"] space"; return out.str(); } diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 67ae921c0a0f0..252d63fd230f6 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -71,6 +71,47 @@ def __init__(self, content: str, deps: list = None): ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?') +def not_strings(strings): + class TrieNode: + def __init__(self): + self.children = {} + self.is_end_of_string = False + + def insert(self, string): + node = self + for c in string: + node = node.children.setdefault(c, TrieNode()) + node.is_end_of_string = True + + trie = TrieNode() + for s in strings: + trie.insert(s) + + out = ['["] ( '] + + def visit(node): + rejects = [] + first = True + for c, child in node.children.items(): + rejects.append(c) + if child.is_end_of_string: + continue + if first: + first = False + else: + out.append(' | ') + out.append(f'[{c}] (') + visit(child) + out.append(')') + if node.children: + if not first: + out.append(' | ') + out.append(f'[^"{"".join(rejects)}]') + visit(trie) + + out.append(' ) char* ["] space') + return ''.join(out) + class SchemaConverter: def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern): self._prop_order = prop_order @@ -471,9 +512,12 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st if additional_properties == True or isinstance(additional_properties, dict): sub_name = f'{name}{"-" if name else ""}additional' value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') + key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ + else self._add_rule(f'{sub_name}-k', not_strings(sorted_props)) + prop_kv_rule_names["*"] = self._add_rule( f'{sub_name}-kv', - self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}' + f'{key_rule} ":" space {value_rule}' ) optional_props.append("*") diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 9fa704c5e8903..2da6b88e9d01a 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -634,7 +634,7 @@ static void test_all(const std::string & lang, std::function Date: Tue, 11 Jun 2024 04:21:06 +0100 Subject: [PATCH 07/14] fix not_strings & port to js+py --- common/json-schema-to-grammar.cpp | 125 +++++++++--------- examples/json_schema_to_grammar.py | 90 +++++++------ .../server/public/json-schema-to-grammar.mjs | 64 ++++++++- tests/test-json-schema-to-grammar.cpp | 12 +- 4 files changed, 182 insertions(+), 109 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index fe41c7295f5cc..dfcbd58edc831 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -160,64 +160,6 @@ static std::string format_literal(const std::string & literal) { return "\"" + escaped + "\""; } -/* - Returns a rule that matches a JSON string that is none of the provided strings - - not_strings({"and", "also"}) - -> ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] -*/ -std::string not_strings(const std::vector & strings) { - - struct TrieNode { - std::map children; - bool is_end_of_string; - - void insert(const std::string & string) { - auto node = this; - for (char c : string) { - node = &node->children[c]; - } - node->is_end_of_string = true; - } - }; - - TrieNode trie; - for (const auto & s : strings) { - trie.insert(s); - } - - std::ostringstream out; - out << "[\"] ( "; - std::function visit = [&](const TrieNode & node) { - std::ostringstream rejects; - auto first = true; - for (const auto & kv : node.children) { - rejects << kv.first; - if (kv.second.is_end_of_string) { - continue; - } - if (first) { - first = false; - } else { - out << " | "; - } - out << "[" << kv.first << "] ("; - visit(kv.second); - out << ")"; - } - if (!node.children.empty()) { - if (!first) { - out << " | "; - } - out << "[^\"" << rejects.str() << "]"; - } - }; - visit(trie); - - out << " ) char* [\"] space"; - return out.str(); -} - class SchemaConverter { private: std::function _fetch_json; @@ -445,6 +387,67 @@ class SchemaConverter { return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space"); } + /* + Returns a rule that matches a JSON string that is none of the provided strings + + not_strings({"and", "also"}) + -> ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] + */ + std::string _not_strings(const std::vector & strings) { + + struct TrieNode { + std::map children; + bool is_end_of_string; + + void insert(const std::string & string) { + auto node = this; + for (char c : string) { + node = &node->children[c]; + } + node->is_end_of_string = true; + } + }; + + TrieNode trie; + for (const auto & s : strings) { + trie.insert(s); + } + + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + std::ostringstream out; + out << "[\"] ( "; + std::function visit = [&](const TrieNode & node) { + std::ostringstream rejects; + auto first = true; + for (const auto & kv : node.children) { + rejects << kv.first; + if (first) { + first = false; + } else { + out << " | "; + } + out << "[" << kv.first << "]"; + if (kv.second.is_end_of_string) { + out << " " << char_rule << "+"; + } else { + out << " ("; + visit(kv.second); + out << ")"; + } + } + if (!node.children.empty()) { + if (!first) { + out << " | "; + } + out << "[^\"" << rejects.str() << "] " << char_rule << "*"; + } + }; + visit(trie); + + out << " )? [\"] space"; + return out.str(); + } + std::string _resolve_ref(const std::string & ref) { std::string ref_name = ref.substr(ref.find_last_of('/') + 1); if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { @@ -484,11 +487,13 @@ class SchemaConverter { } if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; - std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); + std::string value_rule = + additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") + : _add_primitive("value", PRIMITIVE_RULES.at("value")); auto key_rule = prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string")) - : _add_rule(sub_name + "-k", not_strings(prop_names)); + : _add_rule(sub_name + "-k", _not_strings(prop_names)); std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); prop_kv_rule_names["*"] = kv_rule; optional_props.push_back("*"); diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 252d63fd230f6..cb255a6d02c52 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -71,47 +71,6 @@ def __init__(self, content: str, deps: list = None): ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?') -def not_strings(strings): - class TrieNode: - def __init__(self): - self.children = {} - self.is_end_of_string = False - - def insert(self, string): - node = self - for c in string: - node = node.children.setdefault(c, TrieNode()) - node.is_end_of_string = True - - trie = TrieNode() - for s in strings: - trie.insert(s) - - out = ['["] ( '] - - def visit(node): - rejects = [] - first = True - for c, child in node.children.items(): - rejects.append(c) - if child.is_end_of_string: - continue - if first: - first = False - else: - out.append(' | ') - out.append(f'[{c}] (') - visit(child) - out.append(')') - if node.children: - if not first: - out.append(' | ') - out.append(f'[^"{"".join(rejects)}]') - visit(trie) - - out.append(' ) char* ["] space') - return ''.join(out) - class SchemaConverter: def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern): self._prop_order = prop_order @@ -153,6 +112,51 @@ def recurse(i: int): return ''.join(('(', *recurse(0), ')')) + def _not_strings(self, strings): + class TrieNode: + def __init__(self): + self.children = {} + self.is_end_of_string = False + + def insert(self, string): + node = self + for c in string: + node = node.children.setdefault(c, TrieNode()) + node.is_end_of_string = True + + trie = TrieNode() + for s in strings: + trie.insert(s) + + char_rule = self._add_primitive('char', PRIMITIVE_RULES['char']) + out = ['["] ( '] + + def visit(node): + rejects = [] + first = True + for c in sorted(node.children.keys()): + child = node.children[c] + rejects.append(c) + if first: + first = False + else: + out.append(' | ') + out.append(f'[{c}]') + if (child.is_end_of_string): + out.append(f' {char_rule}+') + else: + out.append(f' (') + visit(child) + out.append(')') + if node.children: + if not first: + out.append(' | ') + out.append(f'[^"{"".join(rejects)}] {char_rule}*') + visit(trie) + + out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space') + return ''.join(out) + def _add_rule(self, name, rule): esc_name = INVALID_RULE_CHARS_RE.sub('-', name) if esc_name not in self._rules or self._rules[esc_name] == rule: @@ -513,8 +517,8 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st sub_name = f'{name}{"-" if name else ""}additional' value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ - else self._add_rule(f'{sub_name}-k', not_strings(sorted_props)) - + else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) + prop_kv_rule_names["*"] = self._add_rule( f'{sub_name}-kv', f'{key_rule} ":" space {value_rule}' diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index ff763a1e0bed5..96e4daae290f8 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -337,6 +337,63 @@ export class SchemaConverter { return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space") } + _notStrings(strings) { + class TrieNode { + constructor() { + this.children = {}; + this.isEndOfString = false; + } + + insert(str) { + let node = this; + for (const c of str) { + node = node.children[c] = node.children[c] || new TrieNode(); + } + node.isEndOfString = true; + } + } + + const trie = new TrieNode(); + for (const s of strings) { + trie.insert(s); + } + + const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']); + const out = ['["] ( ']; + + const visit = (node) => { + const rejects = []; + let first = true; + for (const c of Object.keys(node.children).sort()) { + const child = node.children[c]; + rejects.push(c); + if (!first) { + out.push(' | '); + } + out.push(`[${c}]`); + if (child.isEndOfString) { + out.push(` ${charRuleName}+`); + } else { + out.push(' ('); + visit(child); + out.push(')'); + } + first = false; + } + if (Object.keys(node.children).length > 0) { + if (!first) { + out.push(' | '); + } + out.push(`[^"${rejects.join('')}] ${charRuleName}*`); + } + }; + + visit(trie); + + out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`); + return out.join(''); + } + _resolveRef(ref) { let refName = ref.split('/').pop(); if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { @@ -487,9 +544,14 @@ export class SchemaConverter { if (typeof additionalProperties === 'object' || additionalProperties === true) { const subName = `${name ?? ''}${name ? '-' : ''}additional`; const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); + + const key_rule = + sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) + : this._addRule(`${subName}-k`, this._notStrings(sortedProps)); + propKvRuleNames['*'] = this._addRule( `${subName}-kv`, - `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`); + `${key_rule} ":" space ${valueRule}`); optionalProps.push('*'); } diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 2da6b88e9d01a..20a7227689015 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -634,7 +634,7 @@ static void test_all(const std::string & lang, std::function Date: Tue, 11 Jun 2024 04:41:56 +0100 Subject: [PATCH 08/14] Update json-schema-to-grammar.cpp --- common/json-schema-to-grammar.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index dfcbd58edc831..ebcd46c16b651 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -390,8 +390,10 @@ class SchemaConverter { /* Returns a rule that matches a JSON string that is none of the provided strings + not_strings({"a"}) + -> ["] ( [a] char+ | [^"a] char* )? ["] space not_strings({"and", "also"}) - -> ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] + -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space */ std::string _not_strings(const std::vector & strings) { From 0e48ea8ec2ee4435745bf58f5a6c948257fd4356 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Tue, 11 Jun 2024 22:48:33 +0100 Subject: [PATCH 09/14] fix _not_strings for substring overlaps --- common/json-schema-to-grammar.cpp | 12 ++- examples/json_schema_to_grammar.py | 6 +- .../server/public/json-schema-to-grammar.mjs | 11 +-- tests/test-json-schema-to-grammar.cpp | 76 +++++++++++++++++++ 4 files changed, 93 insertions(+), 12 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index ebcd46c16b651..275bdb97cf808 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -429,12 +429,12 @@ class SchemaConverter { out << " | "; } out << "[" << kv.first << "]"; - if (kv.second.is_end_of_string) { - out << " " << char_rule << "+"; - } else { + if (!kv.second.children.empty()) { out << " ("; visit(kv.second); out << ")"; + } else if (kv.second.is_end_of_string) { + out << " " << char_rule << "+"; } } if (!node.children.empty()) { @@ -446,7 +446,11 @@ class SchemaConverter { }; visit(trie); - out << " )? [\"] space"; + out << " )"; + if (!trie.is_end_of_string) { + out << "?"; + } + out << " [\"] space"; return out.str(); } diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index cb255a6d02c52..6c2e0992a8d6a 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -142,12 +142,12 @@ def visit(node): else: out.append(' | ') out.append(f'[{c}]') - if (child.is_end_of_string): - out.append(f' {char_rule}+') - else: + if child.children: out.append(f' (') visit(child) out.append(')') + elif child.is_end_of_string: + out.append(f' {char_rule}+') if node.children: if not first: out.append(' | ') diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 96e4daae290f8..3a7b6c86affdc 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -367,18 +367,19 @@ export class SchemaConverter { for (const c of Object.keys(node.children).sort()) { const child = node.children[c]; rejects.push(c); - if (!first) { + if (first) { + first = false; + } else { out.push(' | '); } out.push(`[${c}]`); - if (child.isEndOfString) { - out.push(` ${charRuleName}+`); - } else { + if (Object.keys(child.children).length > 0) { out.push(' ('); visit(child); out.push(')'); + } else if (child.isEndOfString) { + out.push(` ${charRuleName}+`); } - first = false; } if (Object.keys(node.children).length > 0) { if (!first) { diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 20a7227689015..a332372055390 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -697,6 +697,82 @@ static void test_all(const std::string & lang, std::function Date: Sat, 22 Jun 2024 20:52:02 +0100 Subject: [PATCH 10/14] json: fix additionalProperties default, uncomment tests --- common/json-schema-to-grammar.cpp | 6 ++++-- examples/json_schema_to_grammar.py | 14 ++++++------- .../server/public/json-schema-to-grammar.mjs | 14 ++++++------- tests/test-grammar-integration.cpp | 17 +--------------- tests/test-json-schema-to-grammar.cpp | 20 +++++++++++++------ 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 275bdb97cf808..0bc4d2229e391 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -401,6 +401,8 @@ class SchemaConverter { std::map children; bool is_end_of_string; + TrieNode() : is_end_of_string(false) {} + void insert(const std::string & string) { auto node = this; for (char c : string) { @@ -491,7 +493,7 @@ class SchemaConverter { } prop_names.push_back(prop_name); } - if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { + if (additional_properties.is_null() || additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; std::string value_rule = additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") @@ -695,7 +697,7 @@ class SchemaConverter { return _add_rule(rule_name, _build_object_rule( properties, required, name, - schema.contains("additionalProperties") ? schema["additionalProperties"] : json::object())); + schema.contains("additionalProperties") ? schema["additionalProperties"] : json())); } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 2f8f5ae9cd8de..9f0bccfcf7478 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -4,7 +4,7 @@ import json import re import sys -from typing import Any, Dict, List, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union def _build_repetition(item_rule, min_items, max_items, separator_rule=None): @@ -416,10 +416,7 @@ def visit(self, schema, name): ('additionalProperties' in schema and schema['additionalProperties'] is not True)): required = set(schema.get('required', [])) properties = list(schema.get('properties', {}).items()) - additional_properties = schema.get('additionalProperties', True) - if additional_properties is None: - additional_properties = True - return self._add_rule(rule_name, self._build_object_rule(properties, required, name, additional_properties)) + return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties'))) elif schema_type in (None, 'object') and 'allOf' in schema: required = set() @@ -498,7 +495,7 @@ def _add_primitive(self, name: str, rule: BuiltinRule): self._add_primitive(dep, dep_rule) return n - def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): + def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]): prop_order = self._prop_order # sort by position in prop_order (if specified) then by original order sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] @@ -513,9 +510,10 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st required_props = [k for k in sorted_props if k in required] optional_props = [k for k in sorted_props if k not in required] - if additional_properties == True or isinstance(additional_properties, dict): + if additional_properties != False: sub_name = f'{name}{"-" if name else ""}additional' - value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') + value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \ + self._add_primitive('value', PRIMITIVE_RULES['value']) key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 3a7b6c86affdc..9bf32631bca6e 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -432,11 +432,7 @@ export class SchemaConverter { ('additionalProperties' in schema && schema.additionalProperties !== true))) { const required = new Set(schema.required || []); const properties = Object.entries(schema.properties ?? {}); - let additionalProperties = schema.additionalProperties; - if (additionalProperties === undefined) { - additionalProperties = true; - } - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, additionalProperties)); + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties)); } else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) { const required = new Set(); const properties = []; @@ -466,7 +462,7 @@ export class SchemaConverter { } } - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false)); + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null)); } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { const items = schema.items ?? schema.prefixItems; if (Array.isArray(items)) { @@ -542,9 +538,11 @@ export class SchemaConverter { const requiredProps = sortedProps.filter(k => required.has(k)); const optionalProps = sortedProps.filter(k => !required.has(k)); - if (typeof additionalProperties === 'object' || additionalProperties === true) { + if (additionalProperties !== false) { const subName = `${name ?? ''}${name ? '-' : ''}additional`; - const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); + const valueRule = + additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`) + : this._addPrimitive('value', PRIMITIVE_RULES['value']); const key_rule = sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 96f90c01e0d97..6b2181a6a4863 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -15,8 +15,6 @@ using json = nlohmann::ordered_json; -//#define INCLUDE_FAILING_TESTS 1 - static llama_grammar* build_grammar(const std::string & grammar_str) { auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -823,12 +821,8 @@ static void test_json_schema() { // "By extension, even an empty object is valid" R"""({})""", // "By default, providing additional properties is valid" -#ifdef INCLUDE_FAILING_TESTS - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -861,20 +855,13 @@ static void test_json_schema() { { // "By extension, even an empty object is valid" R"""({})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Following line should pass and doesn't R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", // "By default, leaving out properties is valid" - // TODO: Following line should pass and doesn't R"""({ "street_name": "Pennsylvania" })""", - // TODO: Following line should pass and doesn't R"""({ "number": 1600, "street_name": "Pennsylvania" })""", // "By default, providing additional properties is valid" - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -906,10 +893,8 @@ static void test_json_schema() { R"""({ "number": 1600, "street_type":"Avenue"})""", R"""({ "number": 1600, "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. + // Spaces are permitted around enum values R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index b5ce8d7c7b18f..6399a51090b22 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -827,8 +827,7 @@ static void test_all(const std::string & lang, std::function Date: Sat, 22 Jun 2024 20:55:46 +0100 Subject: [PATCH 11/14] json: add integ. test case for additionalProperties --- tests/test-grammar-integration.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 6b2181a6a4863..3d950e44c7deb 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -835,6 +835,29 @@ static void test_json_schema() { } ); + test_schema( + "additional properties can't override other properties", + R"""({ + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"} + }, + "additionalProperties": true + })""", + // Passing strings + { + "{\"a\": 42}", + "{\"c\": \"\"}", + "{\"a\": 42, \"c\": \"\"}", + "{\"a_\": \"\"}", + }, + // Failing strings + { + "", + "{\"a\": \"\"}", + "{\"a\": \"\", \"b\": \"\"}", + } + ); // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) test_schema( From f714d7f1a7fe48e671daf4a016927c0cdbdf91f7 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sat, 22 Jun 2024 21:15:51 +0100 Subject: [PATCH 12/14] json: nit: simplify condition --- common/json-schema-to-grammar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 0bc4d2229e391..46a643de3db84 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -493,7 +493,7 @@ class SchemaConverter { } prop_names.push_back(prop_name); } - if (additional_properties.is_null() || additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { + if (!(additional_properties.is_boolean() && !additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; std::string value_rule = additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") From 3c64db18d70216010890a96034ffbd0461fddd6b Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 24 Jun 2024 00:27:32 +0100 Subject: [PATCH 13/14] reformat grammar integ tests w/ R"""()""" strings where there's escapes --- tests/test-grammar-integration.cpp | 294 +++++++++++++---------------- 1 file changed, 130 insertions(+), 164 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 3d950e44c7deb..617942f2eb455 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -508,7 +508,7 @@ static void test_json_schema() { )""", // Passing strings { - "{}", + R"""({})""", R"""({"foo": "bar"})""", }, // Failing strings @@ -516,7 +516,7 @@ static void test_json_schema() { "", "[]", "null", - "\"\"", + R"""("")""", "true", } ); @@ -524,16 +524,14 @@ static void test_json_schema() { test_schema( "exotic formats (list)", // Schema - R"""( - { + R"""({ "items": [ { "format": "date" }, { "format": "uuid" }, { "format": "time" }, { "format": "date-time" } ] - } - )""", + })""", // Passing strings { // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? @@ -552,125 +550,113 @@ static void test_json_schema() { test_schema( "string", // Schema - R"""( - { - "type": "string" - } - )""", + R"""({ + "type": "string" + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"\"", + R"""("foo")""", + R"""("bar")""", + R"""("")""", }, // Failing strings { - "{}", - "\"foo\": \"bar\"", + R"""({})""", + R"""("foo": "bar")""", } ); test_schema( "string w/ min length 1", // Schema - R"""( - { - "type": "string", - "minLength": 1 - } - )""", + R"""({ + "type": "string", + "minLength": 1 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", + R"""("foo")""", + R"""("bar")""", }, // Failing strings { - "\"\"", - "{}", - "\"foo\": \"bar\"", + R"""("")""", + R"""({})""", + R"""("foo": "bar")""", } ); test_schema( "string w/ min length 3", // Schema - R"""( - { + R"""({ "type": "string", "minLength": 3 - } - )""", + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"foobar\"", + R"""("foo")""", + R"""("bar")""", + R"""("foobar")""", }, // Failing strings { - "\"\"", - "\"f\"", - "\"fo\"", + R"""("")""", + R"""("f")""", + R"""("fo")""", } ); test_schema( "string w/ max length", // Schema - R"""( - { - "type": "string", - "maxLength": 3 - } - )""", + R"""({ + "type": "string", + "maxLength": 3 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"\"", - "\"f\"", - "\"fo\"", + R"""("foo")""", + R"""("bar")""", + R"""("")""", + R"""("f")""", + R"""("fo")""", }, // Failing strings { - "\"foobar\"", + R"""("foobar")""", } ); test_schema( "string w/ min & max length", // Schema - R"""( - { - "type": "string", - "minLength": 1, - "maxLength": 4 - } - )""", + R"""({ + "type": "string", + "minLength": 1, + "maxLength": 4 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"f\"", - "\"barf\"", + R"""("foo")""", + R"""("bar")""", + R"""("f")""", + R"""("barf")""", }, // Failing strings { - "\"\"", - "\"barfo\"", - "\"foobar\"", + R"""("")""", + R"""("barfo")""", + R"""("foobar")""", } ); test_schema( "boolean", // Schema - R"""( - { - "type": "boolean" - } - )""", + R"""({ + "type": "boolean" + })""", // Passing strings { "true", @@ -678,96 +664,88 @@ static void test_json_schema() { }, // Failing strings { - "\"\"", - "\"true\"", - "True", - "FALSE", + R"""("")""", + R"""("true")""", + R"""(True)""", + R"""(FALSE)""", } ); test_schema( "integer", // Schema - R"""( - { - "type": "integer" - } - )""", + R"""({ + "type": "integer" + })""", // Passing strings { - "0", - "12345", - "1234567890123456" + R"""(0)""", + R"""(12345)""", + R"""(1234567890123456)""", }, // Failing strings { - "", - "01", - "007", - "12345678901234567" + R"""()""", + R"""(01)""", + R"""(007)""", + R"""(12345678901234567 )""", } ); test_schema( "string const", // Schema - R"""( - { - "const": "foo" - } - )""", + R"""({ + "const": "foo" + })""", // Passing strings { - "\"foo\"", + R"""("foo")""", }, // Failing strings { - "foo", - "\"bar\"", + R"""(foo)""", + R"""("bar")""", } ); test_schema( "non-string const", // Schema - R"""( - { - "const": true - } - )""", + R"""({ + "const": true + })""", // Passing strings { - "true", + R"""(true)""", }, // Failing strings { - "", - "foo", - "\"true\"", + R"""()""", + R"""(foo)""", + R"""("true")""", } ); test_schema( "non-string const", // Schema - R"""( - { - "enum": ["red", "amber", "green", null, 42, ["foo"]] - } - )""", + R"""({ + "enum": ["red", "amber", "green", null, 42, ["foo"]] + })""", // Passing strings { - "\"red\"", - "null", - "42", - "[\"foo\"]", + R"""("red")""", + R"""(null)""", + R"""(42)""", + R"""(["foo"])""", }, // Failing strings { - "", - "420", - "true", - "foo", + R"""()""", + R"""(420)""", + R"""(true)""", + R"""(foo)""", } ); @@ -775,26 +753,24 @@ static void test_json_schema() { test_schema( "min+max items", // Schema - R"""( - { - "items": { - "type": ["number", "integer"] - }, - "minItems": 3, - "maxItems": 5 - } - )""", + R"""({ + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 + })""", // Passing strings { - "[1, 2, 3]", - "[1, 2, 3, 4]", - "[1, 2, 3, 4, 5]", + R"""([1, 2, 3])""", + R"""([1, 2, 3, 4])""", + R"""([1, 2, 3, 4, 5])""", }, // Failing strings { - "[1, 2]", - "[1, 2, 3, 4, 5, 6]", - "1" + R"""([1, 2])""", + R"""([1, 2, 3, 4, 5, 6])""", + R"""(1)""", } ); @@ -802,16 +778,14 @@ static void test_json_schema() { test_schema( "object properties", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, "street_name": { "type": "string" }, "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } } - } - )""", + })""", // Passing strings { R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", @@ -846,16 +820,16 @@ static void test_json_schema() { })""", // Passing strings { - "{\"a\": 42}", - "{\"c\": \"\"}", - "{\"a\": 42, \"c\": \"\"}", - "{\"a_\": \"\"}", + R"""({"a": 42})""", + R"""({"c": ""})""", + R"""({"a": 42, "c": ""})""", + R"""({"a_": ""})""", }, // Failing strings { - "", - "{\"a\": \"\"}", - "{\"a\": \"\", \"b\": \"\"}", + R"""()""", + R"""({"a": ""})""", + R"""({"a": "", "b": ""})""", } ); @@ -863,8 +837,7 @@ static void test_json_schema() { test_schema( "object properties, additionalProperties: true", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, @@ -872,8 +845,7 @@ static void test_json_schema() { "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } }, "additionalProperties": true - } - )""", + })""", // Passing strings { // "By extension, even an empty object is valid" @@ -899,8 +871,7 @@ static void test_json_schema() { test_schema( "required + optional props each in original order", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, @@ -908,8 +879,7 @@ static void test_json_schema() { "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } }, "additionalProperties": false - } - )""", + })""", // Passing strings { R"""({ "street_name": "Pennsylvania" })""", @@ -931,18 +901,16 @@ static void test_json_schema() { test_schema( "required + optional props each in original order", // Schema - R"""( - { - "properties": { - "b": {"type": "string"}, - "a": {"type": "string"}, - "d": {"type": "string"}, - "c": {"type": "string"} - }, - "required": ["a", "b"], - "additionalProperties": false - } - )""", + R"""({ + "properties": { + "b": {"type": "string"}, + "a": {"type": "string"}, + "d": {"type": "string"}, + "c": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false + })""", // Passing strings { R"""({"b": "foo", "a": "bar"})""", @@ -962,8 +930,7 @@ static void test_json_schema() { test_schema( "required props", // Schema - R"""( - { + R"""({ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://example.com/product.schema.json", "title": "Product", @@ -1009,8 +976,7 @@ static void test_json_schema() { } }, "required": [ "productId", "productName", "price" ] - } - )""", + })""", // Passing strings { R"""({"productId": 1, "productName": "A green door", "price": 12.50})""", From 23beed22a3c5190c42cc3140a97af450101733da Mon Sep 17 00:00:00 2001 From: ochafik Date: Tue, 25 Jun 2024 21:59:23 +0100 Subject: [PATCH 14/14] update # tokens in server test: consts can now have trailing space --- examples/server/tests/features/server.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index d21c09135243a..b55971454afc3 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -82,7 +82,7 @@ Feature: llama.cpp server Examples: Prompts | response_format | n_predicted | re_content | - | {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" | + | {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" | | {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] | | {"type": "json_object"} | 10 | \{ " Jacky. |