Skip to content

Commit 72114ed

Browse files
authored
json-schema-to-grammar : fix order of props + non-str const/enum (#6232)
* json: ordered json in server/schema converter to respect orig order * json: ws nits * json: support non-string const / enums
1 parent 2f0e81e commit 72114ed

8 files changed

+1452
-1481
lines changed

common/json-schema-to-grammar.cpp

+6-10
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include <unordered_set>
1010
#include <vector>
1111

12-
using json = nlohmann::json;
12+
using json = nlohmann::ordered_json;
1313

1414
const std::string SPACE_RULE = "\" \"?";
1515

@@ -124,7 +124,7 @@ static std::string replacePattern(const std::string & input, const std::regex &
124124
}
125125

126126
static std::string format_literal(const std::string & literal) {
127-
std::string escaped = replacePattern(json(literal).dump(), GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
127+
std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
128128
char c = match.str()[0];
129129
return GRAMMAR_LITERAL_ESCAPES.at(c);
130130
});
@@ -137,7 +137,7 @@ class SchemaConverter {
137137
std::function<json(const std::string &)> _fetch_json;
138138
bool _dotall;
139139
std::map<std::string, std::string> _rules;
140-
std::unordered_map<std::string, nlohmann::json> _refs;
140+
std::unordered_map<std::string, json> _refs;
141141
std::unordered_set<std::string> _refs_being_resolved;
142142
std::vector<std::string> _errors;
143143
std::vector<std::string> _warnings;
@@ -413,7 +413,7 @@ class SchemaConverter {
413413
std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name);
414414
prop_kv_rule_names[prop_name] = _add_rule(
415415
name + (name.empty() ? "" : "-") + prop_name + "-kv",
416-
format_literal(prop_name) + " space \":\" space " + prop_rule_name
416+
format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name
417417
);
418418
if (required.find(prop_name) != required.end()) {
419419
required_props.push_back(prop_name);
@@ -495,7 +495,7 @@ class SchemaConverter {
495495
_rules["space"] = SPACE_RULE;
496496
}
497497

498-
void resolve_refs(nlohmann::json & schema, const std::string & url) {
498+
void resolve_refs(json & schema, const std::string & url) {
499499
/*
500500
* Resolves all $ref fields in the given schema, fetching any remote schemas,
501501
* replacing each $ref with absolute reference URL and populates _refs with the
@@ -557,11 +557,7 @@ class SchemaConverter {
557557
}
558558

559559
std::string _generate_constant_rule(const json & value) {
560-
if (!value.is_string()) {
561-
_errors.push_back("Only std::string constants are supported, got " + value.dump());
562-
return "";
563-
}
564-
return format_literal(value.get<std::string>());
560+
return format_literal(value.dump());
565561
}
566562

567563
std::string visit(const json & schema, const std::string & name) {

common/json-schema-to-grammar.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#pragma once
22
#include "json.hpp"
33

4-
std::string json_schema_to_grammar(const nlohmann::json& schema);
4+
std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);

examples/json-schema-to-grammar.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
6161

6262
def _format_literal(self, literal):
6363
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
64-
lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), json.dumps(literal)
64+
lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), literal
6565
)
6666
return f'"{escaped}"'
6767

@@ -308,8 +308,7 @@ def _resolve_ref(self, ref):
308308
return ref_name
309309

310310
def _generate_constant_rule(self, value):
311-
assert isinstance(value, str), f'Only string constants are supported, got {value}'
312-
return self._format_literal(value)
311+
return self._format_literal(json.dumps(value))
313312

314313
def visit(self, schema, name):
315314
schema_type = schema.get('type')
@@ -428,7 +427,7 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st
428427
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
429428
prop_kv_rule_names[prop_name] = self._add_rule(
430429
f'{name}{"-" if name else ""}{prop_name}-kv',
431-
fr'{self._format_literal(prop_name)} space ":" space {prop_rule_name}'
430+
fr'{self._format_literal(json.dumps(prop_name))} space ":" space {prop_rule_name}'
432431
)
433432
required_props = [k for k in sorted_props if k in required]
434433
optional_props = [k for k in sorted_props if k not in required]

examples/server/json-schema-to-grammar.mjs.hpp

+1,384-1,407
Large diffs are not rendered by default.

examples/server/public/json-schema-to-grammar.mjs

+3-9
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ export class SchemaConverter {
4848
}
4949

5050
_formatLiteral(literal) {
51-
const escaped = JSON.stringify(literal).replace(
51+
const escaped = literal.replace(
5252
GRAMMAR_LITERAL_ESCAPE_RE,
5353
m => GRAMMAR_LITERAL_ESCAPES[m]
5454
);
@@ -327,10 +327,7 @@ export class SchemaConverter {
327327
}
328328

329329
_generateConstantRule(value) {
330-
if (typeof value !== 'string') {
331-
throw new Error('Only string constants are supported, got ' + JSON.stringify(value));
332-
}
333-
return this._formatLiteral(value);
330+
return this._formatLiteral(JSON.stringify(value));
334331
}
335332

336333
visit(schema, name) {
@@ -346,9 +343,6 @@ export class SchemaConverter {
346343
} else if (Array.isArray(schemaType)) {
347344
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
348345
} else if ('const' in schema) {
349-
if (typeof schema.const !== 'string') {
350-
throw new Error('Only string constants are supported, got ' + JSON.stringify(schema.const));
351-
}
352346
return this._addRule(ruleName, this._generateConstantRule(schema.const));
353347
} else if ('enum' in schema) {
354348
const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
@@ -457,7 +451,7 @@ export class SchemaConverter {
457451
const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
458452
propKvRuleNames[propName] = this._addRule(
459453
`${name ?? ''}${name ? '-' : ''}${propName}-kv`,
460-
`${this._formatLiteral(propName)} space ":" space ${propRuleName}`
454+
`${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
461455
);
462456
}
463457
const requiredProps = sortedProps.filter(k => required.has(k));

examples/server/server.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#include <signal.h>
3131
#include <memory>
3232

33-
using json = nlohmann::json;
33+
using json = nlohmann::ordered_json;
3434

3535
bool server_verbose = false;
3636
bool server_log_json = true;

examples/server/utils.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
1414

15-
using json = nlohmann::json;
15+
using json = nlohmann::ordered_json;
1616

1717
// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
1818
enum error_type {

tests/test-json-schema-to-grammar.cpp

+53-48
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
9090

9191
test({
9292
FAILURE,
93-
"invalid type type",
93+
"invalid type",
9494
R"""({
9595
"type": 123
9696
})""",
@@ -193,21 +193,27 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
193193
});
194194

195195
test({
196-
FAILURE,
196+
SUCCESS,
197197
"non-string const",
198198
R"""({
199199
"const": 123
200200
})""",
201-
""
201+
R"""(
202+
root ::= "123"
203+
space ::= " "?
204+
)"""
202205
});
203206

204207
test({
205-
FAILURE,
208+
SUCCESS,
206209
"non-string enum",
207210
R"""({
208-
"enum": [123]
211+
"enum": ["red", "amber", "green", null, 42, ["foo"]]
209212
})""",
210-
""
213+
R"""(
214+
root ::= "\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]"
215+
space ::= " "?
216+
)"""
211217
});
212218

213219
test({
@@ -378,28 +384,27 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
378384

379385
test({
380386
SUCCESS,
381-
"required props",
387+
"required props in original order",
382388
R"""({
383389
"type": "object",
384390
"properties": {
385-
"a": {
386-
"type": "string"
387-
},
388-
"b": {
389-
"type": "string"
390-
}
391+
"b": {"type": "string"},
392+
"c": {"type": "string"},
393+
"a": {"type": "string"}
391394
},
392395
"required": [
393396
"a",
394-
"b"
397+
"b",
398+
"c"
395399
],
396400
"additionalProperties": false,
397401
"definitions": {}
398402
})""",
399403
R"""(
400404
a-kv ::= "\"a\"" space ":" space string
401405
b-kv ::= "\"b\"" space ":" space string
402-
root ::= "{" space a-kv "," space b-kv "}" space
406+
c-kv ::= "\"c\"" space ":" space string
407+
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
403408
space ::= " "?
404409
string ::= "\"" (
405410
[^"\\] |
@@ -458,13 +463,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
458463

459464
test({
460465
SUCCESS,
461-
"required + optional props",
466+
"required + optional props each in original order",
462467
R"""({
463468
"properties": {
464-
"a": {"type": "string"},
465469
"b": {"type": "string"},
466-
"c": {"type": "string"},
467-
"d": {"type": "string"}
470+
"a": {"type": "string"},
471+
"d": {"type": "string"},
472+
"c": {"type": "string"}
468473
},
469474
"required": ["a", "b"],
470475
"additionalProperties": false
@@ -473,14 +478,14 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
473478
a-kv ::= "\"a\"" space ":" space string
474479
b-kv ::= "\"b\"" space ":" space string
475480
c-kv ::= "\"c\"" space ":" space string
476-
c-rest ::= ( "," space d-kv )?
477481
d-kv ::= "\"d\"" space ":" space string
478-
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
482+
d-rest ::= ( "," space c-kv )?
483+
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
479484
space ::= " "?
480485
string ::= "\"" (
481486
[^"\\] |
482487
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
483-
)* "\"" space
488+
)* "\"" space
484489
)"""
485490
});
486491

@@ -648,16 +653,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
648653
"$ref": "#/definitions/MyType",
649654
"definitions": {
650655
"MyType": {
651-
"type": "object",
652-
"properties": {
653-
"a": {
654-
"type": "string"
655-
}
656-
},
657-
"required": [
658-
"a"
659-
],
660-
"additionalProperties": false
656+
"type": "object",
657+
"properties": {
658+
"a": {
659+
"type": "string"
660+
}
661+
},
662+
"required": [
663+
"a"
664+
],
665+
"additionalProperties": false
661666
}
662667
}
663668
})""",
@@ -683,10 +688,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
683688
],
684689
"definitions": {
685690
"foo": {
686-
"properties": {"a": {"type": "number"}}
691+
"properties": {"a": {"type": "number"}}
687692
},
688693
"bar": {
689-
"properties": {"b": {"type": "number"}}
694+
"properties": {"b": {"type": "number"}}
690695
}
691696
},
692697
"type": "object"
@@ -720,16 +725,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
720725
],
721726
"definitions": {
722727
"foo": {
723-
"properties": {"a": {"type": "number"}}
728+
"properties": {"a": {"type": "number"}}
724729
},
725730
"bar": {
726-
"properties": {"b": {"type": "number"}}
731+
"properties": {"b": {"type": "number"}}
727732
},
728733
"bam": {
729-
"properties": {"c": {"type": "number"}}
734+
"properties": {"c": {"type": "number"}}
730735
},
731736
"baz": {
732-
"properties": {"d": {"type": "number"}}
737+
"properties": {"d": {"type": "number"}}
733738
}
734739
},
735740
"type": "object"
@@ -757,15 +762,15 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
757762
"properties": {
758763
"number": {
759764
"type": "object",
760-
"properties": {
761-
"root": {
762-
"type": "number"
763-
}
764-
},
765-
"required": [
766-
"root"
767-
],
768-
"additionalProperties": false
765+
"properties": {
766+
"root": {
767+
"type": "number"
768+
}
769+
},
770+
"required": [
771+
"root"
772+
],
773+
"additionalProperties": false
769774
}
770775
},
771776
"required": [
@@ -796,7 +801,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
796801
int main() {
797802
test_all("C++", [](const TestCase & tc) {
798803
try {
799-
tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
804+
tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema)));
800805
tc.verify_status(SUCCESS);
801806
} catch (const std::runtime_error & ex) {
802807
fprintf(stderr, "Error: %s\n", ex.what());

0 commit comments

Comments
 (0)