Skip to content

Commit 614f631

Browse files
committed
[Serve] Add interface for structural generation config
This PR exposes the options of grammar cache and compact json output in debug config.
1 parent e283cd0 commit 614f631

File tree

4 files changed

+30
-4
lines changed

4 files changed

+30
-4
lines changed

cpp/serve/config.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ Result<DebugConfig> DebugConfig::FromJSON(const picojson::object& config) {
8686
} else {
8787
return TResult::Error("Uknown grammar execution mode " + grammar_execution_mode);
8888
}
89+
res.disable_grammar_cache = json::LookupOrDefault<bool>(config, "disable_grammar_cache", false);
90+
res.compact_json_output = json::LookupOrDefault<bool>(config, "compact_json_output", false);
8991
return TResult::Ok(res);
9092
}
9193

@@ -114,6 +116,8 @@ picojson::object DebugConfig::AsJSON() const {
114116
break;
115117
}
116118
}
119+
config["disable_grammar_cache"] = picojson::value(disable_grammar_cache);
120+
config["compact_json_output"] = picojson::value(compact_json_output);
117121
return config;
118122
}
119123

cpp/serve/config.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ class DebugConfig {
6363
SpecialRequestKind special_request = SpecialRequestKind::kNone;
6464
/*! \brief The grammar execution mode. */
6565
GrammarExecutionMode grammar_execution_mode = GrammarExecutionMode::kJumpForward;
66+
/*! \brief Whether to disable the grammar cache. */
67+
bool disable_grammar_cache = false;
68+
/*! \brief Whether to generate JSON with compact format style. */
69+
bool compact_json_output = false;
6670

6771
/*!
6872
* \brief Create debug config from JSON.

cpp/serve/engine.cc

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,10 @@ class EngineImpl : public Engine {
495495

496496
int n = request->generation_cfg->n;
497497
int rng_seed = request->generation_cfg->seed;
498-
auto compiled_grammar = GetGrammarFromResponseFormat(request->generation_cfg->response_format);
498+
auto compiled_grammar =
499+
GetGrammarFromResponseFormat(request->generation_cfg->response_format,
500+
request->generation_cfg->debug_config.disable_grammar_cache,
501+
request->generation_cfg->debug_config.compact_json_output);
499502

500503
std::vector<RequestStateEntry> rsentries;
501504
// Create the request state entry for the input.
@@ -814,14 +817,27 @@ class EngineImpl : public Engine {
814817
/*! \brief Create a grammar init context according to the response format. If the response format
815818
* is not JSON, return std::nullopt. */
816819
std::optional<xgrammar::CompiledGrammar> GetGrammarFromResponseFormat(
817-
const ResponseFormat& response_format) {
820+
const ResponseFormat& response_format, bool disable_grammar_cache, bool compact_json_output) {
818821
if (response_format.type != "json_object") {
819822
return std::nullopt;
820823
} else if (!response_format.schema) {
821824
return cached_grammar_compiler_.GetCompiledGrammarForJSON();
822825
} else {
823-
return cached_grammar_compiler_.GetCompiledGrammarForJSONSchema(
824-
response_format.schema.value());
826+
std::optional<int> indent = std::nullopt;
827+
std::optional<std::pair<std::string, std::string>> separators = std::nullopt;
828+
if (!compact_json_output) {
829+
std::optional<int> indent = 2;
830+
std::optional<std::pair<std::string, std::string>> separators = std::make_pair(": ", ",");
831+
}
832+
if (disable_grammar_cache) {
833+
return xgrammar::CompiledGrammar(
834+
xgrammar::BuiltinGrammar::JSONSchema(response_format.schema.value(), indent, separators,
835+
true),
836+
token_table_);
837+
} else {
838+
return cached_grammar_compiler_.GetCompiledGrammarForJSONSchema(
839+
response_format.schema.value(), indent, separators, true);
840+
}
825841
}
826842
}
827843

python/mlc_llm/protocol/debug_protocol.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class DebugConfig(BaseModel):
1717
pinned_system_prompt: bool = False
1818
special_request: Optional[Literal["query_engine_metrics"]] = None
1919
grammar_execution_mode: Literal["constraint", "jump_forward"] = "jump_forward"
20+
disable_grammar_cache: bool = False
21+
compact_json_output: bool = False
2022

2123
"""Special request indicators
2224

0 commit comments

Comments
 (0)