@@ -463,9 +463,11 @@ class EngineImpl : public Engine {
463
463
ModelWorkspace{model->AllocEmbeddingTensor (), model->AllocHiddenStatesTensor ()});
464
464
}
465
465
// - Initialize tokenizer and grammar
466
+
466
467
n->tokenizer_ = Tokenizer::FromPath (engine_config->model , GetTokenizerInfo (model_configs[0 ]));
467
468
n->token_table_ = n->tokenizer_ ->PostProcessedTokenTable ();
468
- n->cached_grammar_compiler_ = xgrammar::CachedGrammarCompiler (n->token_table_ );
469
+ // TODO: check 'vocab_size' of TokenizerInfo
470
+ n->grammar_compiler_ = xgrammar::GrammarCompiler (xgrammar::TokenizerInfo (n->token_table_ ));
469
471
// - Create the logit processor and sampler, and
470
472
// the DraftTokenWorkspaceManager for speculative decoding.
471
473
int max_num_tokens = engine_config->max_num_sequence ;
@@ -975,13 +977,13 @@ class EngineImpl : public Engine {
975
977
* is not JSON, return std::nullopt. */
976
978
std::optional<xgrammar::CompiledGrammar> GetGrammarFromResponseFormat (
977
979
const ResponseFormat& response_format) {
980
+ // TODO: add other grammar type
978
981
if (response_format.type != " json_object" ) {
979
982
return std::nullopt;
980
983
} else if (!response_format.schema ) {
981
- return cached_grammar_compiler_. GetCompiledGrammarForJSON ();
984
+ return grammar_compiler_. CompileBuiltinJSONGrammar ();
982
985
} else {
983
- return cached_grammar_compiler_.GetCompiledGrammarForJSONSchema (
984
- response_format.schema .value ());
986
+ return grammar_compiler_.CompileJSONSchema (response_format.schema .value ());
985
987
}
986
988
}
987
989
@@ -992,8 +994,8 @@ class EngineImpl : public Engine {
992
994
// internal tokenizer
993
995
Tokenizer tokenizer_;
994
996
std::vector<std::string> token_table_;
995
- // Cached grammar compiler for grammar matching.
996
- xgrammar::CachedGrammarCompiler cached_grammar_compiler_ ;
997
+ // Grammar compiler for grammar matching.
998
+ xgrammar::GrammarCompiler grammar_compiler_ ;
997
999
// Models
998
1000
Array<Model> models_;
999
1001
// Device that the models run on.
0 commit comments