Skip to content

Commit 712f1d5

Browse files
committed
[Grammar] Upgrade xgrammar to latest version
- upgrade xgrammar calls to the latest API
1 parent bd72d21 commit 712f1d5

File tree

2 files changed

+10
-8
lines changed

2 files changed

+10
-8
lines changed

cpp/serve/engine.cc

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -463,9 +463,11 @@ class EngineImpl : public Engine {
463463
ModelWorkspace{model->AllocEmbeddingTensor(), model->AllocHiddenStatesTensor()});
464464
}
465465
// - Initialize tokenizer and grammar
466+
466467
n->tokenizer_ = Tokenizer::FromPath(engine_config->model, GetTokenizerInfo(model_configs[0]));
467468
n->token_table_ = n->tokenizer_->PostProcessedTokenTable();
468-
n->cached_grammar_compiler_ = xgrammar::CachedGrammarCompiler(n->token_table_);
469+
// TODO: check 'vocab_size' of TokenizerInfo
470+
n->grammar_compiler_ = xgrammar::GrammarCompiler(xgrammar::TokenizerInfo(n->token_table_));
469471
// - Create the logit processor and sampler, and
470472
// the DraftTokenWorkspaceManager for speculative decoding.
471473
int max_num_tokens = engine_config->max_num_sequence;
@@ -975,13 +977,13 @@ class EngineImpl : public Engine {
975977
* is not JSON, return std::nullopt. */
976978
std::optional<xgrammar::CompiledGrammar> GetGrammarFromResponseFormat(
977979
const ResponseFormat& response_format) {
980+
// TODO: add other grammar type
978981
if (response_format.type != "json_object") {
979982
return std::nullopt;
980983
} else if (!response_format.schema) {
981-
return cached_grammar_compiler_.GetCompiledGrammarForJSON();
984+
return grammar_compiler_.CompileBuiltinJSONGrammar();
982985
} else {
983-
return cached_grammar_compiler_.GetCompiledGrammarForJSONSchema(
984-
response_format.schema.value());
986+
return grammar_compiler_.CompileJSONSchema(response_format.schema.value());
985987
}
986988
}
987989

@@ -992,8 +994,8 @@ class EngineImpl : public Engine {
992994
// internal tokenizer
993995
Tokenizer tokenizer_;
994996
std::vector<std::string> token_table_;
995-
// Cached grammar compiler for grammar matching.
996-
xgrammar::CachedGrammarCompiler cached_grammar_compiler_;
997+
// Grammar compiler for grammar matching.
998+
xgrammar::GrammarCompiler grammar_compiler_;
997999
// Models
9981000
Array<Model> models_;
9991001
// Device that the models run on.

cpp/serve/request_state.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ RequestModelState::RequestModelState(
2424
if (compiled_grammar.has_value()) {
2525
// TODO(yixin): set rollback limit to a configurable value.
2626
n->grammar_matcher =
27-
xgrammar::GrammarMatcher(compiled_grammar.value(), std::nullopt, false, std::nullopt, 10);
27+
xgrammar::GrammarMatcher(compiled_grammar.value(), std::nullopt, false, 10);
2828
}
2929

3030
n->request = std::move(request);
@@ -44,7 +44,7 @@ bool RequestModelStateNode::RequireNextTokenBitmask() { return grammar_matcher.h
4444
void RequestModelStateNode::GetNextTokenBitmask(DLTensor* bitmask) {
4545
ICHECK(grammar_matcher.has_value());
4646

47-
grammar_matcher->GetNextTokenBitmask(bitmask);
47+
grammar_matcher->FillNextTokenBitmask(bitmask);
4848
}
4949

5050
void RequestModelStateNode::CommitToken(SampleResult sampled_token) {

0 commit comments

Comments (0)