
Commit c1fce50

update
1 parent 614f631 commit c1fce50

File tree: 2 files changed, +50 -7 lines

python/mlc_llm/bench/api_endpoint.py

Lines changed: 8 additions & 3 deletions
@@ -7,10 +7,9 @@
 import traceback
 from typing import Optional
 
-from typing_extensions import Self
-
 from mlc_llm.bench.request_record import Metrics, RequestRecord, ServerMetrics
 from mlc_llm.support import logging
+from typing_extensions import Self
 
 logging.enable_logging()
 logger = logging.getLogger(__name__)
@@ -67,7 +66,7 @@ async def __aexit__(self, exc_type, exc_value, tb) -> None:
     async def __call__(  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
         self, request_record: RequestRecord
     ) -> RequestRecord:
-        payload = request_record.chat_cmpl.model_dump()
+        payload = request_record.chat_cmpl.model_dump(exclude_unset=True, exclude_none=True)
         if self.timeout is not None and "timeout" not in payload:
             payload["timeout"] = self.timeout
         if self.include_server_metrics:
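
The model_dump change is what trims the request body: pydantic v2's exclude_unset/exclude_none drop fields the caller never set or that are still None. A minimal sketch of that semantics, using a hypothetical stand-in model rather than the real ChatCompletionRequest:

from typing import Optional

from pydantic import BaseModel


class FakeChatRequest(BaseModel):
    # Hypothetical stand-in for the pydantic-based ChatCompletionRequest.
    model: str = ""
    max_tokens: Optional[int] = None
    temperature: float = 1.0


req = FakeChatRequest(model="llama")
print(req.model_dump())
# -> {'model': 'llama', 'max_tokens': None, 'temperature': 1.0}
print(req.model_dump(exclude_unset=True, exclude_none=True))
# -> {'model': 'llama'}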
@@ -81,6 +80,12 @@ async def __call__( # pylint: disable=too-many-branches,too-many-statements,too
8180
):
8281
payload["ignore_eos"] = True
8382

83+
print(payload)
84+
85+
if "response_format" in payload and "json_schema" in payload["response_format"]:
86+
payload["response_format"]["schema"] = payload["response_format"]["json_schema"]
87+
payload["response_format"].pop("json_schema")
88+
8489
generated_text = ""
8590
first_chunk_output_str = ""
8691
time_to_first_token_s = None
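
The new response_format branch adapts OpenAI-style payloads for the target server: it is assumed here that this endpoint expects the schema under a "schema" key rather than "json_schema". The rewrite, applied to a toy payload:

# Toy payload; the schema string is only illustrative.
payload = {
    "response_format": {"type": "json_object", "json_schema": '{"type": "object"}'},
}

if "response_format" in payload and "json_schema" in payload["response_format"]:
    payload["response_format"]["schema"] = payload["response_format"]["json_schema"]
    payload["response_format"].pop("json_schema")

print(payload["response_format"])
# -> {'type': 'json_object', 'schema': '{"type": "object"}'}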

python/mlc_llm/bench/dataset.py

Lines changed: 42 additions & 4 deletions
@@ -7,14 +7,13 @@
 
 import numpy as np
 from datasets import load_dataset  # pylint: disable=import-error
-from transformers import AutoTokenizer  # pylint: disable=import-error
-
 from mlc_llm.bench.request_record import Metrics, RequestRecord
 from mlc_llm.protocol.openai_api_protocol import (
     ChatCompletionMessage,
     ChatCompletionRequest,
     DebugConfig,
 )
+from transformers import AutoTokenizer  # pylint: disable=import-error
 
 
 class Dataset:  # pylint: disable=too-few-public-methods
@@ -243,10 +242,11 @@ class JSONModeEvalDataset(Dataset):  # pylint: disable=too-few-public-methods
     """The dataset class for JSON dataset."""
 
     def __init__(self, tokenizer: AutoTokenizer) -> None:
-        raw_dataset = load_dataset("NousResearch/json-mode-eval")
+        raw_dataset = load_dataset("NousResearch/json-mode-eval", split="train")
         self.tokenizer = tokenizer
         self.dataset = []
-        for data in raw_dataset["train"]:
+        for data in raw_dataset:
+            data = self._process_data(data)
             messages = data["prompt"]
             schema = {
                 "type": "json_object",
@@ -259,6 +259,40 @@ def __init__(self, tokenizer: AutoTokenizer) -> None:
             )
             self.dataset.append((messages, schema, num_tokens))
 
+    def _process_data(self, data):
+        data["prompt"][0]["content"] = data["prompt"][0]["content"].replace(
+            ", 'format': 'email'", ""
+        )
+        data["schema"] = data["schema"].replace(', "format": "email"', "")
+
+        data["prompt"][0]["content"] = data["prompt"][0]["content"].replace(
+            ", 'pattern': '\\\\d{5}'", ""
+        )
+        data["schema"] = data["schema"].replace(', "pattern": "\\\\d{5}"', "")
+
+        schema_str = data["schema"]
+        schema = json.loads(schema_str)
+        new_schema = None
+        if "type" not in schema:
+            if len(schema.keys()) == 1:
+                key = list(schema.keys())[0]
+                new_schema = {"title": key, **schema[key]}
+            else:
+                new_schema = {"type": "object", **schema}
+        if new_schema is None:
+            return data
+        return {
+            "prompt": [
+                {
+                    "content": f"You are a helpful assistant that answers in JSON. Here's the json schema you must adhere to:\n<schema>\n{new_schema}\n</schema>\n",
+                    "role": "system",
+                },
+                data["prompt"][1],
+            ],
+            "completion": data["completion"],
+            "schema": json.dumps(new_schema),
+        }
+
     def generate_request_records(
         self,
         input_len: Optional[int],
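
To see what _process_data accomplishes, here is its schema normalization applied to a made-up row in the json-mode-eval shape (schema nested under a single top-level key, no "type" field): the wrapper key becomes a "title" and the inner schema is flattened.

import json

# Made-up schema string in the shape _process_data handles.
row_schema = json.dumps(
    {"person": {"type": "object", "properties": {"name": {"type": "string"}}}}
)

schema = json.loads(row_schema)
if "type" not in schema and len(schema) == 1:
    key = next(iter(schema))
    new_schema = {"title": key, **schema[key]}
    print(new_schema)
    # -> {'title': 'person', 'type': 'object',
    #     'properties': {'name': {'type': 'string'}}}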
@@ -288,6 +322,10 @@ def generate_request_records(
                         model="",
                         max_tokens=output_length,
                         response_format=schema,
+                        debug_config=DebugConfig(
+                            grammar_execution_mode="constraint",
+                            compact_json_output=True,
+                        ),
                     ),
                     metrics=Metrics(
                         success=False,
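
For reference, the request shape each record now carries, sketched with a toy schema string. The field names come from the diff; it is assumed that ChatCompletionMessage takes OpenAI-style role/content fields and that response_format coerces from a plain dict as pydantic models usually allow.

from mlc_llm.protocol.openai_api_protocol import (
    ChatCompletionMessage,
    ChatCompletionRequest,
    DebugConfig,
)

request = ChatCompletionRequest(
    messages=[ChatCompletionMessage(role="user", content="Answer in JSON.")],
    model="",
    max_tokens=256,
    response_format={"type": "json_object", "json_schema": '{"type": "object"}'},
    debug_config=DebugConfig(
        grammar_execution_mode="constraint",
        compact_json_output=True,
    ),
)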
