diff --git a/requirements/common.txt b/requirements/common.txt index f97fe35d28b..a322377e9f5 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -28,6 +28,7 @@ lark == 1.2.2 xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 +json_repair # used for repairing JSON outputs partial-json-parser # used for parsing partial JSON outputs pyzmq >= 25.0.0 msgspec diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index c7030d34d45..eb9d6bb9390 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -7,7 +7,7 @@ import partial_json_parser import regex as re -from partial_json_parser.core.options import Allow +from json_repair import repair_json from vllm.entrypoints.chat_utils import random_tool_call_id from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, @@ -170,6 +170,7 @@ def extract_tool_calls_streaming( # something with tools with this diff. # flags for partial JSON parting. exported constants from # "Allow" are handled via BIT MASK + from partial_json_parser.core.options import Allow flags = Allow.ALL if self.current_tool_name_sent \ else Allow.ALL & ~Allow.STR @@ -237,6 +238,9 @@ def extract_tool_calls_streaming( return delta try: + if tool_call_portion is not None: + # repair the JSON if needed + tool_call_portion = repair_json(tool_call_portion) current_tool_call = partial_json_parser.loads( tool_call_portion or "{}",