From 41d5af9f0d9fc57ee2030779a502ac5d0aaa6784 Mon Sep 17 00:00:00 2001 From: zxy Date: Fri, 11 Jul 2025 22:56:59 +0800 Subject: [PATCH 1/2] [Bugfix] Fix the bug in Hermes streaming parsing Signed-off-by: zxy --- vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index c7030d34d45..338dadd08cd 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -8,6 +8,7 @@ import partial_json_parser import regex as re from partial_json_parser.core.options import Allow +from json_repair import repair_json from vllm.entrypoints.chat_utils import random_tool_call_id from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, @@ -237,6 +238,9 @@ def extract_tool_calls_streaming( return delta try: + if tool_call_portion is not None: + # repair the JSON if needed + tool_call_portion = repair_json(tool_call_portion) current_tool_call = partial_json_parser.loads( tool_call_portion or "{}", From 95520ef1460c2f394d6fee13db9feceff7ca283d Mon Sep 17 00:00:00 2001 From: zxy Date: Fri, 11 Jul 2025 22:56:59 +0800 Subject: [PATCH 2/2] [Bugfix] Fix the bug in Hermes streaming parsing Signed-off-by: zxy --- requirements/common.txt | 1 + vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/common.txt b/requirements/common.txt index f97fe35d28b..a322377e9f5 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -28,6 +28,7 @@ lark == 1.2.2 xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 +json_repair # used for repairing JSON outputs partial-json-parser # used for parsing partial JSON outputs pyzmq >= 25.0.0 msgspec diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index 338dadd08cd..eb9d6bb9390 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -7,7 +7,6 @@ import partial_json_parser import regex as re -from partial_json_parser.core.options import Allow from json_repair import repair_json from vllm.entrypoints.chat_utils import random_tool_call_id @@ -171,6 +170,7 @@ def extract_tool_calls_streaming( # something with tools with this diff. # flags for partial JSON parting. exported constants from # "Allow" are handled via BIT MASK + from partial_json_parser.core.options import Allow flags = Allow.ALL if self.current_tool_name_sent \ else Allow.ALL & ~Allow.STR