Skip to content

Commit 03f582a

Browse files
authored
server: fix streaming crashes (#13786)
* Add preludes to content on partial regex match.
* Allow all parsers to parse non-tool-call content.
* Tweak the order of <|python_tag|> vs. <function= parsing for the functionary v3.1 format; still not ideal, but hopefully less prone to crashing.
1 parent 88c125f commit 03f582a

File tree

7 files changed

+112
-59
lines changed

7 files changed

+112
-59
lines changed

common/chat-parser.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,20 +170,23 @@ std::string common_chat_msg_parser::consume_rest() {
170170
}
171171

172172
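// Tries to find the regex starting at `from` (or at the current position when `from` is npos).
// Returns std::nullopt if there is no match. Otherwise consumes the match (pos_ ends up right after it)
// and, when add_prelude_to_content is true, appends the prelude (the text between the old pos_ and the match) to the content.
// On a partial match it throws common_chat_msg_partial_exception if the input is partial, else returns std::nullopt.
// On a full match it returns the prelude together with the match groups.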
173-
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from) {
173+
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
174174
auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
175175
if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
176176
return std::nullopt;
177177
}
178+
auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
179+
pos_ = m.groups[0].end;
180+
181+
if (add_prelude_to_content) {
182+
add_content(prelude);
183+
}
178184
if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
179185
if (is_partial()) {
180186
throw common_chat_msg_partial_exception(regex.str());
181187
}
182188
return std::nullopt;
183189
}
184-
auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
185-
pos_ = m.groups[0].end;
186-
187190
return find_regex_result{prelude, m.groups};
188191
}
189192

common/chat-parser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class common_chat_msg_parser {
3030
const std::string & healing_marker() const { return healing_marker_; }
3131
const bool & is_partial() const { return is_partial_; }
3232
const common_chat_msg & result() const { return result_; }
33+
const common_chat_syntax & syntax() const { return syntax_; }
3334

3435
void move_to(size_t pos) {
3536
if (pos > input_.size()) {
@@ -77,7 +78,7 @@ class common_chat_msg_parser {
7778
std::vector<common_string_range> groups;
7879
};
7980

80-
std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos);
81+
std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
8182

8283
bool try_consume_literal(const std::string & literal);
8384

common/chat.cpp

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,6 @@ static void parse_json_tool_calls(
656656
}
657657
from = std::string::npos;
658658

659-
builder.add_content(res->prelude);
660659
auto maybe_raw_python = name == "python" && allow_raw_python;
661660
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
662661
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
@@ -686,7 +685,6 @@ static void parse_json_tool_calls(
686685
};
687686
if (block_open) {
688687
if (auto res = builder.try_find_regex(*block_open)) {
689-
builder.add_content(res->prelude);
690688
parse_tool_calls();
691689
} else {
692690
builder.add_content(builder.consume_rest());
@@ -699,7 +697,6 @@ static void parse_json_tool_calls(
699697
static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
700698
static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
701699
if (auto res = builder.try_find_regex(prefix)) {
702-
builder.add_content(res->prelude);
703700
builder.move_back(rstrip_prefix);
704701
auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
705702
if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
@@ -835,6 +832,10 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
835832
return data;
836833
}
837834
static void common_chat_parse_generic(common_chat_msg_parser & builder) {
835+
if (!builder.syntax().parse_tool_calls) {
836+
builder.add_content(builder.consume_rest());
837+
return;
838+
}
838839
static const std::vector<std::vector<std::string>> content_paths = {
839840
{"response"},
840841
};
@@ -907,6 +908,11 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
907908
return data;
908909
}
909910
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
911+
if (!builder.syntax().parse_tool_calls) {
912+
builder.add_content(builder.consume_rest());
913+
return;
914+
}
915+
910916
static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
911917
parse_prefixed_json_tool_call_array(builder, prefix);
912918
}
@@ -1001,7 +1007,6 @@ static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
10011007

10021008
if (auto res = builder.try_find_regex(start_action_regex)) {
10031009
// If we didn't extract thoughts, the prelude (which try_find_regex already added to the content) includes them.
1004-
builder.add_content(res->prelude);
10051010
auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
10061011
for (const auto & tool_call : tool_calls.value) {
10071012
std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
@@ -1016,11 +1021,7 @@ static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
10161021
}
10171022
builder.consume_regex(end_action_regex);
10181023
} else if (auto res = builder.try_find_regex(start_response_regex)) {
1019-
// If we didn't extract thoughts, prelude includes them.
1020-
builder.add_content(res->prelude);
1021-
if (auto res = builder.try_find_regex(end_response_regex)) {
1022-
builder.add_content(res->prelude);
1023-
} else {
1024+
if (!builder.try_find_regex(end_response_regex)) {
10241025
builder.add_content(builder.consume_rest());
10251026
throw common_chat_msg_partial_exception(end_response_regex.str());
10261027
}
@@ -1128,6 +1129,11 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
11281129
return data;
11291130
}
11301131
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
1132+
if (!builder.syntax().parse_tool_calls) {
1133+
builder.add_content(builder.consume_rest());
1134+
return;
1135+
}
1136+
11311137
static const common_regex function_regex(
11321138
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
11331139
static const common_regex close_regex("\\}\\s*");
@@ -1138,8 +1144,6 @@ static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool w
11381144
if (with_builtin_tools) {
11391145
static const common_regex builtin_call_regex("<\\|python_tag\\|>");
11401146
if (auto res = builder.try_find_regex(builtin_call_regex)) {
1141-
builder.add_content(res->prelude);
1142-
11431147
auto fun_res = builder.consume_regex(function_name_regex);
11441148
auto function_name = builder.str(fun_res.groups[1]);
11451149

@@ -1255,6 +1259,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
12551259
}
12561260
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
12571261
builder.try_parse_reasoning("<think>", "</think>");
1262+
if (!builder.syntax().parse_tool_calls) {
1263+
builder.add_content(builder.consume_rest());
1264+
return;
1265+
}
12581266

12591267
static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
12601268
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
@@ -1316,6 +1324,10 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
13161324
return data;
13171325
}
13181326
static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
1327+
if (!builder.syntax().parse_tool_calls) {
1328+
builder.add_content(builder.consume_rest());
1329+
return;
1330+
}
13191331
static const common_regex prefix(regex_escape(" functools["));
13201332
parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
13211333
}
@@ -1457,15 +1469,12 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
14571469
return data;
14581470
}
14591471
static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
1460-
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
1461-
static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
1462-
1463-
if (auto res = builder.try_find_regex(python_tag_regex)) {
1464-
builder.add_content(res->prelude);
1465-
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
1466-
builder.add_tool_call("python", "", arguments);
1472+
if (!builder.syntax().parse_tool_calls) {
1473+
builder.add_content(builder.consume_rest());
14671474
return;
14681475
}
1476+
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
1477+
static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
14691478

14701479
static const common_regex function_regex(R"(<function=(\w+)>)");
14711480
static const common_regex close_regex(R"(</function>)");
@@ -1477,6 +1486,12 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser
14771486
function_regex,
14781487
close_regex,
14791488
std::nullopt);
1489+
1490+
if (auto res = builder.try_find_regex(python_tag_regex)) {
1491+
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
1492+
builder.add_tool_call("python", "", arguments);
1493+
return;
1494+
}
14801495
}
14811496

14821497
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -1595,6 +1610,10 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
15951610
}
15961611
static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
15971612
builder.try_parse_reasoning("<think>", "</think>");
1613+
if (!builder.syntax().parse_tool_calls) {
1614+
builder.add_content(builder.consume_rest());
1615+
return;
1616+
}
15981617

15991618
static const common_regex open_regex(
16001619
"(?:"
@@ -1616,8 +1635,6 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
16161635
);
16171636

16181637
if (auto res = builder.try_find_regex(open_regex)) {
1619-
builder.add_content(res->prelude);
1620-
16211638
const auto & block_start = res->groups[1];
16221639
std::string block_end = block_start.empty() ? "" : "```";
16231640

@@ -1853,10 +1870,10 @@ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
18531870
builder.add_content(builder.consume_rest());
18541871
}
18551872

1856-
static void common_chat_parse(common_chat_msg_parser & builder, common_chat_format format) {
1857-
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(format), builder.input().c_str());
1873+
static void common_chat_parse(common_chat_msg_parser & builder) {
1874+
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
18581875

1859-
switch (format) {
1876+
switch (builder.syntax().format) {
18601877
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
18611878
common_chat_parse_content_only(builder);
18621879
break;
@@ -1891,15 +1908,15 @@ static void common_chat_parse(common_chat_msg_parser & builder, common_chat_form
18911908
common_chat_parse_command_r7b(builder);
18921909
break;
18931910
default:
1894-
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(format));
1911+
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
18951912
}
18961913
builder.finish();
18971914
}
18981915

18991916
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
19001917
common_chat_msg_parser builder(input, is_partial, syntax);
19011918
try {
1902-
common_chat_parse(builder, syntax.format);
1919+
common_chat_parse(builder);
19031920
} catch (const common_chat_msg_partial_exception & ex) {
19041921
LOG_DBG("Partial parse: %s\n", ex.what());
19051922
if (!is_partial) {

common/chat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ struct common_chat_syntax {
144144
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
145145
bool reasoning_in_content = false;
146146
bool thinking_forced_open = false;
147+
bool parse_tool_calls = true;
147148
};
148149

149150
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid

0 commit comments

Comments
 (0)