Skip to content

Commit c7a3665

Browse files
committed
Merge branch 'remoteManagement' into crokeso
2 parents 47dc9a6 + c87e81f commit c7a3665

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+3901
-1347
lines changed

common/arg.cpp

Lines changed: 127 additions & 98 deletions
Large diffs are not rendered by default.

common/chat-parser.cpp

Lines changed: 379 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,379 @@
1+
#include "chat-parser.h"
2+
#include "common.h"
3+
#include "log.h"
4+
#include "regex-partial.h"
5+
6+
#include <optional>
7+
#include <stdexcept>
8+
#include <string>
9+
#include <vector>
10+
11+
using json = nlohmann::ordered_json;
12+
13+
// Constructs a parser over `input`, remembering whether the input is partial
// and which syntax options apply. The result role is preset to "assistant",
// and a healing marker is picked such that it does not occur in `input`.
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
    : input_(input), is_partial_(is_partial), syntax_(syntax)
{
    result_.role = "assistant";

    // Keep drawing random decimal strings until one is absent from the input.
    std::string candidate;
    do {
        candidate = std::to_string(std::rand());
    } while (input.find(candidate) != std::string::npos);
    healing_marker_ = candidate;
}
26+
27+
std::string common_chat_msg_parser::str(const common_string_range & rng) const {
28+
GGML_ASSERT(rng.begin <= rng.end);
29+
return input_.substr(rng.begin, rng.end - rng.begin);
30+
}
31+
32+
// Appends `content` to the content accumulated in the result message.
void common_chat_msg_parser::add_content(const std::string &content) {
    result_.content.append(content);
}
35+
36+
// Appends `reasoning_content` to the reasoning text accumulated in the result message.
void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
    result_.reasoning_content.append(reasoning_content);
}
39+
40+
bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
41+
if (name.empty()) {
42+
return false;
43+
}
44+
45+
common_chat_tool_call tool_call;
46+
tool_call.name = name;
47+
tool_call.arguments = arguments;
48+
tool_call.id = id;
49+
50+
// LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
51+
result_.tool_calls.emplace_back(tool_call);
52+
return true;
53+
}
54+
bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
55+
std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
56+
std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
57+
std::string arguments = tool_call.contains("arguments") ? tool_call.at("arguments") : "";
58+
return add_tool_call(name, id, arguments);
59+
}
60+
61+
bool common_chat_msg_parser::add_tool_calls(const json & arr) {
62+
for (const auto & item : arr) {
63+
if (!add_tool_call(item)) {
64+
return false;
65+
}
66+
}
67+
return true;
68+
}
69+
void common_chat_msg_parser::finish() {
70+
if (!is_partial_ && pos_ != input_.size()) {
71+
throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
72+
}
73+
}
74+
75+
bool common_chat_msg_parser::consume_spaces() {
76+
const auto length = input_.size();
77+
auto consumed = false;
78+
while (pos_ < length && std::isspace(input_[pos_])) {
79+
++pos_;
80+
consumed = true;
81+
}
82+
return consumed;
83+
}
84+
85+
bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
86+
auto pos = pos_;
87+
for (auto i = 0u; i < literal.size(); ++i) {
88+
if (pos >= input_.size()) {
89+
return false;
90+
}
91+
if (input_[pos] != literal[i]) {
92+
return false;
93+
}
94+
++pos;
95+
}
96+
pos_ = pos;
97+
return true;
98+
}
99+
100+
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_literal(const std::string & literal) {
101+
auto idx = input_.find(literal, pos_);
102+
if (idx != std::string::npos) {
103+
find_regex_result res;
104+
res.prelude = input_.substr(pos_, idx - pos_);
105+
auto end = idx + literal.size();
106+
res.groups.emplace_back(common_string_range{idx, end});
107+
move_to(end);
108+
return res;
109+
}
110+
if (is_partial_) {
111+
idx = string_find_partial_stop(input_, literal);
112+
if (idx != std::string::npos && idx >= pos_) {
113+
find_regex_result res;
114+
res.prelude = input_.substr(pos_, idx - pos_);
115+
auto end = input_.size();
116+
res.groups.emplace_back(common_string_range{idx, end});
117+
move_to(end);
118+
return res;
119+
}
120+
}
121+
return std::nullopt;
122+
}
123+
124+
void common_chat_msg_parser::consume_literal(const std::string & literal) {
125+
if (!try_consume_literal(literal)) {
126+
throw common_chat_msg_partial_exception(literal);
127+
}
128+
}
129+
130+
bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
131+
auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
132+
auto stripped_reasoning = string_strip(reasoning);
133+
if (stripped_reasoning.empty()) {
134+
return;
135+
}
136+
if (syntax_.reasoning_in_content) {
137+
add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
138+
add_content(stripped_reasoning);
139+
if (closed) {
140+
add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
141+
}
142+
} else {
143+
add_reasoning_content(stripped_reasoning);
144+
}
145+
};
146+
if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) {
147+
if (syntax_.thinking_forced_open || try_consume_literal(start_think)) {
148+
if (auto res = try_find_literal(end_think)) {
149+
handle_reasoning(res->prelude, /* closed */ true);
150+
consume_spaces();
151+
return true;
152+
}
153+
auto rest = consume_rest();
154+
if (!rest.empty()) {
155+
handle_reasoning(rest, /* closed */ !is_partial());
156+
}
157+
if (!syntax_.thinking_forced_open) {
158+
throw common_chat_msg_partial_exception(end_think);
159+
}
160+
return true;
161+
}
162+
}
163+
return false;
164+
}
165+
166+
// Returns everything from the current position to the end of the input and
// moves the cursor to the end of the input.
std::string common_chat_msg_parser::consume_rest() {
    std::string tail(input_, pos_);
    pos_ = input_.size();
    return tail;
}
171+
172+
// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
173+
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
174+
auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
175+
if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
176+
return std::nullopt;
177+
}
178+
auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
179+
pos_ = m.groups[0].end;
180+
181+
if (add_prelude_to_content) {
182+
add_content(prelude);
183+
}
184+
if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
185+
if (is_partial()) {
186+
throw common_chat_msg_partial_exception(regex.str());
187+
}
188+
return std::nullopt;
189+
}
190+
return find_regex_result{prelude, m.groups};
191+
}
192+
193+
// Consumes `regex` at the current position and returns its match.
// Throws common_chat_msg_partial_exception (carrying the pattern) when the
// regex cannot be consumed there.
common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
    auto matched = try_consume_regex(regex);
    if (!matched) {
        throw common_chat_msg_partial_exception(regex.str());
    }
    return *matched;
}
199+
200+
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
201+
auto m = regex.search(input_, pos_);
202+
if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
203+
return std::nullopt;
204+
}
205+
if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
206+
if (is_partial()) {
207+
throw common_chat_msg_partial_exception(regex.str());
208+
}
209+
return std::nullopt;
210+
}
211+
if (m.groups[0].begin != pos_) {
212+
// Didn't match at the current position.
213+
return std::nullopt;
214+
}
215+
pos_ = m.groups[0].end;
216+
217+
return find_regex_result {
218+
/* .prelude = */ "",
219+
m.groups,
220+
};
221+
}
222+
223+
std::optional<common_json> common_chat_msg_parser::try_consume_json() {
224+
auto it = input_.cbegin() + pos_;
225+
const auto end = input_.cend();
226+
common_json result;
227+
if (!common_json_parse(it, end, healing_marker_, result)) {
228+
return std::nullopt;
229+
}
230+
pos_ = std::distance(input_.cbegin(), it);
231+
if (result.healing_marker.marker.empty()) {
232+
// No healing marker, just return the parsed json
233+
return result;
234+
}
235+
if (!is_partial()) {
236+
throw common_chat_msg_partial_exception("JSON");
237+
}
238+
return result;
239+
}
240+
241+
// Parses a JSON value at the current position and returns it.
// Throws common_chat_msg_partial_exception("JSON") when no JSON value could
// be consumed.
common_json common_chat_msg_parser::consume_json() {
    auto parsed = try_consume_json();
    if (!parsed) {
        throw common_chat_msg_partial_exception("JSON");
    }
    return *parsed;
}
247+
248+
// Throwing counterpart of try_consume_json_with_dumped_args: returns its
// result, or throws common_chat_msg_partial_exception("JSON") when it yields
// no value.
common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
    const std::vector<std::vector<std::string>> & args_paths,
    const std::vector<std::vector<std::string>> & content_paths
) {
    auto consumed = try_consume_json_with_dumped_args(args_paths, content_paths);
    if (!consumed) {
        throw common_chat_msg_partial_exception("JSON");
    }
    return *consumed;
}
257+
258+
std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
259+
const std::vector<std::vector<std::string>> & args_paths,
260+
const std::vector<std::vector<std::string>> & content_paths
261+
) {
262+
auto partial = try_consume_json();
263+
if (!partial) {
264+
return std::nullopt;
265+
}
266+
auto is_arguments_path = [&](const std::vector<std::string> & path) {
267+
return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
268+
};
269+
auto is_content_path = [&](const std::vector<std::string> & path) {
270+
return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
271+
};
272+
273+
if (partial->healing_marker.marker.empty()) {
274+
if (args_paths.empty()) {
275+
// No arguments to dump, and JSON was parsed fully.
276+
return consume_json_result {
277+
partial->json,
278+
/* .is_partial = */ false,
279+
};
280+
}
281+
if (is_arguments_path({})) {
282+
// Entire JSON is the arguments and was parsed fully.
283+
return consume_json_result {
284+
partial->json.dump(),
285+
/* .is_partial = */ false,
286+
};
287+
}
288+
}
289+
290+
LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
291+
292+
auto found_healing_marker = false;
293+
std::vector<std::string> path;
294+
std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
295+
if (is_arguments_path(path)) {
296+
auto arguments = j.dump();
297+
if (is_partial() && !partial->healing_marker.marker.empty()) {
298+
auto idx = arguments.find(partial->healing_marker.json_dump_marker);
299+
if (idx != std::string::npos) {
300+
arguments.resize(idx);
301+
found_healing_marker = true;
302+
}
303+
if (arguments == "\"") {
304+
// This happens because of completing `:"$magic` after `"arguments"`
305+
arguments = "";
306+
}
307+
}
308+
return arguments;
309+
}
310+
if (is_content_path(path)) {
311+
if (!j.is_string()) {
312+
throw std::runtime_error("Content path must be a string");
313+
}
314+
std::string str = j;
315+
auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
316+
if (idx != std::string::npos) {
317+
str.resize(idx);
318+
found_healing_marker = true;
319+
}
320+
return str;
321+
}
322+
if (j.is_object()) {
323+
auto obj = json::object();
324+
for (const auto & p : j.items()) {
325+
const auto & key = p.key();
326+
const auto & value = p.value();
327+
const std::string key_str = key; // NOLINT
328+
auto idx = key_str.find(healing_marker_);
329+
if (idx != std::string::npos) {
330+
found_healing_marker = true;
331+
break;
332+
}
333+
path.push_back(key_str);
334+
if (value.is_string()) {
335+
const std::string value_str = value;
336+
if (value_str.find(healing_marker_) != std::string::npos) {
337+
found_healing_marker = true;
338+
if (is_content_path(path)) {
339+
if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
340+
// The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
341+
obj[key] = remove_unsupported_healings_and_dump_args(value);
342+
}
343+
}
344+
break;
345+
}
346+
obj[key] = value;
347+
} else {
348+
obj[key] = remove_unsupported_healings_and_dump_args(value);
349+
}
350+
path.pop_back();
351+
}
352+
return obj;
353+
}
354+
if (j.is_array()) {
355+
auto arr = json::array();
356+
for (const auto & value : j) {
357+
if (value.is_string()) {
358+
std::string str = value;
359+
auto idx = str.find(healing_marker_);
360+
if (idx != std::string::npos) {
361+
// Don't heal array values that aren't in the arguments.
362+
found_healing_marker = true;
363+
break;
364+
}
365+
}
366+
arr.push_back(remove_unsupported_healings_and_dump_args(value));
367+
}
368+
return arr;
369+
}
370+
return j;
371+
};
372+
373+
auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
374+
LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
375+
return consume_json_result {
376+
cleaned,
377+
/* .is_partial = */ found_healing_marker,
378+
};
379+
}

0 commit comments

Comments
 (0)