Commit fdd859a

Server:ChatON: Add alternate chat using experimental ChatON logic
NOTE: This is not fully complete. I think the current logic sends the chat template string from the model in the tmpl field, while chaton requires a template id. I need to add a cmdline chaton-template-id option and in turn load the meta json file.
1 parent 929ea5c commit fdd859a
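For context on the NOTE, here is a rough sketch of what that planned follow-up might look like: accept a chaton template id on the command line and check it against a chaton meta json file. Nothing below is part of this commit or of the chaton API; the flag spellings, the assumed meta-file layout, and the load_chaton_meta helper are illustrative assumptions only. It reuses the json.hpp parser the server already includes.

    // Hypothetical sketch only; names, flags and meta-file layout are assumptions.
    #include <fstream>
    #include <string>
    #include "json.hpp"

    // Assumed meta layout: { "templates": { "<template-id>": { ... } } }
    static bool load_chaton_meta(const std::string & path, const std::string & tmpl_id) {
        std::ifstream fin(path);
        if (!fin.is_open()) {
            return false; // meta file missing or unreadable
        }
        auto meta = nlohmann::ordered_json::parse(fin, nullptr, /*allow_exceptions=*/false);
        if (meta.is_discarded()) {
            return false; // malformed json
        }
        // succeed only if the requested template id exists in the meta file
        return meta.contains("templates") && meta["templates"].contains(tmpl_id);
    }

    // Hypothetical flags, to be wired into the server's existing option parsing:
    //   --chaton-meta-json <path>    path to the chaton meta json file
    //   --chaton-template-id <id>    id passed to chaton_tmpl_apply_capi instead of the model's template string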

examples/server/utils.hpp

Lines changed: 22 additions & 3 deletions
@@ -4,6 +4,7 @@
 #include "common.h"
 
 #include "json.hpp"
+#include "chaton.hpp"
 
 #include <string>
 #include <vector>
@@ -14,6 +15,8 @@
 
 using json = nlohmann::ordered_json;
 
+const bool bOldChatTemplate = true;
+
 // https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
 enum error_type {
     ERROR_TYPE_INVALID_REQUEST,
@@ -117,7 +120,14 @@ static inline void server_log(const char *level, const char *function, int line,
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 inline bool verify_custom_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
-    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    int res = -1;
+    if (bOldChatTemplate) {
+        res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    } else {
+        std::vector<char> test;
+        test.resize(64);
+        res = chaton_tmpl_apply_capi(tmpl.c_str(), chat, 1, true, test.data(), test.size());
+    }
     return res >= 0;
 }
 
@@ -141,12 +151,21 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     std::vector<char> buf(alloc_size * 2);
 
     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    int32_t res = -1;
+    if (bOldChatTemplate) {
+        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    } else {
+        res = chaton_tmpl_apply_capi(ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    }
 
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        if (bOldChatTemplate) {
+            res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        } else {
+            res = chaton_tmpl_apply_capi(ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        }
     }
 
     const std::string formatted_chat(buf.data(), res);
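
The format_chat hunk keeps the server's existing probe-then-resize pattern: call the template function once, and if the returned length is larger than the buffer, grow the buffer and call again. Below is a minimal standalone sketch of that pattern, written against a generic callable so it covers either backend selected by bOldChatTemplate; the apply_with_resize helper is illustrative only and not part of the commit.

    // Sketch of the probe-then-resize pattern used by format_chat above.
    // apply_fn wraps either llama_chat_apply_template or chaton_tmpl_apply_capi;
    // it returns the number of bytes the formatted chat needs, or < 0 on failure.
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    static std::string apply_with_resize(
            const std::function<int32_t(char * buf, size_t len)> & apply_fn,
            size_t initial_size) {
        std::vector<char> buf(initial_size);
        int32_t res = apply_fn(buf.data(), buf.size()); // first call: may only report the required size
        if (res < 0) {
            return ""; // template rejected / formatting failed
        }
        if ((size_t) res > buf.size()) {
            buf.resize(res);                            // grow to the reported size
            res = apply_fn(buf.data(), buf.size());     // second call fills the buffer
        }
        return std::string(buf.data(), res);
    }

In format_chat, apply_fn would simply be whichever of the two calls bOldChatTemplate selects.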

0 commit comments
