@@ -4,6 +4,7 @@
 #include "common.h"
 
 #include "json.hpp"
+#include "chaton.hpp"
 
 #include <string>
 #include <vector>
@@ -14,6 +15,8 @@
 
 using json = nlohmann::ordered_json;
 
+const bool bOldChatTemplate = true;
+
 // https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
 enum error_type {
     ERROR_TYPE_INVALID_REQUEST,
@@ -117,7 +120,14 @@ static inline void server_log(const char *level, const char *function, int line,
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 inline bool verify_custom_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
-    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    int res = -1;
+    if (bOldChatTemplate) {
+        res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    } else {
+        std::vector<char> test;
+        test.resize(64);
+        res = chaton_tmpl_apply_capi(tmpl.c_str(), chat, 1, true, test.data(), test.size());
+    }
     return res >= 0;
 }
 
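For context, a minimal usage sketch of how server startup code might gate on verify_custom_template(); this is not part of the patch, and the validate_startup_template() wrapper and its error message are hypothetical, introduced here only for illustration:

    // Hypothetical caller (assumption, not from the diff): reject an unsupported
    // --chat-template value before the server starts serving requests.
    // verify_custom_template() is the helper patched above.
    #include <cstdio>
    #include <string>

    bool verify_custom_template(const std::string & tmpl); // declared in utils.hpp

    static bool validate_startup_template(const std::string & chat_template) {
        if (!chat_template.empty() && !verify_custom_template(chat_template)) {
            fprintf(stderr, "error: the supplied --chat-template is not supported\n");
            return false;
        }
        return true;
    }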
@@ -141,12 +151,21 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     std::vector<char> buf(alloc_size * 2);
 
     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    int32_t res = -1;
+    if (bOldChatTemplate) {
+        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    } else {
+        res = chaton_tmpl_apply_capi(ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    }
 
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        if (bOldChatTemplate) {
+            res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        } else {
+            res = chaton_tmpl_apply_capi(ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        }
     }
 
     const std::string formatted_chat(buf.data(), res);
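As a side note, here is a self-contained sketch of the two-pass "query the required size, then resize and retry" pattern that format_chat() relies on above. It is an illustration under assumptions, not part of the patch: apply_stub() is a hypothetical stand-in for llama_chat_apply_template / chaton_tmpl_apply_capi, which both report the number of bytes the formatted chat needs.

    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    // Hypothetical stand-in: writes as much of the formatted chat as fits and
    // always returns the total number of bytes required.
    static int32_t apply_stub(char * dst, size_t dst_size) {
        static const char text[] = "<|user|> test <|assistant|>";
        const size_t need = sizeof(text) - 1;
        if (dst != nullptr && dst_size > 0) {
            memcpy(dst, text, need < dst_size ? need : dst_size);
        }
        return (int32_t) need;
    }

    int main() {
        std::vector<char> buf(8);            // deliberately too small for the first pass
        int32_t res = apply_stub(buf.data(), buf.size());
        if ((size_t) res > buf.size()) {     // first pass only measured; grow and retry
            buf.resize(res);
            res = apply_stub(buf.data(), buf.size());
        }
        const std::string formatted(buf.data(), res);
        return formatted.size() == (size_t) res ? 0 : 1;
    }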