Commit fdd859a

Server:ChatON: Add alternate chat using experimental ChatON logic
NOTE: This is not fully complete. I think the current logic sends the chat template string from the model in the tmpl field, while chaton requires a template id. I need to add a cmdline chaton-template-id option and in turn load the meta json file.
1 parent 929ea5c commit fdd859a
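For context on the NOTE, here is a rough sketch of what that planned follow-up might look like: accept a chaton template id on the command line and check it against a chaton meta json file. Nothing below is part of this commit or of the chaton API; the flag spellings, the assumed meta-file layout, and the load_chaton_meta helper are illustrative assumptions only. It reuses the json.hpp parser the server already includes.

    // Hypothetical sketch only; names, flags and meta-file layout are assumptions.
    #include <fstream>
    #include <string>
    #include "json.hpp"

    // Assumed meta layout: { "templates": { "<template-id>": { ... } } }
    static bool load_chaton_meta(const std::string & path, const std::string & tmpl_id) {
        std::ifstream fin(path);
        if (!fin.is_open()) {
            return false; // meta file missing or unreadable
        }
        auto meta = nlohmann::ordered_json::parse(fin, nullptr, /*allow_exceptions=*/false);
        if (meta.is_discarded()) {
            return false; // malformed json
        }
        // succeed only if the requested template id exists in the meta file
        return meta.contains("templates") && meta["templates"].contains(tmpl_id);
    }

    // Hypothetical flags, to be wired into the server's existing option parsing:
    //   --chaton-meta-json <path>    path to the chaton meta json file
    //   --chaton-template-id <id>    id passed to chaton_tmpl_apply_capi instead of the model's template string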

examples/server/utils.hpp

Lines changed: 22 additions & 3 deletions
@@ -4,6 +4,7 @@
 #include "common.h"
 
 #include "json.hpp"
+#include "chaton.hpp"
 
 #include <string>
 #include <vector>
@@ -14,6 +15,8 @@
 
 using json = nlohmann::ordered_json;
 
+const bool bOldChatTemplate = true;
+
 // https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
 enum error_type {
     ERROR_TYPE_INVALID_REQUEST,
@@ -117,7 +120,14 @@ static inline void server_log(const char *level, const char *function, int line,
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 inline bool verify_custom_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
-    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    int res = -1;
+    if (bOldChatTemplate) {
+        res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    } else {
+        std::vector<char> test;
+        test.resize(64);
+        res = chaton_tmpl_apply_capi(tmpl.c_str(), chat, 1, true, test.data(), test.size());
+    }
     return res >= 0;
 }
 
@@ -141,12 +151,21 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     std::vector<char> buf(alloc_size * 2);
 
     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    int32_t res = -1;
+    if (bOldChatTemplate) {
+        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    } else {
+        res = chaton_tmpl_apply_capi(ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+    }
 
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        if (bOldChatTemplate) {
+            res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        } else {
+            res = chaton_tmpl_apply_capi(ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
+        }
     }
 
     const std::string formatted_chat(buf.data(), res);
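
The format_chat hunk keeps the server's existing probe-then-resize pattern: call the template function once, and if the returned length is larger than the buffer, grow the buffer and call again. Below is a minimal standalone sketch of that pattern, written against a generic callable so it covers either backend selected by bOldChatTemplate; the apply_with_resize helper is illustrative only and not part of the commit.

    // Sketch of the probe-then-resize pattern used by format_chat above.
    // apply_fn wraps either llama_chat_apply_template or chaton_tmpl_apply_capi;
    // it returns the number of bytes the formatted chat needs, or < 0 on failure.
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    static std::string apply_with_resize(
            const std::function<int32_t(char * buf, size_t len)> & apply_fn,
            size_t initial_size) {
        std::vector<char> buf(initial_size);
        int32_t res = apply_fn(buf.data(), buf.size()); // first call: may only report the required size
        if (res < 0) {
            return ""; // template rejected / formatting failed
        }
        if ((size_t) res > buf.size()) {
            buf.resize(res);                            // grow to the reported size
            res = apply_fn(buf.data(), buf.size());     // second call fills the buffer
        }
        return std::string(buf.data(), res);
    }

In format_chat, apply_fn would simply be whichever of the two calls bOldChatTemplate selects.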

0 commit comments
