From db6f775c93848a56ccee8d53b0eaaaa76e53e409 Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 11:44:15 +0530
Subject: [PATCH 1/7] Common:ChatOn: Add arguments for chaton

user needs to pass --chaton TEMPLATE_ID

TEMPLATE_ID will be one of the predefined chat templates already in
llama.cpp's llama_chat_apply_template_internal and related, like
chatml, llama2, llama3, ...
---
 common/common.cpp | 11 +++++++++++
 common/common.h   |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/common/common.cpp b/common/common.cpp
index cf69535e2d1f5..b704d6f1f986a 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -868,6 +868,15 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.chatml = true;
         return true;
     }
+    if (arg == "--chaton") {
+        params.chaton = true;
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.chaton_template_id = argv[i];
+        return true;
+    }
     if (arg == "--infill") {
         params.infill = true;
         return true;
@@ -1378,6 +1387,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  --version             show version and build info\n");
     printf("  -i, --interactive     run in interactive mode\n");
     printf("  --interactive-first   run in interactive mode and wait for input right away\n");
+    printf("  --chaton TEMPLATE_ID  allow the interactive mode to apply the specified chat template before sending user input to model (you need to specify -i also)\n");
+    printf("                        TEMPLATE_ID could be chatml, llama3, ...\n");
     printf("  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
     printf("  -cml, --chatml        run in chatml mode (use with ChatML-compatible models)\n");
     printf("  --multiline-input     allows you to write or paste multiple lines without ending each in '\\'\n");
diff --git a/common/common.h b/common/common.h
index cca44268e6df5..931317c832153 100644
--- a/common/common.h
+++ b/common/common.h
@@ -139,6 +139,8 @@ struct gpt_params {
     bool use_color         = false; // use color to distinguish generations and inputs
     bool interactive       = false; // interactive mode
     bool chatml            = false; // chatml mode (used for models trained on chatml syntax)
+    bool chaton            = false; // chaton mode (used to chat with models which have been trained for chat and or instruct operation)
+    std::string chaton_template_id = ""; // the internal chat template to use
     bool prompt_cache_all  = false; // save user input and generations to prompt cache
     bool prompt_cache_ro   = false; // open the prompt cache read-only and do not update it

From efbcdc1cafa315782b4bb5330fa3672b319b4ad3 Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 11:58:15 +0530
Subject: [PATCH 2/7] Common:ChatOn: ReversePrompts, SingleMsgChatTemplate wrapper

Helper to return reverse prompts needed for a given chat template.

A wrapper that will allow wrapping a given message within a tagged chat
template based on the role and chat template specified.
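
As a rough illustration (illustrative only, not part of this patch), with
the chatml template id the wrapper is expected to behave along these lines,
with the exact tagging coming from llama_chat_apply_template:

    // tag a single user message; add_ass=true asks for the assistant prefix
    std::string tagged = llama_chat_apply_template_simple("chatml", "user", "Hello there", true);
    // tagged is then expected to look roughly like:
    //   <|im_start|>user
    //   Hello there<|im_end|>
    //   <|im_start|>assistant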
---
 common/CMakeLists.txt |  1 +
 common/chaton.hpp     | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 common/chaton.hpp

diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 0ec8d6d8d03b5..fe865abab708d 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(${TARGET} STATIC
     train.cpp
     ngram-cache.h
     ngram-cache.cpp
+    chaton.hpp
     )

 if (BUILD_SHARED_LIBS)
diff --git a/common/chaton.hpp b/common/chaton.hpp
new file mode 100644
index 0000000000000..62e82d658e7e7
--- /dev/null
+++ b/common/chaton.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "llama.h"
+#include "log.h"
+
+inline std::string llama_chat_apply_template_simple(
+        const std::string & tmpl,
+        const std::string &role,
+        const std::string &content,
+        bool add_ass) {
+    llama_chat_message msg = { role.c_str(), content.c_str() };
+    std::vector<llama_chat_message> msgs{ msg };
+    std::vector<char> buf(content.size() * 2);
+
+    int32_t slen = llama_chat_apply_template(nullptr, tmpl.c_str(), msgs.data(), msgs.size(), add_ass, buf.data(), buf.size());
+    if ((size_t) slen > buf.size()) {
+        buf.resize(slen);
+        slen = llama_chat_apply_template(nullptr, tmpl.c_str(), msgs.data(), msgs.size(), add_ass, buf.data(), buf.size());
+    }
+
+    const std::string tagged_msg(buf.data(), slen);
+    LOGLN("INFO:%s:%s", __func__, tagged_msg.c_str());
+    return tagged_msg;
+}
+
+// return what should be the reverse prompt for the given template id
+// ie possible end text tag(s) of specified model type's chat query response
+std::vector<std::string> llama_chat_reverse_prompt(std::string &template_id) {
+    std::vector<std::string> rends;
+
+    if (template_id == "chatml") {
+        rends.push_back("<|im_start|>user\n");
+    } else if (template_id == "llama3") {
+        rends.push_back("<|eot_id|>");
+    }
+    return rends;
+}

From 0a8797b28eacb3916f0b9be137d62c9725b66918 Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 18:40:55 +0530
Subject: [PATCH 3/7] Main:Update to support chaton mode

Glanced through the existing interactive and chatml flow, to incorporate
this flow. Need to look deeper later.

NOTE: Up to this point this is a reapplication of my initial go at
chaton, while simplifying the amount of change done to the existing code
a bit more.
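
For example, the intended invocation (model path and prompt are
illustrative) is along the lines of:

    ./main -m models/model.gguf -i --chaton chatml -p "you are a helpful assistant"

i.e. --chaton is meant to be used together with -i, with the TEMPLATE_ID
matching the model being loaded.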
---
 examples/main/main.cpp | 52 +++++++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 249fc2bb605b3..a073a7bfdc3ad 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -1,4 +1,5 @@
 #include "common.h"
+#include "chaton.hpp"
 #include "console.h"
 #include "llama.h"

@@ -251,11 +252,14 @@ int main(int argc, char ** argv) {

     std::vector<llama_token> embd_inp;

-    if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
-        LOG("tokenize the prompt\n");
+    if (params.interactive_first || params.instruct || params.chatml || params.chaton || !params.prompt.empty() || session_tokens.empty()) {
+        LOG("tokenize the prompt: %s\n", params.prompt.c_str());
         if (params.chatml) {
             params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
         }
+        if (params.chaton) {
+            params.prompt = llama_chat_apply_template_simple(params.chaton_template_id, "system", params.prompt, false);
+        }
         embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
     } else {
         LOG("use session tokens\n");
@@ -333,7 +337,7 @@ int main(int argc, char ** argv) {
     }

     // number of tokens to keep when resetting context
-    if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct || params.chatml) {
+    if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct || params.chatml || params.chaton) {
         params.n_keep = (int)embd_inp.size();
     } else {
         params.n_keep += add_bos; // always keep the BOS token
@@ -363,6 +367,19 @@ int main(int argc, char ** argv) {
         params.interactive_first = true;
         params.antiprompt.emplace_back("<|im_start|>user\n");
     }
+    // handle chaton mode, it adds on to any reverse prompt specified explicitly by the user
+    if (params.chaton) {
+        params.interactive_first = true;
+        std::vector<std::string> resp_ends = llama_chat_reverse_prompt(params.chaton_template_id);
+        if (resp_ends.size() == 0) {
+            LOG_TEELN("ERRR:%s:ChatOn:Unsupported ChatType:%s", __func__, params.chaton_template_id.c_str());
+            exit(1);
+        }
+        for (size_t i = 0; i < resp_ends.size(); i++)
+        {
+            params.antiprompt.emplace_back(resp_ends[i]);
+        }
+    }

     // enable interactive mode if interactive start is specified
     if (params.interactive_first) {
@@ -817,7 +834,7 @@ int main(int argc, char ** argv) {
         if (n_past > 0 && is_interacting) {
             LOG("waiting for user input\n");

-            if (params.instruct || params.chatml) {
+            if (params.instruct || params.chatml || params.chaton) {
                 printf("\n> ");
             }

@@ -876,15 +893,23 @@ int main(int argc, char ** argv) {
                     process_escapes(buffer);
                 }

-                const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
-                const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
-
-                LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
-
-                embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
-                embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
-                embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
+                std::vector<llama_token> line_inp;
+                if (params.chaton) {
+                    std::string f_chat = llama_chat_apply_template_simple(params.chaton_template_id, "user", buffer.c_str(), true);
+                    line_inp = ::llama_tokenize(ctx, f_chat, false, true);
+                    LOG("formatted input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
+                    embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
+                } else {
+                    const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
+                    line_inp = ::llama_tokenize(ctx, buffer, false, false);
+                    const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
+
+                    LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
+
+                    embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
+                    embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
+                    embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
+                }

                 // instruct mode: insert response suffix
                 if (params.instruct) {
@@ -921,6 +946,7 @@ int main(int argc, char ** argv) {
         }

         // end of text token
+        // chaton expected to be used along with interactive argument, so not checking for chaton separately
         if (!embd.empty() && embd.back() == llama_token_eos(model) && !(params.instruct || params.interactive || params.chatml)) {
             LOG_TEE(" [end of text]\n");
             break;

From aac2ee6e9dbc0c14e99fbf7e3b86f88f5ecc561f Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 20:08:00 +0530
Subject: [PATCH 4/7] Common:ChatOn+Main:DBUG: Cleanup ChatTmplSimp, RevPrompt Llama2

This is a commit with dbug messages.

ChatApplyTemplateSimple

* wasn't handling unknown template ids properly; this is identified now
  and a warning logged, rather than trying to work with a length of -1.
  Need to change this to quit later.

* Also avoid wrapping in a vector, as only a single message can be
  tagged wrt the chat handshake template.

ReversePrompt

Add support for llama2
---
 common/chaton.hpp      | 18 +++++++++++++-----
 examples/main/main.cpp |  4 +++-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/common/chaton.hpp b/common/chaton.hpp
index 62e82d658e7e7..da2834668554a 100644
--- a/common/chaton.hpp
+++ b/common/chaton.hpp
@@ -7,18 +7,24 @@
 #include "log.h"

 inline std::string llama_chat_apply_template_simple(
-        const std::string & tmpl,
+        const std::string &tmpl,
         const std::string &role,
         const std::string &content,
         bool add_ass) {
     llama_chat_message msg = { role.c_str(), content.c_str() };
-    std::vector<llama_chat_message> msgs{ msg };
+    //std::vector<llama_chat_message> msgs{ msg };
     std::vector<char> buf(content.size() * 2);

-    int32_t slen = llama_chat_apply_template(nullptr, tmpl.c_str(), msgs.data(), msgs.size(), add_ass, buf.data(), buf.size());
+    int32_t slen = llama_chat_apply_template(nullptr, tmpl.c_str(), &msg, 1, add_ass, buf.data(), buf.size());
+    LOG_TEELN("DBUG:%s:AA:%s:LengthNeeded:%d:BufSizeWas:%zu", __func__, role.c_str(), slen, buf.size());
+    if (slen == -1) {
+        LOG_TEELN("WARN:%s:Unknown template [%s] encountered", __func__, tmpl.c_str());
+        return "";
+    }
     if ((size_t) slen > buf.size()) {
         buf.resize(slen);
-        slen = llama_chat_apply_template(nullptr, tmpl.c_str(), msgs.data(), msgs.size(), add_ass, buf.data(), buf.size());
+        slen = llama_chat_apply_template(nullptr, tmpl.c_str(), &msg, 1, add_ass, buf.data(), buf.size());
+        LOG_TEELN("DBUG:%s:BB:%s:LengthNeeded:%d:BufSizeWas:%zu", __func__, role.c_str(), slen, buf.size());
     }

     const std::string tagged_msg(buf.data(), slen);
@@ -28,11 +34,13 @@ inline std::string llama_chat_apply_template_simple(

 // return what should be the reverse prompt for the given template id
 // ie possible end text tag(s) of specified model type's chat query response
-std::vector<std::string> llama_chat_reverse_prompt(std::string &template_id) {
+inline std::vector<std::string> llama_chat_reverse_prompt(std::string &template_id) {
     std::vector<std::string> rends;

     if (template_id == "chatml") {
         rends.push_back("<|im_start|>user\n");
+    } else if (template_id == "llama2") {
+        rends.push_back("</s>");
     } else if (template_id == "llama3") {
rends.push_back("<|eot_id|>"); } diff --git a/examples/main/main.cpp b/examples/main/main.cpp index a073a7bfdc3ad..e04b26d36bf25 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -258,7 +258,9 @@ int main(int argc, char ** argv) { params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>"; } if (params.chaton) { + LOG_TEELN("DBUG:%s:AA:%s", __func__, params.prompt.c_str()); params.prompt = llama_chat_apply_template_simple(params.chaton_template_id, "system", params.prompt, false); + LOG_TEELN("DBUG:%s:BB:%s", __func__, params.prompt.c_str()); } embd_inp = ::llama_tokenize(ctx, params.prompt, true, true); } else { @@ -372,7 +374,7 @@ int main(int argc, char ** argv) { params.interactive_first = true; std::vector resp_ends = llama_chat_reverse_prompt(params.chaton_template_id); if (resp_ends.size() == 0) { - LOG_TEELN("ERRR:%s:ChatOn:Unsupported ChatType:%s", __func__, params.chaton_template_id.c_str()); + LOG_TEELN("ERRR:%s:ChatOn:Unsupported ChatTemplateType:%s", __func__, params.chaton_template_id.c_str()); exit(1); } for (size_t i = 0; i < resp_ends.size(); i++) From ca55da2b6fa676f9cd3c286e98063b9c5a2d9c41 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Sat, 20 Apr 2024 22:41:27 +0530 Subject: [PATCH 5/7] ChatOn+Main: ChatApplyTemplateSimple cleanup Cleanup the associated log messages. Dont overload the return for status as well as data. Now the data returned if any is kept independent of the status of the operation. On failure log a message and exit. --- common/chaton.hpp | 21 ++++++++++++--------- examples/main/main.cpp | 13 +++++++++---- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/common/chaton.hpp b/common/chaton.hpp index da2834668554a..91b3d480253d2 100644 --- a/common/chaton.hpp +++ b/common/chaton.hpp @@ -6,30 +6,33 @@ #include "llama.h" #include "log.h" -inline std::string llama_chat_apply_template_simple( +// Tag the passed message suitabley as expected by the specified chat handshake template +// and the role. If the specified template is not supported logic will return false. 
+inline bool llama_chat_apply_template_simple(
         const std::string &tmpl,
         const std::string &role,
         const std::string &content,
+        std::string &dst,
         bool add_ass) {
     llama_chat_message msg = { role.c_str(), content.c_str() };
-    //std::vector<llama_chat_message> msgs{ msg };
-    std::vector<char> buf(content.size() * 2);
+    std::vector<char> buf(content.size() * 2); // This may under-allocate for small messages and over-allocate for large messages

     int32_t slen = llama_chat_apply_template(nullptr, tmpl.c_str(), &msg, 1, add_ass, buf.data(), buf.size());
-    LOG_TEELN("DBUG:%s:AA:%s:LengthNeeded:%d:BufSizeWas:%zu", __func__, role.c_str(), slen, buf.size());
     if (slen == -1) {
-        LOG_TEELN("WARN:%s:Unknown template [%s] encountered", __func__, tmpl.c_str());
-        return "";
+        LOG_TEELN("WARN:%s:Unknown template [%s] requested", __func__, tmpl.c_str());
+        dst = "";
+        return false;
     }
     if ((size_t) slen > buf.size()) {
+        LOGLN("INFO:%s:%s:LengthNeeded:%d:BufSizeWas:%zu", __func__, role.c_str(), slen, buf.size());
         buf.resize(slen);
         slen = llama_chat_apply_template(nullptr, tmpl.c_str(), &msg, 1, add_ass, buf.data(), buf.size());
-        LOG_TEELN("DBUG:%s:BB:%s:LengthNeeded:%d:BufSizeWas:%zu", __func__, role.c_str(), slen, buf.size());
     }

     const std::string tagged_msg(buf.data(), slen);
-    LOGLN("INFO:%s:%s", __func__, tagged_msg.c_str());
-    return tagged_msg;
+    LOGLN("INFO:%s:%s:%s", __func__, role.c_str(), tagged_msg.c_str());
+    dst = tagged_msg;
+    return true;
 }

 // return what should be the reverse prompt for the given template id
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index e04b26d36bf25..dfd16670e0313 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -258,9 +258,10 @@ int main(int argc, char ** argv) {
             params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
         }
         if (params.chaton) {
-            LOG_TEELN("DBUG:%s:AA:%s", __func__, params.prompt.c_str());
-            params.prompt = llama_chat_apply_template_simple(params.chaton_template_id, "system", params.prompt, false);
-            LOG_TEELN("DBUG:%s:BB:%s", __func__, params.prompt.c_str());
+            if (!llama_chat_apply_template_simple(params.chaton_template_id, "system", params.prompt, params.prompt, false)) {
+                LOG_TEELN("ERRR:%s:Wrt:%s:%s:%s", __func__, params.chaton_template_id.c_str(), "system", params.prompt.c_str());
+                exit(2);
+            }
         }
         embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
     } else {
@@ -897,7 +898,11 @@ int main(int argc, char ** argv) {

                 std::vector<llama_token> line_inp;
                 if (params.chaton) {
-                    std::string f_chat = llama_chat_apply_template_simple(params.chaton_template_id, "user", buffer.c_str(), true);
+                    std::string f_chat;
+                    if (!llama_chat_apply_template_simple(params.chaton_template_id, "user", buffer.c_str(), f_chat, true)) {
+                        LOG_TEELN("ERRR:%s:Wrt:%s:%s:%s", __func__, params.chaton_template_id.c_str(), "user", params.prompt.c_str());
+                        exit(2);
+                    }
                     line_inp = ::llama_tokenize(ctx, f_chat, false, true);
                     LOG("formatted input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
                     embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());

From e23b5c8689d5aea56e1f6ecad2eaee66e3aa8081 Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 23:26:16 +0530
Subject: [PATCH 6/7] ChatOn+Main: Cleanup the Requested ChatOn ReversePrompt handling

Avoid the use of a separate vector, which in turn was copied into the
main vector on return. Now directly pass the main reverse prompt vector
and directly add to the passed vector.

Also keep the data and the return status separate.

Explicitly identify an unknown template_id situation and return a
failure status.
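
A sketch of the resulting calling convention (variable names here are
illustrative):

    std::vector<std::string> antiprompts;
    if (!llama_chat_reverse_prompt(template_id, antiprompts)) {
        // unknown template id, caller decides how to fail
    }
    // on success the matching reverse prompt(s) have been appended to antiprompts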
---
 common/chaton.hpp      | 18 ++++++++++--------
 examples/main/main.cpp |  7 +------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/common/chaton.hpp b/common/chaton.hpp
index 91b3d480253d2..b33027669e018 100644
--- a/common/chaton.hpp
+++ b/common/chaton.hpp
@@ -36,16 +36,18 @@ inline bool llama_chat_apply_template_simple(
 }

 // return what should be the reverse prompt for the given template id
-// ie possible end text tag(s) of specified model type's chat query response
-inline std::vector<std::string> llama_chat_reverse_prompt(std::string &template_id) {
-    std::vector<std::string> rends;
-
+// ie possible end text tag(s) of specified model type's chat query response.
+// Note that it adds these reverse prompts to any that may already exist in the passed vector.
+inline bool llama_chat_reverse_prompt(std::string &template_id, std::vector<std::string> &rprompts) {
     if (template_id == "chatml") {
-        rends.push_back("<|im_start|>user\n");
+        rprompts.push_back("<|im_start|>user\n");
     } else if (template_id == "llama2") {
-        rends.push_back("</s>");
+        rprompts.push_back("</s>");
     } else if (template_id == "llama3") {
-        rends.push_back("<|eot_id|>");
+        rprompts.push_back("<|eot_id|>");
+    } else {
+        LOG_TEELN("WARN:%s:Unknown template [%s] requested", __func__, template_id.c_str());
+        return false;
     }
-    return rends;
+    return true;
 }
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index dfd16670e0313..32bcee9c43199 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -373,15 +373,10 @@ int main(int argc, char ** argv) {
     // handle chaton mode, it adds on to any reverse prompt specified explicitly by the user
     if (params.chaton) {
         params.interactive_first = true;
-        std::vector<std::string> resp_ends = llama_chat_reverse_prompt(params.chaton_template_id);
-        if (resp_ends.size() == 0) {
+        if (!llama_chat_reverse_prompt(params.chaton_template_id, params.antiprompt)) {
            LOG_TEELN("ERRR:%s:ChatOn:Unsupported ChatTemplateType:%s", __func__, params.chaton_template_id.c_str());
             exit(1);
         }
-        for (size_t i = 0; i < resp_ends.size(); i++)
-        {
-            params.antiprompt.emplace_back(resp_ends[i]);
-        }
     }

     // enable interactive mode if interactive start is specified

From 9037892127cf01707cc92a4a242f79e3b9fbffa4 Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 23:42:25 +0530
Subject: [PATCH 7/7] ChatON: Add a note

---
 common/chaton.hpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/common/chaton.hpp b/common/chaton.hpp
index b33027669e018..9616dea407c69 100644
--- a/common/chaton.hpp
+++ b/common/chaton.hpp
@@ -1,5 +1,21 @@
 #pragma once

+/**
+ *
+ * Provides simple and dumb helpers which help chat with llm chat/instruct models
+ * using the chat template expected by them.
+ *
+ * Normally used to tag system prompt and user messages.
+ * Currently used by example/main programs.
+ *
+ * This builds on llama_chat_apply_template. When adding support for new chat templates
+ * remember to update llama_chat_apply_template_internal as well as llama_chat_reverse_prompt.
+ *
+ * example/main program uses this when --chaton TEMPLATE_ID is passed to it along with -i
+ * sample TEMPLATE_ID's include chatml, llama2, llama3, ...
+ *
+ */
+
 #include <string>
 #include <vector>
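
Note: as a rough sketch (illustrative only, not part of the patches above),
at the end of this series the two helpers in chaton.hpp are used together
roughly as follows, with user_text standing in for the interactive input:

    std::string tagged;
    if (!llama_chat_apply_template_simple(params.chaton_template_id, "user", user_text, tagged, true)) {
        // unsupported template id
    }
    if (!llama_chat_reverse_prompt(params.chaton_template_id, params.antiprompt)) {
        // unsupported template id
    }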