Skip to content

Commit a8ca669

Browse files
nsarrazin and mishig25 authored
Add CodeLlama-70b-Instruct-hf (#752)
* Add CodeLlama-70b-Instruct-hf * add comment to reduce * Added missing newlines to prompt format for codellama 70b * remove extra space * stop tokens * Remove source newline * fix preprompt * fix prompt one last time * add news * shorter text * fix link & remove old tokens --------- Co-authored-by: Mishig Davaadorj <dmishig@gmail.com>
1 parent f730778 commit a8ca669

File tree

4 files changed

+29
-16
lines changed

4 files changed

+29
-16
lines changed

.env

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ PUBLIC_SHARE_PREFIX=#https://hf.co/chat
9999
PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
100100
PUBLIC_ANNOUNCEMENT_BANNERS=`[
101101
{
102-
"title": "Llama v2 is live on HuggingChat! 🦙",
102+
"title": "Code Llama 70B is live! 🦙",
103103
"linkTitle": "Announcement",
104-
"linkHref": "https://huggingface.co/blog/llama2"
104+
"linkHref": "https://ai.meta.com/blog/code-llama-large-language-model-coding/"
105105
}
106106
]`
107107

.env.template

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,12 @@ MODELS=`[
8989
}
9090
},
9191
{
92-
"name": "codellama/CodeLlama-34b-Instruct-hf",
93-
"displayName": "codellama/CodeLlama-34b-Instruct-hf",
94-
"description": "Code Llama, a state of the art code model from Meta.",
95-
"websiteUrl": "https://about.fb.com/news/2023/08/code-llama-ai-for-coding/",
96-
"userMessageToken": "",
97-
"userMessageEndToken": " [/INST] ",
98-
"assistantMessageToken": "",
99-
"assistantMessageEndToken": " </s><s>[INST] ",
100-
"preprompt": " ",
101-
"chatPromptTemplate" : "<s>[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} </s><s>[INST] {{/ifAssistant}}{{/each}}",
92+
"name": "codellama/CodeLlama-70b-Instruct-hf",
93+
"displayName": "codellama/CodeLlama-70b-Instruct-hf",
94+
"description": "Code Llama, a state of the art code model from Meta. Now in 70B!",
95+
"websiteUrl": "https://ai.meta.com/blog/code-llama-large-language-model-coding/",
96+
"preprompt": "",
97+
"chatPromptTemplate" : "<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n ",
10298
"promptExamples": [
10399
{
104100
"title": "Fibonacci in Python",
@@ -118,7 +114,7 @@ MODELS=`[
118114
"top_k": 50,
119115
"truncate": 4096,
120116
"max_new_tokens": 4096,
121-
"stop": [" </s><s>[INST] "]
117+
"stop": ["<step>", " <step>", " <step> "],
122118
}
123119
},
124120
{
@@ -217,7 +213,8 @@ OLD_MODELS=`[
217213
{"name":"HuggingFaceH4/zephyr-7b-alpha"},
218214
{"name":"openchat/openchat_3.5"},
219215
{"name":"openchat/openchat-3.5-1210"},
220-
{"name": "tiiuae/falcon-180B-chat"}
216+
{"name": "tiiuae/falcon-180B-chat"},
217+
{"name": "codellama/CodeLlama-34b-Instruct-hf"}
221218
]`
222219

223220
TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1'

PROMPTS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
5555
```env
5656
{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
5757
```
58+
59+
## CodeLlama 70B
60+
61+
```env
62+
<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n 
63+
```

src/routes/conversation/[id]/+server.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,13 +310,23 @@ export async function POST({ request, locals, params, getClientAddress }) {
310310
}
311311
}
312312
} else {
313+
let interrupted = !output.token.special;
313314
// add output.generated text to the last message
315+
// strip end tokens from the output.generated_text
316+
const text = (model.parameters.stop ?? []).reduce((acc: string, curr: string) => {
317+
if (acc.endsWith(curr)) {
318+
interrupted = false;
319+
return acc.slice(0, acc.length - curr.length);
320+
}
321+
return acc;
322+
}, output.generated_text.trimEnd());
323+
314324
messages = [
315325
...messages.slice(0, -1),
316326
{
317327
...messages[messages.length - 1],
318-
content: previousContent + output.generated_text,
319-
interrupted: !output.token.special, // if its a special token it finished on its own, else it was interrupted
328+
content: previousContent + text,
329+
interrupted, // if its a special token it finished on its own, else it was interrupted
320330
updates,
321331
updatedAt: new Date(),
322332
},

0 commit comments

Comments (0)