
Commit 88a904b

feat(models): Llama 3.1 support (#1355)
* feat(models): Add llama 3.1 405B and 70B
* feat(tools): improve tools support for llama 3.1 models
* feat(models): update mistral to latest
* feat(models): use llama 3.1 8B for tasks
* fix(tools): disable tools by default
* feat(models): update model descriptions in huggingchat
* fix(front): show branding for llama 3
* fix(config): fix encoding
* feat(migrations): reset tools for everyone
* feat(config): replace orgs with final org
* feat(config): update website for llama 3.1
1 parent a1b0474 commit 88a904b

13 files changed: +174 -80 lines changed

chart/env/prod.yaml

Lines changed: 74 additions & 42 deletions
@@ -38,10 +38,68 @@ envVars:
  METRICS_ENABLED: "true"
  MODELS: >
    [
+     {
+       "name" : "meta-llama/Meta-Llama-3.1-70B-Instruct",
+       "id": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+       "tokenizer": {"tokenizerUrl": "https://huggingface.co/nsarrazin/llama3.1-tokenizer/resolve/main/tokenizer.json", "tokenizerConfigUrl": "https://huggingface.co/nsarrazin/llama3.1-tokenizer/raw/main/tokenizer_config.json"},
+       "description": "Ideal for everyday use. A fast and extremely capable model matching closed source models' capabilities.",
+       "modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct",
+       "websiteUrl": "https://llama.meta.com/",
+       "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
+       "tools": true,
+       "preprompt" : "You are a helpful assistant with tool calling capabilities. The user has access to the tool's outputs that you as a model cannot see. This could include text, images and more.",
+       "parameters": {
+         "temperature": 0.1,
+         "stop": ["<|endoftext|>", "<|eot_id|>"],
+         "max_new_tokens": 1024,
+         "truncate": 7167
+       },
+       "promptExamples": [
+         {
+           "title": "Write an email from bullet list",
+           "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
+         }, {
+           "title": "Code a snake game",
+           "prompt": "Code a basic snake game in python, give explanations for each step."
+         }, {
+           "title": "Assist in a task",
+           "prompt": "How do I make a delicious lemon cheesecake?"
+         }
+       ]
+     },
+     {
+       "name" : "meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
+       "id": "meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
+       "tokenizer": {"tokenizerUrl": "https://huggingface.co/nsarrazin/llama3.1-tokenizer/resolve/main/tokenizer.json", "tokenizerConfigUrl": "https://huggingface.co/nsarrazin/llama3.1-tokenizer/raw/main/tokenizer_config.json"},
+       "description": "The most intelligent open-source model, showing exceptional capabilities for complex tasks.",
+       "modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
+       "websiteUrl": "https://llama.meta.com/",
+       "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
+       "tools": true,
+       "preprompt" : "You are a helpful assistant with tool calling capabilities. The user has access to the tool's outputs that you as a model cannot see. This could include text, images and more.",
+       "parameters": {
+         "temperature": 0.1,
+         "stop": ["<|endoftext|>", "<|eot_id|>"],
+         "max_new_tokens": 2048,
+         "truncate": 14337
+       },
+       "promptExamples": [
+         {
+           "title": "Write an email from bullet list",
+           "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
+         }, {
+           "title": "Code a snake game",
+           "prompt": "Code a basic snake game in python, give explanations for each step."
+         }, {
+           "title": "Assist in a task",
+           "prompt": "How do I make a delicious lemon cheesecake?"
+         }
+       ]
+     },
      {
        "name" : "CohereForAI/c4ai-command-r-plus",
        "tokenizer": {"tokenizerUrl": "https://huggingface.co/nsarrazin/c4ai-command-r-v01-tokenizer/resolve/main/tokenizer.json", "tokenizerConfigUrl": "https://huggingface.co/nsarrazin/c4ai-command-r-v01-tokenizer/raw/main/tokenizer_config.json"},
-       "description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!",
+       "description": "Cohere's largest language model, optimized for conversational interaction and tool use.",
        "modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
        "websiteUrl": "https://docs.cohere.com/docs/command-r-plus",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png",
@@ -65,36 +123,9 @@ envVars:
          }
        ]
      },
-     {
-       "name" : "meta-llama/Meta-Llama-3-70B-Instruct",
-       "description": "Meta Llama 3 delivers top performance on various benchmarks and introduces new features like better reasoning.",
-       "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
-       "modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
-       "websiteUrl": "https://llama.meta.com/llama3/",
-       "tokenizer" : "philschmid/meta-llama-3-tokenizer",
-       "promptExamples" : [
-         {
-           "title": "Write an email from bullet list",
-           "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
-         }, {
-           "title": "Code a snake game",
-           "prompt": "Code a basic snake game in python, give explanations for each step."
-         }, {
-           "title": "Assist in a task",
-           "prompt": "How do I make a delicious lemon cheesecake?"
-         }
-       ],
-       "parameters": {
-         "stop": ["<|eot_id|>"],
-         "truncate": 6144,
-         "max_new_tokens": 2047,
-         "temperature": 0.6,
-         "top_p" : 0.9
-       }
-     },
      {
        "name" : "mistralai/Mixtral-8x7B-Instruct-v0.1",
-       "description" : "The latest MoE model from Mistral AI! 8x7B and outperforms Llama 2 70B in most benchmarks.",
+       "description" : "A high-quality sparse mixture of experts model with open weights.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
        "websiteUrl" : "https://mistral.ai/news/mixtral-of-experts/",
        "modelUrl": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -125,7 +156,7 @@ envVars:
      },
      {
        "name" : "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-       "description" : "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.",
+       "description" : "Nous Hermes' strong flagship model trained on the Mixtral 8x7B.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
        "websiteUrl" : "https://nousresearch.com/",
        "modelUrl": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
@@ -156,7 +187,7 @@ envVars:
      {
        "name": "01-ai/Yi-1.5-34B-Chat",
        "tokenizer": "01-ai/Yi-1.5-34B-Chat",
-       "description" : "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+       "description" : "Strong performance in reasoning while maintaining excellent capabilities in language understanding.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/01-ai-logo.png",
        "modelUrl": "https://huggingface.co/01-ai/Yi-1.5-34B-Chat",
        "websiteUrl": "https://www.01.ai",
@@ -182,13 +213,13 @@ envVars:
        ]
      },
      {
-       "name": "mistralai/Mistral-7B-Instruct-v0.2",
-       "displayName": "mistralai/Mistral-7B-Instruct-v0.2",
-       "description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.",
+       "name": "mistralai/Mistral-7B-Instruct-v0.3",
+       "displayName": "mistralai/Mistral-7B-Instruct-v0.3",
+       "description": "A small model with good capabilities in language understanding and commonsense reasoning.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
        "websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/",
-       "modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
-       "tokenizer": "mistralai/Mistral-7B-Instruct-v0.2",
+       "modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
+       "tokenizer": "mistralai/Mistral-7B-Instruct-v0.3",
        "preprompt": "",
        "chatPromptTemplate" : "<s>{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}</s>{{/ifAssistant}}{{/each}}",
        "parameters": {
@@ -216,7 +247,7 @@ envVars:
      {
        "name": "microsoft/Phi-3-mini-4k-instruct",
        "tokenizer": "microsoft/Phi-3-mini-4k-instruct",
-       "description" : "Phi-3 Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2.",
+       "description" : "One of the best small models (3.8B parameters), super fast for simple tasks.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
        "modelUrl": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
        "websiteUrl": "https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/",
@@ -242,11 +273,11 @@ envVars:
        ]
      },
      {
-       "name": "meta-llama/Meta-Llama-3-8B-Instruct",
-       "tokenizer" : "philschmid/meta-llama-3-tokenizer",
+       "name" : "llhf/Meta-Llama-3.1-8B-Instruct",
+       "tokenizer": {"tokenizerUrl": "https://huggingface.co/nsarrazin/llama3.1-tokenizer/resolve/main/tokenizer.json", "tokenizerConfigUrl": "https://huggingface.co/nsarrazin/llama3.1-tokenizer/raw/main/tokenizer_config.json"},
        "parameters": {
          "temperature": 0.1,
-         "stop": ["<|eot_id|>"],
+         "stop": ["<|endoftext|>", "<|eot_id|>"],
        },
        "unlisted": true
      }
@@ -265,7 +296,8 @@ envVars:
      { "name": "google/gemma-7b-it" },
      { "name": "meta-llama/Llama-2-70b-chat-hf" },
      { "name": "codellama/CodeLlama-70b-Instruct-hf" },
-     { "name": "openchat/openchat-3.5-0106" }
+     { "name": "openchat/openchat-3.5-0106" },
+     { "name": "meta-llama/Meta-Llama-3-70B-Instruct"}
    ]
  PUBLIC_ORIGIN: "https://huggingface.co"
  PUBLIC_SHARE_PREFIX: "https://hf.co/chat"
@@ -280,7 +312,7 @@ envVars:
  PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
  PUBLIC_APPLE_APP_ID: "6476778843"
  REQUIRE_FEATURED_ASSISTANTS: "true"
- TASK_MODEL: "meta-llama/Meta-Llama-3-8B-Instruct"
+ TASK_MODEL: "llhf/Meta-Llama-3.1-8B-Instruct"
  TEXT_EMBEDDING_MODELS: >
    [{
      "name": "bge-base-en-v1-5-sxa",

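The two new Llama 3.1 entries reuse the same per-model schema as the rest of the MODELS config, and their parameters imply the prompt budget directly (truncate + max_new_tokens: 7167 + 1024 ≈ 8k tokens for the 70B, 14337 + 2048 ≈ 16k for the 405B). Below is a rough TypeScript sketch of that schema and of the budget arithmetic. The `ModelEntry` interface is illustrative only, not chat-ui's real validation schema, and the lenient JSON5 parse is an assumption based on the trailing comma left in the unlisted 8B entry.

```ts
// Illustrative sketch: field names mirror the MODELS entries above, but this is
// not the repo's actual validation schema.
import JSON5 from "json5";

interface ModelEntry {
  name: string;
  id?: string;
  displayName?: string;
  description?: string;
  tokenizer?: string | { tokenizerUrl: string; tokenizerConfigUrl: string };
  modelUrl?: string;
  websiteUrl?: string;
  logoUrl?: string;
  tools?: boolean;
  preprompt?: string;
  unlisted?: boolean;
  chatPromptTemplate?: string;
  parameters?: {
    temperature?: number;
    top_p?: number;
    stop?: string[];
    max_new_tokens?: number;
    truncate?: number;
  };
  promptExamples?: { title: string; prompt: string }[];
}

// Assumption: the MODELS string is parsed leniently (JSON5 tolerates the trailing
// comma in the llhf/Meta-Llama-3.1-8B-Instruct entry).
const models = JSON5.parse(process.env.MODELS ?? "[]") as ModelEntry[];

for (const m of models) {
  // truncate caps the prompt, max_new_tokens caps the completion; their sum is the
  // effective context budget for this deployment.
  const budget = (m.parameters?.truncate ?? 0) + (m.parameters?.max_new_tokens ?? 0);
  console.log(`${m.name}: ~${budget} tokens of context used per request`);
}
```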
src/lib/components/ModelCardMetadata.svelte

Lines changed: 2 additions & 2 deletions
@@ -42,9 +42,9 @@
    class="ml-auto flex items-center hover:underline"
    rel="noreferrer"
  >
-   {#if model.name === "meta-llama/Meta-Llama-3-70B-Instruct"}
+   {#if model.name.startsWith("meta-llama/Meta-Llama")}
      <BIMeta class="mr-1.5 shrink-0 text-xs text-gray-400" />
-     Built with Meta Llama 3
+     Built with Llama
    {:else}
      <CarbonEarth class="mr-1.5 shrink-0 text-xs text-gray-400" />
      Website
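The badge condition is loosened from an exact match on the retired Llama 3 70B name to a prefix match, so every listed meta-llama model (3 or 3.1) shows the "Built with Llama" attribution. A quick sketch of what the new predicate does and does not match, using names from the config above:

```ts
// The same predicate as in the Svelte template, pulled out for illustration.
const showsLlamaBranding = (modelName: string) => modelName.startsWith("meta-llama/Meta-Llama");

const samples = [
  "meta-llama/Meta-Llama-3.1-70B-Instruct", // true
  "meta-llama/Meta-Llama-3.1-405B-Instruct-FP8", // true
  "llhf/Meta-Llama-3.1-8B-Instruct", // false: different org prefix, but unlisted anyway
  "CohereForAI/c4ai-command-r-plus", // false: falls through to the generic Website link
];

for (const name of samples) {
  console.log(`${name} -> ${showsLlamaBranding(name)}`);
}
```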
src/lib/migrations/routines/07-reset-tools-in-settings.ts

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+ import type { Migration } from ".";
+ import { collections } from "$lib/server/database";
+ import { ObjectId } from "mongodb";
+
+ const resetTools: Migration = {
+   _id: new ObjectId("000000000007"),
+   name: "Reset tools to empty",
+   up: async () => {
+     const { settings } = collections;
+
+     await settings.updateMany({}, { $set: { tools: {} } });
+
+     return true;
+   },
+   runEveryTime: false,
+   runForHuggingChat: "only",
+ };
+
+ export default resetTools;
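The new migration clears every stored tool selection so that, combined with the isOnByDefault removals further down, all tools start switched off for everyone. A small sketch of the effect on one settings document; the surrounding fields and tool ids are hypothetical, only the `tools: {}` reset comes from the diff:

```ts
// Hypothetical settings document shape for illustration; only the tools reset is
// taken from migration 000000000007.
type SettingsDoc = { sessionId?: string; tools: Record<string, boolean> };

const before: SettingsDoc = {
  sessionId: "abc123", // placeholder
  tools: { calculator: true, document_parser: false }, // example tool ids
};

// settings.updateMany({}, { $set: { tools: {} } }) turns every document into:
const after: SettingsDoc = { ...before, tools: {} };

console.log(before.tools, "->", after.tools);
```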

src/lib/migrations/routines/index.ts

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,7 @@ import addToolsToSettings from "./03-add-tools-in-settings";
  import updateMessageUpdates from "./04-update-message-updates";
  import updateMessageFiles from "./05-update-message-files";
  import trimMessageUpdates from "./06-trim-message-updates";
+ import resetTools from "./07-reset-tools-in-settings";

  export interface Migration {
    _id: ObjectId;
@@ -25,4 +26,5 @@ export const migrations: Migration[] = [
    updateMessageUpdates,
    updateMessageFiles,
    trimMessageUpdates,
+   resetTools,
  ];

src/lib/server/models.ts

Lines changed: 65 additions & 25 deletions
@@ -96,7 +96,10 @@ async function getChatPromptRender(

  const renderTemplate = ({ messages, preprompt, tools, toolResults }: ChatTemplateInput) => {
    let formattedMessages: { role: string; content: string }[] = messages.map((message) => ({
-     content: message.content,
+     content:
+       message.files?.length && !tools?.length
+         ? message.content + `\n This message has ${message.files.length} files attached`
+         : message.content,
      role: message.from,
    }));

@@ -113,32 +116,69 @@
    if (toolResults?.length) {
      // todo: should update the command r+ tokenizer to support system messages at any location
      // or use the `rag` mode without the citations
-     formattedMessages = [
-       {
-         role: "system",
-         content:
-           "\n\n<results>\n" +
-           toolResults
-             .flatMap((result, idx) => {
-               if (result.status === ToolResultStatus.Error) {
+     const id = m.id ?? m.name;
+
+     if (id.startsWith("CohereForAI")) {
+       formattedMessages = [
+         {
+           role: "system",
+           content:
+             "\n\n<results>\n" +
+             toolResults
+               .flatMap((result, idx) => {
+                 if (result.status === ToolResultStatus.Error) {
+                   return (
+                     `Document: ${idx}\n` + `Tool "${result.call.name}" error\n` + result.message
+                   );
+                 }
                  return (
-                   `Document: ${idx}\n` + `Tool "${result.call.name}" error\n` + result.message
+                   `Document: ${idx}\n` +
+                   result.outputs
+                     .flatMap((output) =>
+                       Object.entries(output).map(([title, text]) => `${title}\n${text}`)
+                     )
+                     .join("\n")
                  );
-               }
-               return (
-                 `Document: ${idx}\n` +
-                 result.outputs
-                   .flatMap((output) =>
-                     Object.entries(output).map(([title, text]) => `${title}\n${text}`)
-                   )
-                   .join("\n")
-               );
-             })
-             .join("\n\n") +
-           "\n</results>",
-         },
-       ...formattedMessages,
-     ];
+               })
+               .join("\n\n") +
+             "\n</results>",
+         },
+         ...formattedMessages,
+       ];
+     } else if (id.startsWith("meta-llama")) {
+       const results = toolResults.flatMap((result) => {
+         if (result.status === ToolResultStatus.Error) {
+           return [
+             {
+               tool_call_id: result.call.name,
+               output: "Error: " + result.message,
+             },
+           ];
+         } else {
+           logger.info(result.outputs);
+           return result.outputs.map((output) => ({
+             tool_call_id: result.call.name,
+             output: JSON.stringify(output),
+           }));
+         }
+       });
+
+       formattedMessages = [
+         ...formattedMessages,
+         {
+           role: "python",
+           content: JSON.stringify(results),
+         },
+       ];
+     } else {
+       formattedMessages = [
+         ...formattedMessages,
+         {
+           role: "system",
+           content: JSON.stringify(toolResults),
+         },
+       ];
+     }
      tools = [];
    }

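Tool results are now injected differently per model family: Cohere models keep the existing `<results>` system block, ids starting with `meta-llama` get a dedicated `python`-role message containing `{tool_call_id, output}` pairs, and anything else falls back to a system message with the raw JSON. The sketch below reproduces just the meta-llama branch as a standalone function with a made-up tool result; the `ToolResult` type is simplified rather than imported from the repo.

```ts
// Simplified stand-ins for the repo's types, for illustration only.
type ToolResult =
  | { status: "error"; call: { name: string }; message: string }
  | { status: "success"; call: { name: string }; outputs: Record<string, unknown>[] };

// Mirrors the `id.startsWith("meta-llama")` branch in the diff above.
function llamaToolResultMessage(toolResults: ToolResult[]) {
  const results = toolResults.flatMap((result) =>
    result.status === "error"
      ? [{ tool_call_id: result.call.name, output: "Error: " + result.message }]
      : result.outputs.map((output) => ({
          tool_call_id: result.call.name,
          output: JSON.stringify(output),
        }))
  );
  return { role: "python", content: JSON.stringify(results) };
}

// One successful call collapses into a single python-role message appended after the chat.
console.log(
  llamaToolResultMessage([
    { status: "success", call: { name: "calculator" }, outputs: [{ answer: 4 }] },
  ])
);
```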
src/lib/server/textGeneration/tools.ts

Lines changed: 11 additions & 4 deletions
@@ -143,19 +143,26 @@ export async function* runTools(
      // look for a code blocks of ```json and parse them
      // if they're valid json, add them to the calls array
      if (output.generated_text) {
+       if (!output.generated_text.endsWith("```")) {
+         output.generated_text = output.generated_text + "```";
+       }
        const codeBlocks = Array.from(output.generated_text.matchAll(/```json\n(.*?)```/gs))
          .map(([, block]) => block)
          // remove trailing comma
          .map((block) => block.trim().replace(/,$/, ""));
        if (codeBlocks.length === 0) continue;
-
        // grab only the capture group from the regex match
        for (const block of codeBlocks) {
+         // make it an array if it's not already
+         let call = JSON5.parse(block);
+         if (!Array.isArray(call)) {
+           call = [call];
+         }
+
          try {
-           calls.push(
-             ...JSON5.parse(block).filter(isExternalToolCall).map(externalToToolCall).filter(Boolean)
-           );
+           calls.push(...call.filter(isExternalToolCall).map(externalToToolCall).filter(Boolean));
          } catch (e) {
+           logger.error(e, "Error while parsing tool calls, please retry");
            // error parsing the calls
            yield {
              type: MessageUpdateType.Status,
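The parser now tolerates Llama-style output that forgets to close its ```json fence, and models that emit a single call object instead of an array. A self-contained sketch of that normalization path follows; the `isExternalToolCall`/`externalToToolCall` filtering from the diff is left out, and the sample call shape is illustrative. Worth noting: in the committed code `JSON5.parse(block)` sits before the `try`, so a block that is not valid JSON5 would throw past the catch that yields the status update, unless it is handled further up the stack.

```ts
import JSON5 from "json5";

// Mirrors the normalization in the diff: close an unterminated ```json fence,
// extract fenced blocks, strip a trailing comma, and coerce a lone object into an array.
function extractToolCalls(generatedText: string): unknown[] {
  let text = generatedText;
  if (!text.endsWith("```")) {
    text += "```";
  }

  const codeBlocks = Array.from(text.matchAll(/```json\n(.*?)```/gs))
    .map(([, block]) => block)
    .map((block) => block.trim().replace(/,$/, ""));

  return codeBlocks.flatMap((block) => {
    const call = JSON5.parse(block); // unguarded here, as in the committed code
    return Array.isArray(call) ? call : [call];
  });
}

// A single call with a missing closing fence still parses (call fields are illustrative):
console.log(extractToolCalls('```json\n{"name": "calculator", "parameters": {"equation": "2+2"}}'));
```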

src/lib/server/tools/calculator.ts

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@ const calculator: BackendTool = {
    displayName: "Calculator",
    description:
      "A simple calculator, takes a string containing a mathematical expression and returns the answer. Only supports +, -, *, ** (power) and /, as well as parenthesis ().",
-   isOnByDefault: true,
    parameterDefinitions: {
      equation: {
        description:

src/lib/server/tools/documentParser.ts

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@ const documentParser: BackendTool = {
    name: "document_parser",
    displayName: "Document Parser",
    description: "Use this tool to parse any document and get its content in markdown format.",
-   isOnByDefault: true,
    mimeTypes: ["application/*", "text/*"],
    parameterDefinitions: {
      fileMessageIndex: {

src/lib/server/tools/images/editing.ts

Lines changed: 0 additions & 1 deletion
@@ -17,7 +17,6 @@ const imageEditing: BackendTool = {
    name: "image_editing",
    displayName: "Image Editing",
    description: "Use this tool to edit an image from a prompt.",
-   isOnByDefault: true,
    mimeTypes: ["image/*"],
    parameterDefinitions: {
      prompt: {
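All three built-in tools drop `isOnByDefault: true`, which together with the settings migration above means tools ship disabled and must be enabled explicitly per user. A minimal sketch of a tool definition under that convention; `BackendTool` here is a stand-in for the repo's real interface, and the default-off behaviour when the flag is omitted is an assumption based on this commit rather than read from the type definition:

```ts
// Stand-in interface for illustration; only fields visible in the diffs above are included.
interface BackendTool {
  name: string;
  displayName: string;
  description: string;
  isOnByDefault?: boolean; // assumed to default to off when omitted
  mimeTypes?: string[];
  parameterDefinitions: Record<string, { description: string; required?: boolean }>;
}

const exampleTool: BackendTool = {
  name: "document_parser",
  displayName: "Document Parser",
  description: "Use this tool to parse any document and get its content in markdown format.",
  // no isOnByDefault: the tool stays off until a user turns it on
  mimeTypes: ["application/*", "text/*"],
  parameterDefinitions: {
    fileMessageIndex: { description: "Index of the message containing the file", required: true }, // description is illustrative
  },
};

console.log(`${exampleTool.displayName} enabled by default:`, exampleTool.isOnByDefault ?? false);
```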
