
Commit b73ec69

Add missing chat inference snippets (#1637)
Follow-up after #1636. Better to review commits individually. This PR:

- adds a snippet using JS fetch for conversational models (e2f6c6f)
- adds tests for existing snippets using the "auto" provider (conversational) (4356373)
- adds snippets for "auto" + "conversational" for cURL, Python openai, Python requests, JS openai, and JS requests (33d3094)

Before this change, only snippets for huggingface_hub / huggingface.js were displayed.
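
For context, a minimal sketch of how these generated snippets are consumed. That `@huggingface/inference` exposes this file under a `snippets` namespace, and the exact `getInferenceSnippets` argument list, are assumptions based on this package's layout, not verbatim from the PR:

// Sketch: generating "auto"-provider snippets for a conversational model.
// The model object shape follows the ModelDataMinimal fields used in the
// test cases below; `pipeline_tag` here is an assumed value.
import { snippets } from "@huggingface/inference";

const model = {
    id: "meta-llama/Llama-3.1-8B-Instruct",
    pipeline_tag: "text-generation",
    tags: ["conversational"],
    inference: "",
};

// After this PR, provider "auto" + a conversational task yields snippets for
// every client (cURL, Python openai/requests, JS openai/fetch, ...), not just
// huggingface_hub / huggingface.js.
const generated = snippets.getInferenceSnippets(model, "auto");
console.log(generated.map((s) => `${s.language}/${s.client}`)); // field names assumed
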
1 parent a4ca182 commit b73ec69

File tree: 37 files changed, +956 −12 lines

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 15 additions & 8 deletions
@@ -36,9 +36,11 @@ const CLIENTS: Record<InferenceSnippetLanguage, Client[]> = {
     sh: [...SH_CLIENTS],
 };
 
-const CLIENTS_AUTO_POLICY: Partial<Record<InferenceSnippetLanguage, Client[]>> = {
+// The "auto"-provider policy is only available through the HF SDKs (huggingface.js / huggingface_hub)
+// except for conversational tasks for which we have https://router.huggingface.co/v1/chat/completions
+const CLIENTS_NON_CONVERSATIONAL_AUTO_POLICY: Partial<Record<InferenceSnippetLanguage, Client[]>> = {
     js: ["huggingface.js"],
-    python: ["huggingface_hub", "openai"],
+    python: ["huggingface_hub"],
 };
 
 type InputPreparationFn = (model: ModelDataMinimal, opts?: Record<string, unknown>) => object;
@@ -206,11 +208,16 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
     // Inputs for the "auto" route is strictly the same as "inputs", except the model includes the provider
     // If not "auto" route, use the providerInputs
     const autoInputs =
-        provider !== "auto" && !opts?.endpointUrl && !opts?.directRequest
-            ? {
-                  ...inputs,
-                  model: `${model.id}:${provider}`,
-              }
+        !opts?.endpointUrl && !opts?.directRequest
+            ? provider !== "auto"
+                ? {
+                      ...inputs,
+                      model: `${model.id}:${provider}`,
+                  }
+                : {
+                      ...inputs,
+                      model: `${model.id}`, // if no :provider => auto
+                  }
             : providerInputs;
 
     /// Prepare template injection data
@@ -259,7 +266,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
     };
 
     /// Iterate over clients => check if a snippet exists => generate
-    const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
+    const clients = provider === "auto" && task !== "conversational" ? CLIENTS_NON_CONVERSATIONAL_AUTO_POLICY : CLIENTS;
     return inferenceSnippetLanguages
         .map((language) => {
             const langClients = clients[language] ?? [];
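
Stripped of the surrounding generator plumbing, the client-selection change amounts to the following standalone sketch. Only the two policy entries are taken from the diff; the concrete client lists in CLIENTS are illustrative:

type Client = string;

// Illustrative full client map; the real one is assembled from SH_CLIENTS etc.
const CLIENTS: Record<string, Client[]> = {
    js: ["fetch", "huggingface.js", "openai"],
    python: ["huggingface_hub", "openai", "requests"],
    sh: ["curl"],
};

// Restricted map for provider === "auto": outside conversational tasks, only
// the HF SDKs can resolve a provider automatically.
const CLIENTS_NON_CONVERSATIONAL_AUTO_POLICY: Partial<Record<string, Client[]>> = {
    js: ["huggingface.js"],
    python: ["huggingface_hub"],
};

function selectClients(provider: string, task: string) {
    // Conversational "auto" requests can go through the OpenAI-compatible
    // https://router.huggingface.co/v1/chat/completions, so all clients apply.
    return provider === "auto" && task !== "conversational"
        ? CLIENTS_NON_CONVERSATIONAL_AUTO_POLICY
        : CLIENTS;
}
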
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+async function query(data) {
+    const response = await fetch(
+        "{{ fullUrl }}",
+        {
+            headers: {
+                Authorization: "{{ authorizationHeader }}",
+                "Content-Type": "application/json",
+{% if billTo %}
+                "X-HF-Bill-To": "{{ billTo }}",
+{% endif %}            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    {{ autoInputs.asTsString }}
+}).then((response) => {
+    console.log(JSON.stringify(response));
+});
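
The `{% if billTo %}` block above is what toggles the `X-HF-Bill-To` header in the rendered fixtures further down. A sketch of rendering such a template with `@huggingface/jinja` (that this repo renders its snippet templates exactly this way is an assumption; `Template#render` itself is a real API):

import { Template } from "@huggingface/jinja";

// Placeholder: stands in for the contents of the .jinja file above.
const templateSource = `/* contents of the template above */`;

const template = new Template(templateSource);
const rendered = template.render({
    fullUrl: "https://router.huggingface.co/v1/chat/completions",
    authorizationHeader: "Bearer hf_xxx", // placeholder token
    billTo: "huggingface", // omit this key to drop the X-HF-Bill-To header
    autoInputs: { asTsString: 'model: "meta-llama/Llama-3.1-8B-Instruct"' },
});
console.log(rendered);
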

packages/tasks-gen/scripts/generate-snippets-fixtures.ts

Lines changed: 4 additions & 4 deletions
@@ -56,7 +56,7 @@ const TEST_CASES: {
         tags: ["conversational"],
         inference: "",
     },
-    providers: ["hf-inference", "together"],
+    providers: ["hf-inference", "together", "auto"],
     opts: { streaming: false },
 },
 {
@@ -68,7 +68,7 @@ const TEST_CASES: {
         tags: ["conversational"],
         inference: "",
     },
-    providers: ["hf-inference", "together"],
+    providers: ["hf-inference", "together", "auto"],
     opts: { streaming: true },
 },
 {
@@ -80,7 +80,7 @@ const TEST_CASES: {
         tags: ["conversational"],
         inference: "",
     },
-    providers: ["hf-inference", "fireworks-ai"],
+    providers: ["hf-inference", "fireworks-ai", "auto"],
     opts: { streaming: false },
 },
 {
@@ -92,7 +92,7 @@ const TEST_CASES: {
         tags: ["conversational"],
         inference: "",
     },
-    providers: ["hf-inference", "fireworks-ai"],
+    providers: ["hf-inference", "fireworks-ai", "auto"],
     opts: { streaming: true },
 },
 {
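
Fields outside the hunks (such as the test name and model id) don't appear in this diff, so the following full TEST_CASES entry is a reconstruction for illustration only:

{
    testName: "conversational-llm-non-stream", // assumed name
    model: {
        id: "meta-llama/Llama-3.1-8B-Instruct", // assumed model id
        pipeline_tag: "text-generation", // assumed
        tags: ["conversational"],
        inference: "",
    },
    providers: ["hf-inference", "together", "auto"],
    opts: { streaming: false },
},
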
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+async function query(data) {
+    const response = await fetch(
+        "https://router.huggingface.co/v1/chat/completions",
+        {
+            headers: {
+                Authorization: `Bearer ${process.env.HF_TOKEN}`,
+                "Content-Type": "application/json",
+                "X-HF-Bill-To": "huggingface",
+            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+    model: "meta-llama/Llama-3.1-8B-Instruct:hf-inference",
+}).then((response) => {
+    console.log(JSON.stringify(response));
+});
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+async function query(data) {
+    const response = await fetch(
+        "http://localhost:8080/v1/chat/completions",
+        {
+            headers: {
+                Authorization: `Bearer ${process.env.API_TOKEN}`,
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+    model: "meta-llama/Llama-3.1-8B-Instruct",
+}).then((response) => {
+    console.log(JSON.stringify(response));
+});
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+async function query(data) {
+    const response = await fetch(
+        "https://router.huggingface.co/v1/chat/completions",
+        {
+            headers: {
+                Authorization: `Bearer ${process.env.HF_TOKEN}`,
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+    model: "meta-llama/Llama-3.1-8B-Instruct",
+}).then((response) => {
+    console.log(JSON.stringify(response));
+});
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+async function query(data) {
+    const response = await fetch(
+        "https://router.huggingface.co/v1/chat/completions",
+        {
+            headers: {
+                Authorization: `Bearer ${process.env.HF_TOKEN}`,
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+    model: "meta-llama/Llama-3.1-8B-Instruct:hf-inference",
+}).then((response) => {
+    console.log(JSON.stringify(response));
+});
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+async function query(data) {
+    const response = await fetch(
+        "https://router.huggingface.co/v1/chat/completions",
+        {
+            headers: {
+                Authorization: `Bearer ${process.env.HF_TOKEN}`,
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+    model: "meta-llama/Llama-3.1-8B-Instruct:together",
+}).then((response) => {
+    console.log(JSON.stringify(response));
+});
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient(process.env.HF_TOKEN);
+
+const chatCompletion = await client.chatCompletion({
+    provider: "auto",
+    model: "meta-llama/Llama-3.1-8B-Instruct",
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+});
+
+console.log(chatCompletion.choices[0].message);
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+    baseURL: "https://router.huggingface.co/v1",
+    apiKey: process.env.HF_TOKEN,
+});
+
+const chatCompletion = await client.chat.completions.create({
+    model: "meta-llama/Llama-3.1-8B-Instruct",
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?",
+        },
+    ],
+});
+
+console.log(chatCompletion.choices[0].message);
