From 877f0de9d66c0e6017aade53e378557ed485629b Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:10:47 +0200
Subject: [PATCH 01/10] fix structured output

---
 src/huggingface_hub/__init__.py               |  3 --
 .../inference/_generated/types/__init__.py    |  1 -
 .../_generated/types/chat_completion.py       | 53 +++++++++++++++----
 .../inference/_providers/cerebras.py          |  2 +-
 .../inference/_providers/cohere.py            | 22 ++++++--
 .../inference/_providers/fireworks_ai.py      | 18 +++++++
 .../inference/_providers/hf_inference.py      |  9 +++-
 .../inference/_providers/nebius.py            | 11 ++++
 .../inference/_providers/sambanova.py         | 14 +++++
 .../inference/_providers/together.py          | 14 +++++
 10 files changed, 129 insertions(+), 18 deletions(-)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 9e9135bd44..726f1a62f0 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -301,7 +301,6 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
-    "ChatCompletionInputGrammarTypeType",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
@@ -542,7 +541,6 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
-    "ChatCompletionInputGrammarTypeType",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
@@ -1263,7 +1261,6 @@ def __dir__():
     ChatCompletionInputFunctionDefinition,  # noqa: F401
     ChatCompletionInputFunctionName,  # noqa: F401
     ChatCompletionInputGrammarType,  # noqa: F401
-    ChatCompletionInputGrammarTypeType,  # noqa: F401
     ChatCompletionInputMessage,  # noqa: F401
     ChatCompletionInputMessageChunk,  # noqa: F401
     ChatCompletionInputMessageChunkType,  # noqa: F401
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 92f286792b..87e5f66fd5 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -24,7 +24,6 @@
     ChatCompletionInputFunctionDefinition,
     ChatCompletionInputFunctionName,
     ChatCompletionInputGrammarType,
-    ChatCompletionInputGrammarTypeType,
     ChatCompletionInputMessage,
     ChatCompletionInputMessageChunk,
     ChatCompletionInputMessageChunkType,
diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 9978c0a5a9..6e53056d24 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -45,19 +45,54 @@ class ChatCompletionInputMessage(BaseInferenceType):
     tool_calls: Optional[List[ChatCompletionInputToolCall]] = None
 
 
-ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"]
+@dataclass_with_extra
+class JSONSchema(BaseInferenceType):
+    name: str
+    """
+    The name of the response format.
+    """
+
+    description: Optional[str] = None
+    """
+    A description of what the response format is for, used by the model to determine
+    how to respond in the format.
+    """
+
+    schema: Optional[Dict[str, object]] = None
+    """
+    The schema for the response format, described as a JSON Schema object. Learn how
+    to build JSON schemas [here](https://json-schema.org/).
+    """
+
-@dataclass_with_extra
-class ChatCompletionInputGrammarType(BaseInferenceType):
-    type: "ChatCompletionInputGrammarTypeType"
-    value: Any
-    """A string that represents a [JSON Schema](https://json-schema.org/).
-    JSON Schema is a declarative language that allows to annotate JSON documents
-    with types and descriptions.
+    strict: Optional[bool] = None
+    """
+    Whether to enable strict schema adherence when generating the output. If set to
+    true, the model will always follow the exact schema defined in the `schema`
+    field.
     """
 
 
+@dataclass_with_extra
+class ResponseFormatText(BaseInferenceType):
+    type: Literal["text"]
+
+
+@dataclass_with_extra
+class ResponseFormatJSONSchema(BaseInferenceType):
+    type: Literal["json_schema"]
+    json_schema: JSONSchema
+
+
+@dataclass_with_extra
+class ResponseFormatJSONObject(BaseInferenceType):
+    type: Literal["json_object"]
+
+
+ChatCompletionInputGrammarType: TypeAlias = Union[
+    ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject
+]
+
+
 @dataclass_with_extra
 class ChatCompletionInputStreamOptions(BaseInferenceType):
     include_usage: Optional[bool] = None
diff --git a/src/huggingface_hub/inference/_providers/cerebras.py b/src/huggingface_hub/inference/_providers/cerebras.py
index 12b1815832..a9b9c3aacb 100644
--- a/src/huggingface_hub/inference/_providers/cerebras.py
+++ b/src/huggingface_hub/inference/_providers/cerebras.py
@@ -1,4 +1,4 @@
-from huggingface_hub.inference._providers._common import BaseConversationalTask
+from ._common import BaseConversationalTask
 
 
 class CerebrasConversationalTask(BaseConversationalTask):
diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 0dc35c7e6c..7bf19edc0c 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -1,6 +1,8 @@
-from huggingface_hub.inference._providers._common import (
-    BaseConversationalTask,
-)
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
+from ._common import BaseConversationalTask
 
 
 _PROVIDER = "cohere"
@@ -13,3 +15,17 @@ def __init__(self):
 
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/compatibility/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format")
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index 9fc9aba806..3ffec6a633 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -1,3 +1,7 @@
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
 from ._common import BaseConversationalTask
 
 
@@ -7,3 +11,17 @@ def __init__(self):
 
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/inference/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format")
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 7923567be3..4ccb976e78 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -96,13 +96,20 @@ def __init__(self):
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
+        payload = filter_none(parameters)
         mapped_model = provider_mapping_info.provider_id
         payload_model = parameters.get("model") or mapped_model
 
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+        response_format = parameters.pop("response_format")
+        if response_format is not None and response_format["type"] == "json_schema":
+            payload["response_format"] = {
+                "type": "json_object",
+                "value": response_format["json_schema"]["schema"],
+            }
+        return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         base_url = (
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index eccc9ee83b..07ec82c3e3 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -30,6 +30,17 @@ class NebiusConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        return payload
+
 
 class NebiusTextToImageTask(TaskProviderHelper):
     def __init__(self):
diff --git a/src/huggingface_hub/inference/_providers/sambanova.py b/src/huggingface_hub/inference/_providers/sambanova.py
index 92bc95daa4..ed96fb766c 100644
--- a/src/huggingface_hub/inference/_providers/sambanova.py
+++ b/src/huggingface_hub/inference/_providers/sambanova.py
@@ -9,6 +9,20 @@ class SambanovaConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="sambanova", base_url="https://api.sambanova.ai")
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        response_format_config = parameters.get("response_format")
+        if isinstance(response_format_config, dict):
+            if response_format_config.get("type") == "json_schema":
+                json_schema_config = response_format_config.get("json_schema", {})
+                strict = json_schema_config.get("strict")
+                if isinstance(json_schema_config, dict) and (strict is True or strict is None):
+                    json_schema_config["strict"] = False
+
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        return payload
+
 
 class SambanovaFeatureExtractionTask(TaskProviderHelper):
     def __init__(self):
diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py
index b27e332938..5b30231f06 100644
--- a/src/huggingface_hub/inference/_providers/together.py
+++ b/src/huggingface_hub/inference/_providers/together.py
@@ -51,6 +51,20 @@ class TogetherConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
+
 
 class TogetherTextToImageTask(TogetherTask):
     def __init__(self):

From b15d02d9966fd03b10cbaabc3d828c01a5afc5fa Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:26:30 +0200
Subject: [PATCH 02/10] fix

---
 .../inference/_generated/types/chat_completion.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 6e53056d24..020bd57de5 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -88,9 +88,7 @@ class ResponseFormatJSONObject(BaseInferenceType):
     type: Literal["json_object"]
 
 
-ChatCompletionInputGrammarType: TypeAlias = Union[
-    ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject
-]
+ChatCompletionInputGrammarType = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]
 
 
 @dataclass_with_extra

From 638d45a7ada6cc6210ac9895116404801a091685 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:44:47 +0200
Subject: [PATCH 03/10] style

---
 docs/source/en/package_reference/inference_types.md | 10 ++++++++--
 docs/source/ko/package_reference/inference_types.md | 10 ++++++++--
 src/huggingface_hub/__init__.py                     |  4 ++++
 .../inference/_generated/types/__init__.py          |  4 ++++
 .../inference/_generated/types/chat_completion.py   |  3 ---
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md
index 9bd2528fe5..94578d3a3a 100644
--- a/docs/source/en/package_reference/inference_types.md
+++ b/docs/source/en/package_reference/inference_types.md
@@ -57,8 +57,6 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
-
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
@@ -109,6 +107,14 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
+[[autodoc]] huggingface_hub.JSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatText
+
 
 
 ## depth_estimation
diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md
index bf9de4b8e7..2970a0ec0f 100644
--- a/docs/source/ko/package_reference/inference_types.md
+++ b/docs/source/ko/package_reference/inference_types.md
@@ -56,8 +56,6 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
-
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
@@ -108,6 +106,14 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
+[[autodoc]] huggingface_hub.JSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatText
+
 
 
 ## depth_estimation[[huggingface_hub.DepthEstimationInput]]
diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 726f1a62f0..bd834a0968 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -356,6 +356,7 @@
     "ImageToTextInput",
     "ImageToTextOutput",
     "ImageToTextParameters",
+    "JSONSchema",
     "ObjectDetectionBoundingBox",
     "ObjectDetectionInput",
     "ObjectDetectionOutputElement",
@@ -365,6 +366,9 @@
     "QuestionAnsweringInputData",
     "QuestionAnsweringOutputElement",
     "QuestionAnsweringParameters",
+    "ResponseFormatJSONObject",
+    "ResponseFormatJSONSchema",
+    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SummarizationInput",
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 87e5f66fd5..4bce05317a 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -51,6 +51,10 @@
     ChatCompletionStreamOutputLogprobs,
     ChatCompletionStreamOutputTopLogprob,
     ChatCompletionStreamOutputUsage,
+    JSONSchema,
+    ResponseFormatJSONObject,
+    ResponseFormatJSONSchema,
+    ResponseFormatText,
 )
 from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
 from .document_question_answering import (
diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 020bd57de5..53c864c0cd 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -51,19 +51,16 @@ class JSONSchema(BaseInferenceType):
     """
     The name of the response format.
     """
-
     description: Optional[str] = None
     """
     A description of what the response format is for, used by the model to determine
     how to respond in the format.
     """
-
     schema: Optional[Dict[str, object]] = None
     """
     The schema for the response format, described as a JSON Schema object. Learn how
     to build JSON schemas [here](https://json-schema.org/).
     """
-
     strict: Optional[bool] = None
     """
     Whether to enable strict schema adherence when generating the output. If set to

From 16698de0feff4c9ccaa33ebbcf8ba45b29a70372 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:46:06 +0200
Subject: [PATCH 04/10] run style again

---
 src/huggingface_hub/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index bd834a0968..dfbbf63953 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -645,6 +645,7 @@
     "InferenceEndpointTimeoutError",
     "InferenceEndpointType",
     "InferenceTimeoutError",
+    "JSONSchema",
     "KerasModelHubMixin",
     "ModelCard",
     "ModelCardData",
@@ -670,6 +671,9 @@
     "RepoCard",
     "RepoUrl",
     "Repository",
+    "ResponseFormatJSONObject",
+    "ResponseFormatJSONSchema",
+    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SpaceCard",
@@ -1320,6 +1324,7 @@ def __dir__():
     ImageToTextInput,  # noqa: F401
     ImageToTextOutput,  # noqa: F401
     ImageToTextParameters,  # noqa: F401
+    JSONSchema,  # noqa: F401
     ObjectDetectionBoundingBox,  # noqa: F401
     ObjectDetectionInput,  # noqa: F401
     ObjectDetectionOutputElement,  # noqa: F401
@@ -1329,6 +1334,9 @@ def __dir__():
     QuestionAnsweringInputData,  # noqa: F401
     QuestionAnsweringOutputElement,  # noqa: F401
     QuestionAnsweringParameters,  # noqa: F401
+    ResponseFormatJSONObject,  # noqa: F401
+    ResponseFormatJSONSchema,  # noqa: F401
+    ResponseFormatText,  # noqa: F401
     SentenceSimilarityInput,  # noqa: F401
     SentenceSimilarityInputData,  # noqa: F401
     SummarizationInput,  # noqa: F401

From 1c3300fb7ca4c3652d307bf2a2ad782771b02d38 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:52:29 +0200
Subject: [PATCH 05/10] fix tests

---
 src/huggingface_hub/inference/_providers/cohere.py       | 2 +-
 src/huggingface_hub/inference/_providers/fireworks_ai.py | 2 +-
 src/huggingface_hub/inference/_providers/hf_inference.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 7bf19edc0c..48905b84e4 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -20,7 +20,7 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format")
+        response_format = parameters.pop("response_format", None)
         if response_format is not None and response_format["type"] == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index 3ffec6a633..72cf285ec5 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -16,7 +16,7 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format")
+        response_format = parameters.pop("response_format", None)
         if response_format is not None and response_format["type"] == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 4ccb976e78..511266eeba 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -103,7 +103,7 @@ def _prepare_payload_as_dict(
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        response_format = parameters.pop("response_format")
+        response_format = parameters.pop("response_format", None)
         if response_format is not None and response_format["type"] == "json_schema":
             payload["response_format"] = {
                 "type": "json_object",

From 720d95f166ff58f59248acdaf3e2f3e201fec506 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 21 May 2025 16:21:22 +0200
Subject: [PATCH 06/10] rename types

---
 .../en/package_reference/inference_types.md | 16 ++++++++--------
 .../ko/package_reference/inference_types.md | 16 ++++++++--------
 src/huggingface_hub/__init__.py             |  8 ++++----
 .../inference/_generated/types/__init__.py  |  8 ++++----
 .../_generated/types/chat_completion.py     | 16 ++++++++++------
 5 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md
index 94578d3a3a..1c90e9facb 100644
--- a/docs/source/en/package_reference/inference_types.md
+++ b/docs/source/en/package_reference/inference_types.md
@@ -57,10 +57,18 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
+[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema
+
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
 
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText
+
 [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions
 
 [[autodoc]] huggingface_hub.ChatCompletionInputTool
@@ -107,14 +115,6 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
-[[autodoc]] huggingface_hub.JSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONObject
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatText
-
 
 
 ## depth_estimation
diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md
index 2970a0ec0f..3746086ed2 100644
--- a/docs/source/ko/package_reference/inference_types.md
+++ b/docs/source/ko/package_reference/inference_types.md
@@ -56,10 +56,18 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
+[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema
+
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
 
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText
+
 [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions
 
 [[autodoc]] huggingface_hub.ChatCompletionInputTool
@@ -106,14 +114,6 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
-[[autodoc]] huggingface_hub.JSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONObject
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatText
-
 
 
 ## depth_estimation[[huggingface_hub.DepthEstimationInput]]
diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 43112b5f06..702778e0c4 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -301,9 +301,13 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
+    "ChatCompletionInputJSONSchema",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
+    "ChatCompletionInputResponseFormatJSONObject",
+    "ChatCompletionInputResponseFormatJSONSchema",
+    "ChatCompletionInputResponseFormatText",
     "ChatCompletionInputStreamOptions",
     "ChatCompletionInputTool",
     "ChatCompletionInputToolCall",
@@ -356,7 +360,6 @@
     "ImageToTextInput",
     "ImageToTextOutput",
     "ImageToTextParameters",
-    "JSONSchema",
     "ObjectDetectionBoundingBox",
     "ObjectDetectionInput",
     "ObjectDetectionOutputElement",
@@ -366,9 +369,6 @@
     "QuestionAnsweringInputData",
     "QuestionAnsweringOutputElement",
     "QuestionAnsweringParameters",
-    "ResponseFormatJSONObject",
-    "ResponseFormatJSONSchema",
-    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SummarizationInput",
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 4bce05317a..63f6a653d6 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -24,9 +24,13 @@
     ChatCompletionInputFunctionDefinition,
     ChatCompletionInputFunctionName,
     ChatCompletionInputGrammarType,
+    ChatCompletionInputJSONSchema,
     ChatCompletionInputMessage,
     ChatCompletionInputMessageChunk,
     ChatCompletionInputMessageChunkType,
+    ChatCompletionInputResponseFormatJSONObject,
+    ChatCompletionInputResponseFormatJSONSchema,
+    ChatCompletionInputResponseFormatText,
     ChatCompletionInputStreamOptions,
     ChatCompletionInputTool,
     ChatCompletionInputToolCall,
@@ -51,10 +55,6 @@
     ChatCompletionStreamOutputLogprobs,
     ChatCompletionStreamOutputTopLogprob,
     ChatCompletionStreamOutputUsage,
-    JSONSchema,
-    ResponseFormatJSONObject,
-    ResponseFormatJSONSchema,
-    ResponseFormatText,
 )
 from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
 from .document_question_answering import (
diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 53c864c0cd..fe455ee710 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -46,7 +46,7 @@ class ChatCompletionInputMessage(BaseInferenceType):
 
 
 @dataclass_with_extra
-class JSONSchema(BaseInferenceType):
+class ChatCompletionInputJSONSchema(BaseInferenceType):
     name: str
     """
     The name of the response format.
@@ -70,22 +70,26 @@ class JSONSchema(BaseInferenceType):
 
 
 @dataclass_with_extra
-class ResponseFormatText(BaseInferenceType):
+class ChatCompletionInputResponseFormatText(BaseInferenceType):
     type: Literal["text"]
 
 
 @dataclass_with_extra
-class ResponseFormatJSONSchema(BaseInferenceType):
+class ChatCompletionInputResponseFormatJSONSchema(BaseInferenceType):
     type: Literal["json_schema"]
-    json_schema: JSONSchema
+    json_schema: ChatCompletionInputJSONSchema
 
 
 @dataclass_with_extra
-class ResponseFormatJSONObject(BaseInferenceType):
+class ChatCompletionInputResponseFormatJSONObject(BaseInferenceType):
     type: Literal["json_object"]
 
 
-ChatCompletionInputGrammarType = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]
+ChatCompletionInputGrammarType = Union[
+    ChatCompletionInputResponseFormatText,
+    ChatCompletionInputResponseFormatJSONSchema,
+    ChatCompletionInputResponseFormatJSONObject,
+]
 
 
 @dataclass_with_extra

From eaedda08d881656af13586f0c01df5228a4afcc7 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 21 May 2025 17:13:41 +0200
Subject: [PATCH 07/10] review suggestions

---
 src/huggingface_hub/__init__.py          | 16 ++++++++--------
 .../inference/_providers/cohere.py       |  6 ++++--
 .../inference/_providers/fireworks_ai.py |  5 +++--
 .../inference/_providers/hf_inference.py |  5 +++--
 .../inference/_providers/nebius.py       |  6 ++++--
 .../inference/_providers/together.py     |  4 +++-
 6 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 702778e0c4..fe29e43e19 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -548,9 +548,13 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
+    "ChatCompletionInputJSONSchema",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
+    "ChatCompletionInputResponseFormatJSONObject",
+    "ChatCompletionInputResponseFormatJSONSchema",
+    "ChatCompletionInputResponseFormatText",
     "ChatCompletionInputStreamOptions",
     "ChatCompletionInputTool",
     "ChatCompletionInputToolCall",
@@ -648,7 +652,6 @@
     "InferenceEndpointTimeoutError",
     "InferenceEndpointType",
     "InferenceTimeoutError",
-    "JSONSchema",
     "KerasModelHubMixin",
     "MCPClient",
     "ModelCard",
     "ModelCardData",
@@ -675,9 +678,6 @@
     "RepoCard",
     "RepoUrl",
     "Repository",
-    "ResponseFormatJSONObject",
-    "ResponseFormatJSONSchema",
-    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SpaceCard",
@@ -1273,9 +1273,13 @@ def __dir__():
     ChatCompletionInputFunctionDefinition,  # noqa: F401
     ChatCompletionInputFunctionName,  # noqa: F401
     ChatCompletionInputGrammarType,  # noqa: F401
+    ChatCompletionInputJSONSchema,  # noqa: F401
     ChatCompletionInputMessage,  # noqa: F401
     ChatCompletionInputMessageChunk,  # noqa: F401
     ChatCompletionInputMessageChunkType,  # noqa: F401
+    ChatCompletionInputResponseFormatJSONObject,  # noqa: F401
+    ChatCompletionInputResponseFormatJSONSchema,  # noqa: F401
+    ChatCompletionInputResponseFormatText,  # noqa: F401
     ChatCompletionInputStreamOptions,  # noqa: F401
     ChatCompletionInputTool,  # noqa: F401
     ChatCompletionInputToolCall,  # noqa: F401
@@ -1328,7 +1332,6 @@ def __dir__():
     ImageToTextInput,  # noqa: F401
     ImageToTextOutput,  # noqa: F401
     ImageToTextParameters,  # noqa: F401
-    JSONSchema,  # noqa: F401
     ObjectDetectionBoundingBox,  # noqa: F401
     ObjectDetectionInput,  # noqa: F401
     ObjectDetectionOutputElement,  # noqa: F401
@@ -1338,9 +1341,6 @@ def __dir__():
     QuestionAnsweringInputData,  # noqa: F401
     QuestionAnsweringOutputElement,  # noqa: F401
     QuestionAnsweringParameters,  # noqa: F401
-    ResponseFormatJSONObject,  # noqa: F401
-    ResponseFormatJSONSchema,  # noqa: F401
-    ResponseFormatText,  # noqa: F401
     SentenceSimilarityInput,  # noqa: F401
     SentenceSimilarityInputData,  # noqa: F401
     SummarizationInput,  # noqa: F401
diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 48905b84e4..4916d2a640 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -20,12 +20,14 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["response_format"] = {  # type: ignore [index]
                     "type": "json_object",
                     "schema": json_schema_details["schema"],
                 }
+        # Only remove response_format from parameters if we've handled it
+        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index 72cf285ec5..cfc8be67f3 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -16,12 +16,13 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["response_format"] = {  # type: ignore [index]
                     "type": "json_object",
                     "schema": json_schema_details["schema"],
                 }
+        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 511266eeba..90596df05f 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -103,7 +103,7 @@ def _prepare_payload_as_dict(
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             payload["response_format"] = {
                 "type": "json_object",
                 "value": response_format["json_schema"]["schema"],
             }
+        parameters.pop("response_format", None)
         return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index 07ec82c3e3..5a7ca72956 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -34,11 +34,13 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        # Only remove response_format from parameters if we've handled it
+        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py
index 5b30231f06..e1fbdbc24b 100644
--- a/src/huggingface_hub/inference/_providers/together.py
+++ b/src/huggingface_hub/inference/_providers/together.py
@@ -55,7 +55,7 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
+        response_format = parameters.get("response_format")
         if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
@@ -63,6 +63,8 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "schema": json_schema_details["schema"],
             }
+        # Only remove response_format from parameters if we've handled it
+        parameters.pop("response_format", None)
         return payload

From ce45ef0ce6bb9da55ed98f3a240f757954e3db1f Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 21 May 2025 17:51:25 +0200
Subject: [PATCH 08/10] no need to mutate parameters

---
 src/huggingface_hub/inference/_providers/cohere.py       | 3 +--
 src/huggingface_hub/inference/_providers/fireworks_ai.py | 1 -
 src/huggingface_hub/inference/_providers/hf_inference.py | 1 -
 src/huggingface_hub/inference/_providers/nebius.py       | 2 --
 src/huggingface_hub/inference/_providers/together.py     | 3 +--
 5 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 4916d2a640..a5e9191cae 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -28,6 +28,5 @@ def _prepare_payload_as_dict(
                     "type": "json_object",
                     "schema": json_schema_details["schema"],
                 }
-        # Only remove response_format from parameters if we've handled it
-        parameters.pop("response_format", None)
+
         return payload
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index cfc8be67f3..b4cc19a570 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -24,5 +24,4 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "schema": json_schema_details["schema"],
             }
-        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 90596df05f..0b2cf1e7a3 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -109,7 +109,6 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "value": response_format["json_schema"]["schema"],
             }
-        parameters.pop("response_format", None)
         return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index 5a7ca72956..85ad67c4c8 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -39,8 +39,6 @@ def _prepare_payload_as_dict(
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
-        # Only remove response_format from parameters if we've handled it
-        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py
index e1fbdbc24b..de166b7baf 100644
--- a/src/huggingface_hub/inference/_providers/together.py
+++ b/src/huggingface_hub/inference/_providers/together.py
@@ -63,8 +63,7 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "schema": json_schema_details["schema"],
             }
-        # Only remove response_format from parameters if we've handled it
-        parameters.pop("response_format", None)
+
        return payload

From 5ea6a0fc35c0d03b7660301da4c6ccdd644569ad Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Thu, 22 May 2025 11:24:16 +0200
Subject: [PATCH 09/10] docs

---
 docs/source/en/guides/inference.md | 98 ++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index d59138ab0d..57083b85d1 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -308,6 +308,104 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There
 
 </Tip>
 
+## Function Calling
+
+Function calling allows LLMs to interact with external tools, such as defined functions or APIs. This enables users to easily build applications tailored to specific use cases and real-world tasks. `InferenceClient` implements the same tool calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
+
+```python
+from huggingface_hub import InferenceClient
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get current temperature for a given location.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and country e.g. Paris, France"
+                    }
+                },
+                "required": ["location"],
+            },
+        }
+    }
+]
+
+client = InferenceClient(provider="nebius")
+
+response = client.chat.completions.create(
+    model="Qwen/Qwen2.5-72B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like the next 3 days in London, UK?"
+        }
+    ],
+    tools=tools,
+    tool_choice="auto",
+)
+
+print(response.choices[0].message.tool_calls[0].function.arguments)
+
+```
+
+## Structured Outputs & JSON Mode
+
+`InferenceClient` supports both JSON mode and Structured Outputs for controlling and validating the format of model responses. JSON mode ensures that the LLM's output is a syntactically valid JSON object. This is useful when you want the model to return machine-readable data but don't require strict adherence to a specific structure. Structured Outputs build on JSON mode by enforcing a predefined schema. This guarantees not only valid JSON but also that the output matches an expected structure, making it ideal for reliable downstream processing.
+
+We follow the OpenAI API specs for both JSON mode and Structured Outputs. You can enable them via the `response_format` argument. Here is an example of Structured Outputs using [Cerebras](https://www.cerebras.ai/) as the inference provider:
+
+```python
+from huggingface_hub import InferenceClient
+
+json_schema = {
+    "name": "book",
+    "schema": {
+        "properties": {
+            "name": {
+                "title": "Name",
+                "type": "string",
+            },
+            "authors": {
+                "items": {"type": "string"},
+                "title": "Authors",
+                "type": "array",
+            },
+        },
+        "required": ["name", "authors"],
+        "title": "Book",
+        "type": "object",
+    },
+    "strict": True,
+}
+
+client = InferenceClient(
+    provider="cerebras",
+)
+
+completion = client.chat.completions.create(
+    model="Qwen/Qwen3-32B",
+    messages=[
+        {"role": "system", "content": "Extract the books information."},
+        {"role": "user", "content": "I recently read 'The Great Gatsby' by F. Scott Fitzgerald."},
+    ],
+    response_format={
+        "type": "json_schema",
+        "json_schema": json_schema,
+    },
+)
+
+print(completion.choices[0].message)
+```
+
+<Tip>
+
+Please refer to the providers' documentation to verify which models are supported by them for Structured Outputs and Function Calling.
+
+</Tip>
+
 
 ## Async client

From edd446539f6d180222a7d6285303615d9267cd6c Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Thu, 22 May 2025 11:33:45 +0200
Subject: [PATCH 10/10] better

---
 docs/source/en/guides/inference.md | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index 57083b85d1..1817aa9460 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -310,7 +310,8 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There
 
 ## Function Calling
 
-Function calling allows LLMs to interact with external tools, such as defined functions or APIs. This enables users to easily build applications tailored to specific use cases and real-world tasks. `InferenceClient` implements the same tool calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
+Function calling allows LLMs to interact with external tools, such as defined functions or APIs. This enables users to easily build applications tailored to specific use cases and real-world tasks.
+`InferenceClient` implements the same tool calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
 
 ```python
 from huggingface_hub import InferenceClient
@@ -353,9 +354,15 @@ print(response.choices[0].message.tool_calls[0].function.arguments)
 
 ```
 
+<Tip>
+
+Please refer to the providers' documentation to verify which models are supported by them for Function/Tool Calling.
+
+</Tip>
+
 ## Structured Outputs & JSON Mode
 
-`InferenceClient` supports both JSON mode and Structured Outputs for controlling and validating the format of model responses. JSON mode ensures that the LLM's output is a syntactically valid JSON object. This is useful when you want the model to return machine-readable data but don't require strict adherence to a specific structure. Structured Outputs build on JSON mode by enforcing a predefined schema. This guarantees not only valid JSON but also that the output matches an expected structure, making it ideal for reliable downstream processing.
+`InferenceClient` supports JSON mode for syntactically valid JSON responses and Structured Outputs for schema-enforced responses. JSON mode provides machine-readable data without strict structure, while Structured Outputs guarantee both valid JSON and adherence to a predefined schema for reliable downstream processing.
 
 We follow the OpenAI API specs for both JSON mode and Structured Outputs. You can enable them via the `response_format` argument. Here is an example of Structured Outputs using [Cerebras](https://www.cerebras.ai/) as the inference provider:
 
@@ -383,9 +390,8 @@ json_schema = {
     "strict": True,
 }
 
-client = InferenceClient(
-    provider="cerebras",
-)
+client = InferenceClient(provider="cerebras")
+
 
 completion = client.chat.completions.create(
     model="Qwen/Qwen3-32B",
@@ -403,7 +409,7 @@ print(completion.choices[0].message)
 ```
 
 <Tip>
 
-Please refer to the providers' documentation to verify which models are supported by them for Structured Outputs and Function Calling.
+Please refer to the providers' documentation to verify which models support Structured Outputs and JSON Mode.
 
 </Tip>
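---

The docs added in PATCH 09/10 enable both JSON mode and Structured Outputs through the `response_format` argument, but only show a Structured Outputs example. For completeness, here is a minimal sketch of the JSON mode variant, the `{"type": "json_object"}` arm of the `ChatCompletionInputGrammarType` union introduced in these patches. The provider and model below are illustrative placeholders, not taken from the patches:

```python
from huggingface_hub import InferenceClient

# Provider and model are illustrative placeholders; any chat provider/model
# combination that supports JSON mode should work similarly.
client = InferenceClient(provider="nebius")

completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-72B-Instruct",
    messages=[
        {"role": "system", "content": "Answer with a single JSON object."},
        {"role": "user", "content": "List three primary colors with a short note on each."},
    ],
    # JSON mode: the output is constrained to be syntactically valid JSON,
    # but no particular schema is enforced.
    response_format={"type": "json_object"},
)

print(completion.choices[0].message.content)
```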
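The renamed input types exported in PATCH 06/07 can also be used to build the same `response_format` in a typed way. This is a sketch under the assumption that the generated dataclasses accept keyword arguments as usual; plain dicts, as used in the docs examples, work equally well:

```python
from huggingface_hub import (
    ChatCompletionInputJSONSchema,
    ChatCompletionInputResponseFormatJSONSchema,
)

# Typed equivalent of the dict-based `response_format` from the docs example.
response_format = ChatCompletionInputResponseFormatJSONSchema(
    type="json_schema",
    json_schema=ChatCompletionInputJSONSchema(
        name="book",
        schema={
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
        strict=True,
    ),
)

print(response_format)
```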
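To make the provider-side behavior concrete, the conversion these patches implement can be distilled into a standalone function: an OpenAI-style `json_schema` response format is rewritten into the payload each backend expects (Together, Fireworks, and Cohere take `{"type": "json_object", "schema": ...}`, while Nebius takes the bare schema under a `guided_json` key). This is an illustration derived from the diffs above, not the library's public API; the function name and provider handling are simplified assumptions:

```python
from typing import Any, Dict


def convert_openai_response_format(parameters: Dict[str, Any], provider: str) -> Dict[str, Any]:
    """Illustrative distillation of the per-provider logic in the patches above."""
    # Copy everything except response_format, which is translated below.
    payload: Dict[str, Any] = {k: v for k, v in parameters.items() if k != "response_format"}
    response_format = parameters.get("response_format")
    if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
        json_schema_details = response_format.get("json_schema")
        if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
            if provider == "nebius":
                # Nebius expects the bare schema under `guided_json`.
                payload["guided_json"] = json_schema_details["schema"]
            else:
                # Together, Fireworks and Cohere expect a `json_object` format
                # that carries the schema directly.
                payload["response_format"] = {
                    "type": "json_object",
                    "schema": json_schema_details["schema"],
                }
    return payload


# Example: an OpenAI-style input becomes a Together-style payload.
params = {
    "temperature": 0.2,
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "book", "schema": {"type": "object"}, "strict": True},
    },
}
print(convert_openai_response_format(params, "together"))
# {'temperature': 0.2, 'response_format': {'type': 'json_object', 'schema': {'type': 'object'}}}
```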