[Inference Providers] Fix structured output schema in chat completion #3082

Merged: 12 commits, May 22, 2025
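This PR replaces the bespoke ChatCompletionInputGrammarType dataclass with OpenAI-style response-format types and maps them onto each provider's structured-output parameters. A minimal usage sketch of what the fix enables; the provider, model name, and schema below are placeholders and assume the target deployment supports structured output:

from huggingface_hub import InferenceClient

client = InferenceClient(provider="fireworks-ai")

# OpenAI-style response_format; the "json_schema" variant is the one this PR fixes.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "person",
        "schema": {
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
            "required": ["name", "age"],
        },
        "strict": True,
    },
}

completion = client.chat_completion(
    messages=[{"role": "user", "content": "Invent a fictional person."}],
    model="meta-llama/Llama-3.1-8B-Instruct",
    response_format=response_format,
)
print(completion.choices[0].message.content)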
docs/source/en/package_reference/inference_types.md (+8, -2)
@@ -57,8 +57,6 @@ This part of the lib is still under development and will be improved in future releases.

[[autodoc]] huggingface_hub.ChatCompletionInputFunctionName

-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
-
[[autodoc]] huggingface_hub.ChatCompletionInputMessage

[[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
@@ -109,6 +107,14 @@ This part of the lib is still under development and will be improved in future releases.

[[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage

+[[autodoc]] huggingface_hub.JSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatText
+


## depth_estimation
docs/source/ko/package_reference/inference_types.md (+8, -2)
@@ -56,8 +56,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ChatCompletionInputFunctionName

-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
-
[[autodoc]] huggingface_hub.ChatCompletionInputMessage

[[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
@@ -108,6 +106,14 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage

+[[autodoc]] huggingface_hub.JSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatText
+


## depth_estimation[[huggingface_hub.DepthEstimationInput]]
src/huggingface_hub/__init__.py (+12, -3)
@@ -301,7 +301,6 @@
    "ChatCompletionInputFunctionDefinition",
    "ChatCompletionInputFunctionName",
    "ChatCompletionInputGrammarType",
-   "ChatCompletionInputGrammarTypeType",
    "ChatCompletionInputMessage",
    "ChatCompletionInputMessageChunk",
    "ChatCompletionInputMessageChunkType",
@@ -357,6 +356,7 @@
    "ImageToTextInput",
    "ImageToTextOutput",
    "ImageToTextParameters",
+   "JSONSchema",
    "ObjectDetectionBoundingBox",
    "ObjectDetectionInput",
    "ObjectDetectionOutputElement",
@@ -366,6 +366,9 @@
    "QuestionAnsweringInputData",
    "QuestionAnsweringOutputElement",
    "QuestionAnsweringParameters",
+   "ResponseFormatJSONObject",
+   "ResponseFormatJSONSchema",
+   "ResponseFormatText",
    "SentenceSimilarityInput",
    "SentenceSimilarityInputData",
    "SummarizationInput",
@@ -545,7 +548,6 @@
    "ChatCompletionInputFunctionDefinition",
    "ChatCompletionInputFunctionName",
    "ChatCompletionInputGrammarType",
-   "ChatCompletionInputGrammarTypeType",
    "ChatCompletionInputMessage",
    "ChatCompletionInputMessageChunk",
    "ChatCompletionInputMessageChunkType",
@@ -646,6 +648,7 @@
    "InferenceEndpointTimeoutError",
    "InferenceEndpointType",
    "InferenceTimeoutError",
+   "JSONSchema",
    "KerasModelHubMixin",
    "MCPClient",
    "ModelCard",
@@ -672,6 +675,9 @@
    "RepoCard",
    "RepoUrl",
    "Repository",
+   "ResponseFormatJSONObject",
+   "ResponseFormatJSONSchema",
+   "ResponseFormatText",
    "SentenceSimilarityInput",
    "SentenceSimilarityInputData",
    "SpaceCard",
@@ -1267,7 +1273,6 @@ def __dir__():
    ChatCompletionInputFunctionDefinition, # noqa: F401
    ChatCompletionInputFunctionName, # noqa: F401
    ChatCompletionInputGrammarType, # noqa: F401
-   ChatCompletionInputGrammarTypeType, # noqa: F401
    ChatCompletionInputMessage, # noqa: F401
    ChatCompletionInputMessageChunk, # noqa: F401
    ChatCompletionInputMessageChunkType, # noqa: F401
@@ -1323,6 +1328,7 @@ def __dir__():
    ImageToTextInput, # noqa: F401
    ImageToTextOutput, # noqa: F401
    ImageToTextParameters, # noqa: F401
+   JSONSchema, # noqa: F401
    ObjectDetectionBoundingBox, # noqa: F401
    ObjectDetectionInput, # noqa: F401
    ObjectDetectionOutputElement, # noqa: F401
@@ -1332,6 +1338,9 @@
    QuestionAnsweringInputData, # noqa: F401
    QuestionAnsweringOutputElement, # noqa: F401
    QuestionAnsweringParameters, # noqa: F401
+   ResponseFormatJSONObject, # noqa: F401
+   ResponseFormatJSONSchema, # noqa: F401
+   ResponseFormatText, # noqa: F401
    SentenceSimilarityInput, # noqa: F401
    SentenceSimilarityInputData, # noqa: F401
    SummarizationInput, # noqa: F401
src/huggingface_hub/inference/_generated/types/__init__.py (+4, -1)
@@ -24,7 +24,6 @@
    ChatCompletionInputFunctionDefinition,
    ChatCompletionInputFunctionName,
    ChatCompletionInputGrammarType,
-   ChatCompletionInputGrammarTypeType,
    ChatCompletionInputMessage,
    ChatCompletionInputMessageChunk,
    ChatCompletionInputMessageChunkType,
@@ -52,6 +51,10 @@
    ChatCompletionStreamOutputLogprobs,
    ChatCompletionStreamOutputTopLogprob,
    ChatCompletionStreamOutputUsage,
+   JSONSchema,
+   ResponseFormatJSONObject,
+   ResponseFormatJSONSchema,
+   ResponseFormatText,
)
from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
from .document_question_answering import (
src/huggingface_hub/inference/_generated/types/chat_completion.py (+39, -9)
@@ -3,7 +3,7 @@
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra
@@ -45,17 +45,47 @@ class ChatCompletionInputMessage(BaseInferenceType):
    tool_calls: Optional[List[ChatCompletionInputToolCall]] = None


-ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"]
-
-
-@dataclass_with_extra
-class ChatCompletionInputGrammarType(BaseInferenceType):
-    type: "ChatCompletionInputGrammarTypeType"
-    value: Any
-    """A string that represents a [JSON Schema](https://json-schema.org/).
-    JSON Schema is a declarative language that allows to annotate JSON documents
-    with types and descriptions.
-    """
+@dataclass_with_extra
+class JSONSchema(BaseInferenceType):
+    name: str
+    """
+    The name of the response format.
+    """
+    description: Optional[str] = None
+    """
+    A description of what the response format is for, used by the model to determine
+    how to respond in the format.
+    """
+    schema: Optional[Dict[str, object]] = None
+    """
+    The schema for the response format, described as a JSON Schema object. Learn how
+    to build JSON schemas [here](https://json-schema.org/).
+    """
+    strict: Optional[bool] = None
+    """
+    Whether to enable strict schema adherence when generating the output. If set to
+    true, the model will always follow the exact schema defined in the `schema`
+    field.
+    """
+
+
+@dataclass_with_extra
+class ResponseFormatText(BaseInferenceType):
+    type: Literal["text"]
+
+
+@dataclass_with_extra
+class ResponseFormatJSONSchema(BaseInferenceType):
+    type: Literal["json_schema"]
+    json_schema: JSONSchema
+
+
+@dataclass_with_extra
+class ResponseFormatJSONObject(BaseInferenceType):
+    type: Literal["json_object"]
+
+
+ChatCompletionInputGrammarType = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]


@dataclass_with_extra
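For reference, a short sketch of constructing the new union members defined above; the field names come from this diff, while the schema itself is illustrative:

from huggingface_hub import JSONSchema, ResponseFormatJSONSchema

# Dataclass equivalent of the wire-format dict
# {"type": "json_schema", "json_schema": {"name": ..., "schema": ..., "strict": ...}}
response_format = ResponseFormatJSONSchema(
    type="json_schema",
    json_schema=JSONSchema(
        name="answer",
        schema={"type": "object", "properties": {"result": {"type": "number"}}},
        strict=True,
    ),
)

Any of the three variants (ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject) can be passed as response_format to chat_completion.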
src/huggingface_hub/inference/_providers/cerebras.py (+1, -1)
@@ -1,4 +1,4 @@
-from huggingface_hub.inference._providers._common import BaseConversationalTask
+from ._common import BaseConversationalTask


class CerebrasConversationalTask(BaseConversationalTask):
src/huggingface_hub/inference/_providers/cohere.py (+19, -3)
@@ -1,6 +1,8 @@
-from huggingface_hub.inference._providers._common import (
-    BaseConversationalTask,
-)
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
+from ._common import BaseConversationalTask


_PROVIDER = "cohere"
@@ -13,3 +15,17 @@ def __init__(self):

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        return "/compatibility/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
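In effect, the Cohere override unwraps the OpenAI-style json_schema block into the flatter shape the provider's compatibility endpoint accepts; the Fireworks AI and Together overrides below apply the same mapping. An illustrative before/after with a toy schema:

# What the caller passes (OpenAI-style):
incoming = {
    "type": "json_schema",
    "json_schema": {"name": "answer", "schema": {"type": "object"}},
}

# What _prepare_payload_as_dict puts in the payload:
outgoing = {
    "type": "json_object",
    "schema": incoming["json_schema"]["schema"],
}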
src/huggingface_hub/inference/_providers/fireworks_ai.py (+18, -0)
@@ -1,3 +1,7 @@
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
from ._common import BaseConversationalTask


@@ -7,3 +11,17 @@ def __init__(self):

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        return "/inference/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
src/huggingface_hub/inference/_providers/hf_inference.py (+8, -1)
@@ -96,13 +96,20 @@ def __init__(self):
    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
+        payload = filter_none(parameters)
        mapped_model = provider_mapping_info.provider_id
        payload_model = parameters.get("model") or mapped_model

        if payload_model is None or payload_model.startswith(("http://", "https://")):
            payload_model = "dummy"

-        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+        response_format = parameters.pop("response_format", None)
+        if response_format is not None and response_format["type"] == "json_schema":
+            payload["response_format"] = {
+                "type": "json_object",
+                "value": response_format["json_schema"]["schema"],
+            }
+        return {**payload, "model": payload_model, "messages": inputs}

    def _prepare_url(self, api_key: str, mapped_model: str) -> str:
        base_url = (
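Note that the HF Inference backend (text-generation-inference) expects a different shape from the provider mappings above: the schema is nested under a "value" key rather than "schema". A small sketch of the rewrite, with a toy schema:

openai_style = {
    "type": "json_schema",
    "json_schema": {"name": "answer", "schema": {"type": "object"}},
}
# TGI-style grammar: the schema moves under "value"
tgi_style = {"type": "json_object", "value": openai_style["json_schema"]["schema"]}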
src/huggingface_hub/inference/_providers/nebius.py (+11, -0)
@@ -30,6 +30,17 @@ class NebiusConversationalTask(BaseConversationalTask):
    def __init__(self):
        super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        return payload
+

class NebiusTextToImageTask(TaskProviderHelper):
    def __init__(self):
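Nebius is handled differently again: instead of rewriting response_format, the schema is lifted into a top-level guided_json field (the guided-decoding parameter used by vLLM-compatible servers; the name here comes from this diff). Illustrative effect with a toy schema:

parameters = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "answer", "schema": {"type": "object"}},
    }
}
# After _prepare_payload_as_dict, the payload gains:
#     payload["guided_json"] == {"type": "object"}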
src/huggingface_hub/inference/_providers/sambanova.py (+14, -0)
@@ -9,6 +9,20 @@ class SambanovaConversationalTask(BaseConversationalTask):
    def __init__(self):
        super().__init__(provider="sambanova", base_url="https://api.sambanova.ai")

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        response_format_config = parameters.get("response_format")
+        if isinstance(response_format_config, dict) and response_format_config.get("type") == "json_schema":
+            json_schema_config = response_format_config.get("json_schema", {})
+            if isinstance(json_schema_config, dict):
+                # Downgrade strict=True (or unset) to strict=False before delegating.
+                strict = json_schema_config.get("strict")
+                if strict is True or strict is None:
+                    json_schema_config["strict"] = False
+
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        return payload


class SambanovaFeatureExtractionTask(TaskProviderHelper):
    def __init__(self):
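The SambaNova override leaves the response_format shape untouched and only forces strict off before delegating to the base implementation. A sketch of the in-place effect, assuming an OpenAI-style dict:

parameters = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "answer", "schema": {"type": "object"}, "strict": True},
    }
}
# After the override runs:
# parameters["response_format"]["json_schema"]["strict"] is False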
src/huggingface_hub/inference/_providers/together.py (+14, -0)
@@ -51,6 +51,20 @@ class TogetherConversationalTask(BaseConversationalTask):
    def __init__(self):
        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
+

class TogetherTextToImageTask(TogetherTask):
    def __init__(self):