From 877f0de9d66c0e6017aade53e378557ed485629b Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:10:47 +0200
Subject: [PATCH 01/10] fix structured output

---
 src/huggingface_hub/__init__.py               |  3 --
 .../inference/_generated/types/__init__.py    |  1 -
 .../_generated/types/chat_completion.py       | 53 +++++++++++++++----
 .../inference/_providers/cerebras.py          |  2 +-
 .../inference/_providers/cohere.py            | 22 ++++++--
 .../inference/_providers/fireworks_ai.py      | 18 +++++++
 .../inference/_providers/hf_inference.py      |  9 +++-
 .../inference/_providers/nebius.py            | 11 ++++
 .../inference/_providers/sambanova.py         | 14 +++++
 .../inference/_providers/together.py          | 14 +++++
 10 files changed, 129 insertions(+), 18 deletions(-)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 9e9135bd44..726f1a62f0 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -301,7 +301,6 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
-    "ChatCompletionInputGrammarTypeType",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
@@ -542,7 +541,6 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
-    "ChatCompletionInputGrammarTypeType",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
@@ -1263,7 +1261,6 @@ def __dir__():
     ChatCompletionInputFunctionDefinition,  # noqa: F401
     ChatCompletionInputFunctionName,  # noqa: F401
     ChatCompletionInputGrammarType,  # noqa: F401
-    ChatCompletionInputGrammarTypeType,  # noqa: F401
     ChatCompletionInputMessage,  # noqa: F401
     ChatCompletionInputMessageChunk,  # noqa: F401
     ChatCompletionInputMessageChunkType,  # noqa: F401
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 92f286792b..87e5f66fd5 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -24,7 +24,6 @@
     ChatCompletionInputFunctionDefinition,
     ChatCompletionInputFunctionName,
     ChatCompletionInputGrammarType,
-    ChatCompletionInputGrammarTypeType,
     ChatCompletionInputMessage,
     ChatCompletionInputMessageChunk,
     ChatCompletionInputMessageChunkType,
diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 9978c0a5a9..6e53056d24 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -45,19 +45,54 @@ class ChatCompletionInputMessage(BaseInferenceType):
     tool_calls: Optional[List[ChatCompletionInputToolCall]] = None
 
 
-ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"]
+@dataclass_with_extra
+class JSONSchema(BaseInferenceType):
+    name: str
+    """
+    The name of the response format.
+    """
+
+    description: Optional[str] = None
+    """
+    A description of what the response format is for, used by the model to determine
+    how to respond in the format.
+    """
+
+    schema: Optional[Dict[str, object]] = None
+    """
+    The schema for the response format, described as a JSON Schema object. Learn how
+    to build JSON schemas [here](https://json-schema.org/).
+    """
+
-@dataclass_with_extra
-class ChatCompletionInputGrammarType(BaseInferenceType):
-    type: "ChatCompletionInputGrammarTypeType"
-    value: Any
-    """A string that represents a [JSON Schema](https://json-schema.org/).
-    JSON Schema is a declarative language that allows to annotate JSON documents
-    with types and descriptions.
+    strict: Optional[bool] = None
+    """
+    Whether to enable strict schema adherence when generating the output. If set to
+    true, the model will always follow the exact schema defined in the `schema`
+    field.
     """
 
 
+@dataclass_with_extra
+class ResponseFormatText(BaseInferenceType):
+    type: Literal["text"]
+
+
+@dataclass_with_extra
+class ResponseFormatJSONSchema(BaseInferenceType):
+    type: Literal["json_schema"]
+    json_schema: JSONSchema
+
+
+@dataclass_with_extra
+class ResponseFormatJSONObject(BaseInferenceType):
+    type: Literal["json_object"]
+
+
+ChatCompletionInputGrammarType: TypeAlias = Union[
+    ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject
+]
+
+
 @dataclass_with_extra
 class ChatCompletionInputStreamOptions(BaseInferenceType):
     include_usage: Optional[bool] = None
diff --git a/src/huggingface_hub/inference/_providers/cerebras.py b/src/huggingface_hub/inference/_providers/cerebras.py
index 12b1815832..a9b9c3aacb 100644
--- a/src/huggingface_hub/inference/_providers/cerebras.py
+++ b/src/huggingface_hub/inference/_providers/cerebras.py
@@ -1,4 +1,4 @@
-from huggingface_hub.inference._providers._common import BaseConversationalTask
+from ._common import BaseConversationalTask
 
 
 class CerebrasConversationalTask(BaseConversationalTask):
diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 0dc35c7e6c..7bf19edc0c 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -1,6 +1,8 @@
-from huggingface_hub.inference._providers._common import (
-    BaseConversationalTask,
-)
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
+from ._common import BaseConversationalTask
 
 
 _PROVIDER = "cohere"
@@ -13,3 +15,17 @@ def __init__(self):
 
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/compatibility/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format")
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index 9fc9aba806..3ffec6a633 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -1,3 +1,7 @@
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
 from ._common import BaseConversationalTask
 
 
@@ -7,3 +11,17 @@ def __init__(self):
 
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/inference/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format")
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 7923567be3..4ccb976e78 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -96,13 +96,20 @@ def __init__(self):
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
+        payload = filter_none(parameters)
         mapped_model = provider_mapping_info.provider_id
         payload_model = parameters.get("model") or mapped_model
 
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+        response_format = parameters.pop("response_format")
+        if response_format is not None and response_format["type"] == "json_schema":
+            payload["response_format"] = {
+                "type": "json_object",
+                "value": response_format["json_schema"]["schema"],
+            }
+        return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         base_url = (
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index eccc9ee83b..07ec82c3e3 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -30,6 +30,17 @@ class NebiusConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if response_format is not None and response_format["type"] == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        return payload
+
 
 class NebiusTextToImageTask(TaskProviderHelper):
     def __init__(self):
diff --git a/src/huggingface_hub/inference/_providers/sambanova.py b/src/huggingface_hub/inference/_providers/sambanova.py
index 92bc95daa4..ed96fb766c 100644
--- a/src/huggingface_hub/inference/_providers/sambanova.py
+++ b/src/huggingface_hub/inference/_providers/sambanova.py
@@ -9,6 +9,20 @@ class SambanovaConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="sambanova", base_url="https://api.sambanova.ai")
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        response_format_config = parameters.get("response_format")
+        if isinstance(response_format_config, dict):
+            if response_format_config.get("type") == "json_schema":
+                json_schema_config = response_format_config.get("json_schema", {})
+                strict = json_schema_config.get("strict")
+                if isinstance(json_schema_config, dict) and (strict is True or strict is None):
+                    json_schema_config["strict"] = False
+
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        return payload
+
 
 class SambanovaFeatureExtractionTask(TaskProviderHelper):
     def __init__(self):
diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py
index b27e332938..5b30231f06 100644
--- a/src/huggingface_hub/inference/_providers/together.py
+++ b/src/huggingface_hub/inference/_providers/together.py
@@ -51,6 +51,20 @@ class TogetherConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.pop("response_format", None)
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
+
 
 class TogetherTextToImageTask(TogetherTask):
     def __init__(self):

From b15d02d9966fd03b10cbaabc3d828c01a5afc5fa Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:26:30 +0200
Subject: [PATCH 02/10] fix

---
 .../inference/_generated/types/chat_completion.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 6e53056d24..020bd57de5 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -88,9 +88,7 @@ class ResponseFormatJSONObject(BaseInferenceType):
     type: Literal["json_object"]
 
 
-ChatCompletionInputGrammarType: TypeAlias = Union[
-    ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject
-]
+ChatCompletionInputGrammarType = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]
 
 
 @dataclass_with_extra

From 638d45a7ada6cc6210ac9895116404801a091685 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:44:47 +0200
Subject: [PATCH 03/10] style

---
 docs/source/en/package_reference/inference_types.md | 10 ++++++++--
 docs/source/ko/package_reference/inference_types.md | 10 ++++++++--
 src/huggingface_hub/__init__.py                     |  4 ++++
 .../inference/_generated/types/__init__.py          |  4 ++++
 .../inference/_generated/types/chat_completion.py   |  3 ---
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md
index 9bd2528fe5..94578d3a3a 100644
--- a/docs/source/en/package_reference/inference_types.md
+++ b/docs/source/en/package_reference/inference_types.md
@@ -57,8 +57,6 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
-
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
@@ -109,6 +107,14 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
+[[autodoc]] huggingface_hub.JSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatText
+
 
 
 ## depth_estimation
diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md
index bf9de4b8e7..2970a0ec0f 100644
--- a/docs/source/ko/package_reference/inference_types.md
+++ b/docs/source/ko/package_reference/inference_types.md
@@ -56,8 +56,6 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
-
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
@@ -108,6 +106,14 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
+[[autodoc]] huggingface_hub.JSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ResponseFormatText
+
 
 
 ## depth_estimation[[huggingface_hub.DepthEstimationInput]]
diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 726f1a62f0..bd834a0968 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -356,6 +356,7 @@
     "ImageToTextInput",
     "ImageToTextOutput",
     "ImageToTextParameters",
+    "JSONSchema",
     "ObjectDetectionBoundingBox",
     "ObjectDetectionInput",
     "ObjectDetectionOutputElement",
@@ -365,6 +366,9 @@
     "QuestionAnsweringInputData",
     "QuestionAnsweringOutputElement",
     "QuestionAnsweringParameters",
+    "ResponseFormatJSONObject",
+    "ResponseFormatJSONSchema",
+    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SummarizationInput",
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 87e5f66fd5..4bce05317a 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -51,6 +51,10 @@
     ChatCompletionStreamOutputLogprobs,
     ChatCompletionStreamOutputTopLogprob,
     ChatCompletionStreamOutputUsage,
+    JSONSchema,
+    ResponseFormatJSONObject,
+    ResponseFormatJSONSchema,
+    ResponseFormatText,
 )
 from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
 from .document_question_answering import (
diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 020bd57de5..53c864c0cd 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -51,19 +51,16 @@ class JSONSchema(BaseInferenceType):
     """
     The name of the response format.
     """
-
     description: Optional[str] = None
     """
     A description of what the response format is for, used by the model to determine
     how to respond in the format.
     """
-
     schema: Optional[Dict[str, object]] = None
     """
     The schema for the response format, described as a JSON Schema object. Learn how
     to build JSON schemas [here](https://json-schema.org/).
     """
-
     strict: Optional[bool] = None
     """
     Whether to enable strict schema adherence when generating the output. If set to

From 16698de0feff4c9ccaa33ebbcf8ba45b29a70372 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:46:06 +0200
Subject: [PATCH 04/10] run style again

---
 src/huggingface_hub/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index bd834a0968..dfbbf63953 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -645,6 +645,7 @@
     "InferenceEndpointTimeoutError",
     "InferenceEndpointType",
     "InferenceTimeoutError",
+    "JSONSchema",
     "KerasModelHubMixin",
     "ModelCard",
     "ModelCardData",
@@ -670,6 +671,9 @@
     "RepoCard",
     "RepoUrl",
     "Repository",
+    "ResponseFormatJSONObject",
+    "ResponseFormatJSONSchema",
+    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SpaceCard",
@@ -1320,6 +1324,7 @@ def __dir__():
     ImageToTextInput,  # noqa: F401
     ImageToTextOutput,  # noqa: F401
     ImageToTextParameters,  # noqa: F401
+    JSONSchema,  # noqa: F401
     ObjectDetectionBoundingBox,  # noqa: F401
     ObjectDetectionInput,  # noqa: F401
     ObjectDetectionOutputElement,  # noqa: F401
@@ -1329,6 +1334,9 @@ def __dir__():
     QuestionAnsweringInputData,  # noqa: F401
     QuestionAnsweringOutputElement,  # noqa: F401
     QuestionAnsweringParameters,  # noqa: F401
+    ResponseFormatJSONObject,  # noqa: F401
+    ResponseFormatJSONSchema,  # noqa: F401
+    ResponseFormatText,  # noqa: F401
     SentenceSimilarityInput,  # noqa: F401
     SentenceSimilarityInputData,  # noqa: F401
     SummarizationInput,  # noqa: F401

From 1c3300fb7ca4c3652d307bf2a2ad782771b02d38 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 14 May 2025 12:52:29 +0200
Subject: [PATCH 05/10] fix tests

---
 src/huggingface_hub/inference/_providers/cohere.py       | 2 +-
 src/huggingface_hub/inference/_providers/fireworks_ai.py | 2 +-
 src/huggingface_hub/inference/_providers/hf_inference.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 7bf19edc0c..48905b84e4 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -20,7 +20,7 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format")
+        response_format = parameters.pop("response_format", None)
         if response_format is not None and response_format["type"] == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index 3ffec6a633..72cf285ec5 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -16,7 +16,7 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format")
+        response_format = parameters.pop("response_format", None)
         if response_format is not None and response_format["type"] == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 4ccb976e78..511266eeba 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -103,7 +103,7 @@ def _prepare_payload_as_dict(
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        response_format = parameters.pop("response_format")
+        response_format = parameters.pop("response_format", None)
         if response_format is not None and response_format["type"] == "json_schema":
             payload["response_format"] = {
                 "type": "json_object",

From 720d95f166ff58f59248acdaf3e2f3e201fec506 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 21 May 2025 16:21:22 +0200
Subject: [PATCH 06/10] rename types

---
 .../en/package_reference/inference_types.md | 16 ++++++++--------
 .../ko/package_reference/inference_types.md | 16 ++++++++--------
 src/huggingface_hub/__init__.py             |  8 ++++----
 .../inference/_generated/types/__init__.py  |  8 ++++----
 .../_generated/types/chat_completion.py     | 16 ++++++++++------
 5 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md
index 94578d3a3a..1c90e9facb 100644
--- a/docs/source/en/package_reference/inference_types.md
+++ b/docs/source/en/package_reference/inference_types.md
@@ -57,10 +57,18 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
+[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema
+
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
 
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText
+
 [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions
 
 [[autodoc]] huggingface_hub.ChatCompletionInputTool
@@ -107,14 +115,6 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
-[[autodoc]] huggingface_hub.JSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONObject
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatText
-
 
 
 ## depth_estimation
diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md
index 2970a0ec0f..3746086ed2 100644
--- a/docs/source/ko/package_reference/inference_types.md
+++ b/docs/source/ko/package_reference/inference_types.md
@@ -56,10 +56,18 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName
 
+[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema
+
 [[autodoc]] huggingface_hub.ChatCompletionInputMessage
 
 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk
 
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText
+
 [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions
 
 [[autodoc]] huggingface_hub.ChatCompletionInputTool
@@ -106,14 +114,6 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage
 
-[[autodoc]] huggingface_hub.JSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONObject
-
-[[autodoc]] huggingface_hub.ResponseFormatJSONSchema
-
-[[autodoc]] huggingface_hub.ResponseFormatText
-
 
 
 ## depth_estimation[[huggingface_hub.DepthEstimationInput]]
diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 43112b5f06..702778e0c4 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -301,9 +301,13 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
+    "ChatCompletionInputJSONSchema",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
+    "ChatCompletionInputResponseFormatJSONObject",
+    "ChatCompletionInputResponseFormatJSONSchema",
+    "ChatCompletionInputResponseFormatText",
     "ChatCompletionInputStreamOptions",
     "ChatCompletionInputTool",
     "ChatCompletionInputToolCall",
@@ -356,7 +360,6 @@
     "ImageToTextInput",
     "ImageToTextOutput",
     "ImageToTextParameters",
-    "JSONSchema",
     "ObjectDetectionBoundingBox",
     "ObjectDetectionInput",
     "ObjectDetectionOutputElement",
@@ -366,9 +369,6 @@
     "QuestionAnsweringInputData",
     "QuestionAnsweringOutputElement",
     "QuestionAnsweringParameters",
-    "ResponseFormatJSONObject",
-    "ResponseFormatJSONSchema",
-    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SummarizationInput",
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 4bce05317a..63f6a653d6 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -24,9 +24,13 @@
     ChatCompletionInputFunctionDefinition,
     ChatCompletionInputFunctionName,
     ChatCompletionInputGrammarType,
+    ChatCompletionInputJSONSchema,
     ChatCompletionInputMessage,
     ChatCompletionInputMessageChunk,
     ChatCompletionInputMessageChunkType,
+    ChatCompletionInputResponseFormatJSONObject,
+    ChatCompletionInputResponseFormatJSONSchema,
+    ChatCompletionInputResponseFormatText,
     ChatCompletionInputStreamOptions,
     ChatCompletionInputTool,
     ChatCompletionInputToolCall,
@@ -51,10 +55,6 @@
     ChatCompletionStreamOutputLogprobs,
     ChatCompletionStreamOutputTopLogprob,
     ChatCompletionStreamOutputUsage,
-    JSONSchema,
-    ResponseFormatJSONObject,
-    ResponseFormatJSONSchema,
-    ResponseFormatText,
 )
 from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
 from .document_question_answering import (
diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py
index 53c864c0cd..fe455ee710 100644
--- a/src/huggingface_hub/inference/_generated/types/chat_completion.py
+++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -46,7 +46,7 @@ class ChatCompletionInputMessage(BaseInferenceType):
 
 
 @dataclass_with_extra
-class JSONSchema(BaseInferenceType):
+class ChatCompletionInputJSONSchema(BaseInferenceType):
     name: str
     """
     The name of the response format.
@@ -70,22 +70,26 @@ class JSONSchema(BaseInferenceType):
 
 
 @dataclass_with_extra
-class ResponseFormatText(BaseInferenceType):
+class ChatCompletionInputResponseFormatText(BaseInferenceType):
     type: Literal["text"]
 
 
 @dataclass_with_extra
-class ResponseFormatJSONSchema(BaseInferenceType):
+class ChatCompletionInputResponseFormatJSONSchema(BaseInferenceType):
     type: Literal["json_schema"]
-    json_schema: JSONSchema
+    json_schema: ChatCompletionInputJSONSchema
 
 
 @dataclass_with_extra
-class ResponseFormatJSONObject(BaseInferenceType):
+class ChatCompletionInputResponseFormatJSONObject(BaseInferenceType):
     type: Literal["json_object"]
 
 
-ChatCompletionInputGrammarType = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]
+ChatCompletionInputGrammarType = Union[
+    ChatCompletionInputResponseFormatText,
+    ChatCompletionInputResponseFormatJSONSchema,
+    ChatCompletionInputResponseFormatJSONObject,
+]
 
 
 @dataclass_with_extra

From eaedda08d881656af13586f0c01df5228a4afcc7 Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 21 May 2025 17:13:41 +0200
Subject: [PATCH 07/10] review suggestions

---
 src/huggingface_hub/__init__.py          | 16 ++++++++--------
 .../inference/_providers/cohere.py       |  6 ++++--
 .../inference/_providers/fireworks_ai.py |  5 +++--
 .../inference/_providers/hf_inference.py |  5 +++--
 .../inference/_providers/nebius.py       |  6 ++++--
 .../inference/_providers/together.py     |  4 +++-
 6 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index 702778e0c4..fe29e43e19 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -548,9 +548,13 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
+    "ChatCompletionInputJSONSchema",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
+    "ChatCompletionInputResponseFormatJSONObject",
+    "ChatCompletionInputResponseFormatJSONSchema",
+    "ChatCompletionInputResponseFormatText",
     "ChatCompletionInputStreamOptions",
     "ChatCompletionInputTool",
     "ChatCompletionInputToolCall",
@@ -648,7 +652,6 @@
     "InferenceEndpointTimeoutError",
     "InferenceEndpointType",
     "InferenceTimeoutError",
-    "JSONSchema",
     "KerasModelHubMixin",
     "MCPClient",
     "ModelCard",
     "ModelCardData",
@@ -675,9 +678,6 @@
     "RepoCard",
     "RepoUrl",
     "Repository",
-    "ResponseFormatJSONObject",
-    "ResponseFormatJSONSchema",
-    "ResponseFormatText",
     "SentenceSimilarityInput",
     "SentenceSimilarityInputData",
     "SpaceCard",
@@ -1273,9 +1273,13 @@ def __dir__():
     ChatCompletionInputFunctionDefinition,  # noqa: F401
     ChatCompletionInputFunctionName,  # noqa: F401
     ChatCompletionInputGrammarType,  # noqa: F401
+    ChatCompletionInputJSONSchema,  # noqa: F401
     ChatCompletionInputMessage,  # noqa: F401
     ChatCompletionInputMessageChunk,  # noqa: F401
     ChatCompletionInputMessageChunkType,  # noqa: F401
+    ChatCompletionInputResponseFormatJSONObject,  # noqa: F401
+    ChatCompletionInputResponseFormatJSONSchema,  # noqa: F401
+    ChatCompletionInputResponseFormatText,  # noqa: F401
     ChatCompletionInputStreamOptions,  # noqa: F401
     ChatCompletionInputTool,  # noqa: F401
     ChatCompletionInputToolCall,  # noqa: F401
@@ -1328,7 +1332,6 @@ def __dir__():
     ImageToTextInput,  # noqa: F401
     ImageToTextOutput,  # noqa: F401
     ImageToTextParameters,  # noqa: F401
-    JSONSchema,  # noqa: F401
     ObjectDetectionBoundingBox,  # noqa: F401
     ObjectDetectionInput,  # noqa: F401
     ObjectDetectionOutputElement,  # noqa: F401
@@ -1338,9 +1341,6 @@ def __dir__():
     QuestionAnsweringInputData,  # noqa: F401
     QuestionAnsweringOutputElement,  # noqa: F401
     QuestionAnsweringParameters,  # noqa: F401
-    ResponseFormatJSONObject,  # noqa: F401
-    ResponseFormatJSONSchema,  # noqa: F401
-    ResponseFormatText,  # noqa: F401
     SentenceSimilarityInput,  # noqa: F401
     SentenceSimilarityInputData,  # noqa: F401
     SummarizationInput,  # noqa: F401
diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 48905b84e4..4916d2a640 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -20,12 +20,14 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["response_format"] = {  # type: ignore [index]
                     "type": "json_object",
                     "schema": json_schema_details["schema"],
                 }
+        # Only remove response_format from parameters if we've handled it
+        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index 72cf285ec5..cfc8be67f3 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -16,12 +16,13 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["response_format"] = {  # type: ignore [index]
                     "type": "json_object",
                     "schema": json_schema_details["schema"],
                 }
+        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 511266eeba..90596df05f 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -103,7 +103,7 @@ def _prepare_payload_as_dict(
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             payload["response_format"] = {
                 "type": "json_object",
                 "value": response_format["json_schema"]["schema"],
             }
+        parameters.pop("response_format", None)
         return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index 07ec82c3e3..5a7ca72956 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -34,11 +34,13 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
-        if response_format is not None and response_format["type"] == "json_schema":
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        # Only remove response_format from parameters if we've handled it
+        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py
index 5b30231f06..e1fbdbc24b 100644
--- a/src/huggingface_hub/inference/_providers/together.py
+++ b/src/huggingface_hub/inference/_providers/together.py
@@ -55,7 +55,7 @@ def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
         payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
-        response_format = parameters.pop("response_format", None)
+        response_format = parameters.get("response_format")
         if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
@@ -63,6 +63,8 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "schema": json_schema_details["schema"],
             }
+        # Only remove response_format from parameters if we've handled it
+        parameters.pop("response_format", None)
         return payload

From ce45ef0ce6bb9da55ed98f3a240f757954e3db1f Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Wed, 21 May 2025 17:51:25 +0200
Subject: [PATCH 08/10] no need to mutate parameters

---
 src/huggingface_hub/inference/_providers/cohere.py       | 3 +--
 src/huggingface_hub/inference/_providers/fireworks_ai.py | 1 -
 src/huggingface_hub/inference/_providers/hf_inference.py | 1 -
 src/huggingface_hub/inference/_providers/nebius.py       | 2 --
 src/huggingface_hub/inference/_providers/together.py     | 3 +--
 5 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py
index 4916d2a640..a5e9191cae 100644
--- a/src/huggingface_hub/inference/_providers/cohere.py
+++ b/src/huggingface_hub/inference/_providers/cohere.py
@@ -28,6 +28,5 @@ def _prepare_payload_as_dict(
                     "type": "json_object",
                     "schema": json_schema_details["schema"],
                 }
-        # Only remove response_format from parameters if we've handled it
-        parameters.pop("response_format", None)
+
         return payload
diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py
index cfc8be67f3..b4cc19a570 100644
--- a/src/huggingface_hub/inference/_providers/fireworks_ai.py
+++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -24,5 +24,4 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "schema": json_schema_details["schema"],
             }
-        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 90596df05f..0b2cf1e7a3 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -109,7 +109,6 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "value": response_format["json_schema"]["schema"],
             }
-        parameters.pop("response_format", None)
         return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index 5a7ca72956..85ad67c4c8 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -39,8 +39,6 @@ def _prepare_payload_as_dict(
             json_schema_details = response_format.get("json_schema")
             if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
                 payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
-        # Only remove response_format from parameters if we've handled it
-        parameters.pop("response_format", None)
         return payload
diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py
index e1fbdbc24b..de166b7baf 100644
--- a/src/huggingface_hub/inference/_providers/together.py
+++ b/src/huggingface_hub/inference/_providers/together.py
@@ -63,8 +63,7 @@ def _prepare_payload_as_dict(
                 "type": "json_object",
                 "schema": json_schema_details["schema"],
             }
-        # Only remove response_format from parameters if we've handled it
-        parameters.pop("response_format", None)
+
        return payload

From 5ea6a0fc35c0d03b7660301da4c6ccdd644569ad Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Thu, 22 May 2025 11:24:16 +0200
Subject: [PATCH 09/10] docs

---
 docs/source/en/guides/inference.md | 98 ++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index d59138ab0d..57083b85d1 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -308,6 +308,104 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There
 
 </Tip>
 
+## Function Calling
+
+Function calling allows LLMs to interact with external tools, such as defined functions or APIs. This enables users to easily build applications tailored to specific use cases and real-world tasks. `InferenceClient` implements the same tool calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
+
+```python
+from huggingface_hub import InferenceClient
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get current temperature for a given location.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and country e.g. Paris, France"
+                    }
+                },
+                "required": ["location"],
+            },
+        }
+    }
+]
+
+client = InferenceClient(provider="nebius")
+
+response = client.chat.completions.create(
+    model="Qwen/Qwen2.5-72B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like the next 3 days in London, UK?"
+        }
+    ],
+    tools=tools,
+    tool_choice="auto",
+)
+
+print(response.choices[0].message.tool_calls[0].function.arguments)
+
+```
+
+## Structured Outputs & JSON Mode
+
+`InferenceClient` supports both JSON mode and Structured Outputs for controlling and validating the format of model responses. JSON mode ensures that the LLM's output is a syntactically valid JSON object. This is useful when you want the model to return machine-readable data but don't require strict adherence to a specific structure. Structured Outputs build on JSON mode by enforcing a predefined schema. This guarantees not only valid JSON but also that the output matches an expected structure, making it ideal for reliable downstream processing.
+
+We follow the OpenAI API specs for both JSON mode and Structured Outputs. You can enable them via the `response_format` argument. Here is an example of Structured Outputs using [Cerebras](https://www.cerebras.ai/) as the inference provider:
+
+```python
+from huggingface_hub import InferenceClient
+
+json_schema = {
+    "name": "book",
+    "schema": {
+        "properties": {
+            "name": {
+                "title": "Name",
+                "type": "string",
+            },
+            "authors": {
+                "items": {"type": "string"},
+                "title": "Authors",
+                "type": "array",
+            },
+        },
+        "required": ["name", "authors"],
+        "title": "Book",
+        "type": "object",
+    },
+    "strict": True,
+}
+
+client = InferenceClient(
+    provider="cerebras",
+)
+
+completion = client.chat.completions.create(
+    model="Qwen/Qwen3-32B",
+    messages=[
+        {"role": "system", "content": "Extract the books information."},
+        {"role": "user", "content": "I recently read 'The Great Gatsby' by F. Scott Fitzgerald."},
+    ],
+    response_format={
+        "type": "json_schema",
+        "json_schema": json_schema,
+    },
+)
+
+print(completion.choices[0].message)
+```
+
+<Tip>
+
+Please refer to the providers' documentation to verify which models are supported by them for Structured Outputs and Function Calling.
+
+</Tip>
+
 
 ## Async client

From edd446539f6d180222a7d6285303615d9267cd6c Mon Sep 17 00:00:00 2001
From: Celina Hanouti
Date: Thu, 22 May 2025 11:33:45 +0200
Subject: [PATCH 10/10] better

---
 docs/source/en/guides/inference.md | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index 57083b85d1..1817aa9460 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -310,7 +310,8 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There
 
 ## Function Calling
 
-Function calling allows LLMs to interact with external tools, such as defined functions or APIs. This enables users to easily build applications tailored to specific use cases and real-world tasks. `InferenceClient` implements the same tool calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
+Function calling allows LLMs to interact with external tools, such as defined functions or APIs. This enables users to easily build applications tailored to specific use cases and real-world tasks.
+`InferenceClient` implements the same tool calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
 
 ```python
 from huggingface_hub import InferenceClient
@@ -353,9 +354,15 @@ print(response.choices[0].message.tool_calls[0].function.arguments)
 
 ```
 
+<Tip>
+
+Please refer to the providers' documentation to verify which models are supported by them for Function/Tool Calling.
+
+</Tip>
+
 ## Structured Outputs & JSON Mode
 
-`InferenceClient` supports both JSON mode and Structured Outputs for controlling and validating the format of model responses. JSON mode ensures that the LLM's output is a syntactically valid JSON object. This is useful when you want the model to return machine-readable data but don't require strict adherence to a specific structure. Structured Outputs build on JSON mode by enforcing a predefined schema. This guarantees not only valid JSON but also that the output matches an expected structure, making it ideal for reliable downstream processing.
+`InferenceClient` supports JSON mode for syntactically valid JSON responses and Structured Outputs for schema-enforced responses. JSON mode provides machine-readable data without strict structure, while Structured Outputs guarantee both valid JSON and adherence to a predefined schema for reliable downstream processing.
 
 We follow the OpenAI API specs for both JSON mode and Structured Outputs. You can enable them via the `response_format` argument. Here is an example of Structured Outputs using [Cerebras](https://www.cerebras.ai/) as the inference provider:
 
@@ -383,9 +390,8 @@ json_schema = {
     "strict": True,
 }
 
-client = InferenceClient(
-    provider="cerebras",
-)
+client = InferenceClient(provider="cerebras")
+
 
 completion = client.chat.completions.create(
     model="Qwen/Qwen3-32B",
@@ -403,7 +409,7 @@ print(completion.choices[0].message)
 ```
 
 <Tip>
 
-Please refer to the providers' documentation to verify which models are supported by them for Structured Outputs and Function Calling.
+Please refer to the providers' documentation to verify which models support Structured Outputs and JSON Mode.
 
 </Tip>
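---

The docs added in PATCH 09/10 enable both JSON mode and Structured Outputs through the `response_format` argument, but only show a Structured Outputs example. For completeness, here is a minimal sketch of the JSON mode variant, the `{"type": "json_object"}` arm of the `ChatCompletionInputGrammarType` union introduced in these patches. The provider and model below are illustrative placeholders, not taken from the patches:

```python
from huggingface_hub import InferenceClient

# Provider and model are illustrative placeholders; any chat provider/model
# combination that supports JSON mode should work similarly.
client = InferenceClient(provider="nebius")

completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-72B-Instruct",
    messages=[
        {"role": "system", "content": "Answer with a single JSON object."},
        {"role": "user", "content": "List three primary colors with a short note on each."},
    ],
    # JSON mode: the output is constrained to be syntactically valid JSON,
    # but no particular schema is enforced.
    response_format={"type": "json_object"},
)

print(completion.choices[0].message.content)
```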
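The renamed input types exported in PATCH 06/07 can also be used to build the same `response_format` in a typed way. This is a sketch under the assumption that the generated dataclasses accept keyword arguments as usual; plain dicts, as used in the docs examples, work equally well:

```python
from huggingface_hub import (
    ChatCompletionInputJSONSchema,
    ChatCompletionInputResponseFormatJSONSchema,
)

# Typed equivalent of the dict-based `response_format` from the docs example.
response_format = ChatCompletionInputResponseFormatJSONSchema(
    type="json_schema",
    json_schema=ChatCompletionInputJSONSchema(
        name="book",
        schema={
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
        strict=True,
    ),
)

print(response_format)
```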
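To make the provider-side behavior concrete, the conversion these patches implement can be distilled into a standalone function: an OpenAI-style `json_schema` response format is rewritten into the payload each backend expects (Together, Fireworks, and Cohere take `{"type": "json_object", "schema": ...}`, while Nebius takes the bare schema under a `guided_json` key). This is an illustration derived from the diffs above, not the library's public API; the function name and provider handling are simplified assumptions:

```python
from typing import Any, Dict


def convert_openai_response_format(parameters: Dict[str, Any], provider: str) -> Dict[str, Any]:
    """Illustrative distillation of the per-provider logic in the patches above."""
    # Copy everything except response_format, which is translated below.
    payload: Dict[str, Any] = {k: v for k, v in parameters.items() if k != "response_format"}
    response_format = parameters.get("response_format")
    if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
        json_schema_details = response_format.get("json_schema")
        if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
            if provider == "nebius":
                # Nebius expects the bare schema under `guided_json`.
                payload["guided_json"] = json_schema_details["schema"]
            else:
                # Together, Fireworks and Cohere expect a `json_object` format
                # that carries the schema directly.
                payload["response_format"] = {
                    "type": "json_object",
                    "schema": json_schema_details["schema"],
                }
    return payload


# Example: an OpenAI-style input becomes a Together-style payload.
params = {
    "temperature": 0.2,
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "book", "schema": {"type": "object"}, "strict": True},
    },
}
print(convert_openai_response_format(params, "together"))
# {'temperature': 0.2, 'response_format': {'type': 'json_object', 'schema': {'type': 'object'}}}
```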