diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index d59138ab0d..1817aa9460 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -308,6 +308,110 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There
+## Function Calling
+
+Function calling allows LLMs to interact with external tools, such as user-defined functions or APIs. This makes it easy to build applications tailored to specific use cases and real-world tasks.
+`InferenceClient` implements the same tool-calling interface as the OpenAI Chat Completions API. Here is a simple example of tool calling using [Nebius](https://nebius.com/) as the inference provider:
+
+```python
+from huggingface_hub import InferenceClient
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get current temperature for a given location.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and country, e.g. Paris, France"
+                    }
+                },
+                "required": ["location"],
+            },
+        }
+    }
+]
+
+client = InferenceClient(provider="nebius")
+
+response = client.chat.completions.create(
+    model="Qwen/Qwen2.5-72B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the current weather in London, UK?"
+        }
+    ],
+    tools=tools,
+    tool_choice="auto",
+)
+
+print(response.choices[0].message.tool_calls[0].function.arguments)
+
+```
+
+
+
+Please refer to each provider's documentation to check which models support function/tool calling.
+
+
+
+## Structured Outputs & JSON Mode
+
+`InferenceClient` supports JSON mode, for syntactically valid JSON responses, and Structured Outputs, for schema-enforced responses. JSON mode provides machine-readable data without a strict structure, while Structured Outputs guarantee both valid JSON and adherence to a predefined schema for reliable downstream processing.
+
+We follow the OpenAI API specs for both JSON mode and Structured Outputs. You can enable them via the `response_format` argument. Here is an example of Structured Outputs using [Cerebras](https://www.cerebras.ai/) as the inference provider:
+
+```python
+from huggingface_hub import InferenceClient
+
+json_schema = {
+    "name": "book",
+    "schema": {
+        "properties": {
+            "name": {
+                "title": "Name",
+                "type": "string",
+            },
+            "authors": {
+                "items": {"type": "string"},
+                "title": "Authors",
+                "type": "array",
+            },
+        },
+        "required": ["name", "authors"],
+        "title": "Book",
+        "type": "object",
+    },
+    "strict": True,
+}
+
+client = InferenceClient(provider="cerebras")
+
+
+completion = client.chat.completions.create(
+    model="Qwen/Qwen3-32B",
+    messages=[
+        {"role": "system", "content": "Extract the book information."},
+        {"role": "user", "content": "I recently read 'The Great Gatsby' by F. Scott Fitzgerald."},
+    ],
+    response_format={
+        "type": "json_schema",
+        "json_schema": json_schema,
+    },
+)
+
+print(completion.choices[0].message)
+```
+
+
+Please refer to each provider's documentation to check which models support Structured Outputs and JSON mode.
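+
+Because the response is constrained to the schema above, the returned message content is a JSON string that can be decoded directly. A minimal sketch (the exact values depend on the model's output):
+
+```python
+import json
+
+book = json.loads(completion.choices[0].message.content)
+print(book["name"])     # e.g. "The Great Gatsby"
+print(book["authors"])  # e.g. ["F. Scott Fitzgerald"]
+```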
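+
+JSON mode is requested the same way, with `response_format={"type": "json_object"}`. Since JSON mode only guarantees syntactically valid JSON, the desired keys have to be described in the prompt rather than enforced by a schema. A sketch of what this could look like with the same Cerebras client:
+
+```python
+completion = client.chat.completions.create(
+    model="Qwen/Qwen3-32B",
+    messages=[
+        {"role": "system", "content": "Reply with a JSON object with the keys 'name' and 'authors'."},
+        {"role": "user", "content": "I recently read 'The Great Gatsby' by F. Scott Fitzgerald."},
+    ],
+    response_format={"type": "json_object"},
+)
+
+print(completion.choices[0].message.content)
+```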
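+
+Going back to the function calling example above: after the model emits a tool call, you typically execute the tool yourself and send its result back so the model can compose a final answer. The snippet below is a minimal sketch following the OpenAI tool-calling conventions that `InferenceClient` mirrors; the `get_weather` implementation is a hypothetical stand-in for a real weather API, and message shapes may vary slightly across providers:
+
+```python
+import json
+
+from huggingface_hub import InferenceClient
+
+def get_weather(location: str) -> str:
+    # Placeholder implementation; call a real weather API here.
+    return f"It is currently 12°C and cloudy in {location}."
+
+# Same provider, model, `tools` list and `response` as in the function calling example above.
+client = InferenceClient(provider="nebius")
+tool_call = response.choices[0].message.tool_calls[0]
+arguments = json.loads(tool_call.function.arguments)  # the arguments arrive as a JSON-encoded string
+
+final = client.chat.completions.create(
+    model="Qwen/Qwen2.5-72B-Instruct",
+    messages=[
+        {"role": "user", "content": "What's the current weather in London, UK?"},
+        # Echo the assistant's tool call, then append the tool result as a "tool" message.
+        {
+            "role": "assistant",
+            "tool_calls": [
+                {
+                    "id": tool_call.id,
+                    "type": "function",
+                    "function": {"name": tool_call.function.name, "arguments": tool_call.function.arguments},
+                }
+            ],
+        },
+        {"role": "tool", "tool_call_id": tool_call.id, "content": get_weather(**arguments)},
+    ],
+    tools=tools,
+)
+print(final.choices[0].message.content)
+```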
+ + ## Async client diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md index 9bd2528fe5..1c90e9facb 100644 --- a/docs/source/en/package_reference/inference_types.md +++ b/docs/source/en/package_reference/inference_types.md @@ -57,12 +57,18 @@ This part of the lib is still under development and will be improved in future r [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName -[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType +[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema [[autodoc]] huggingface_hub.ChatCompletionInputMessage [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk +[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject + +[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema + +[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText + [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions [[autodoc]] huggingface_hub.ChatCompletionInputTool diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md index bf9de4b8e7..3746086ed2 100644 --- a/docs/source/ko/package_reference/inference_types.md +++ b/docs/source/ko/package_reference/inference_types.md @@ -56,12 +56,18 @@ rendered properly in your Markdown viewer. [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName -[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType +[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema [[autodoc]] huggingface_hub.ChatCompletionInputMessage [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk +[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject + +[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema + +[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText + [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions [[autodoc]] huggingface_hub.ChatCompletionInputTool diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 8e1a99787f..fe29e43e19 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -301,10 +301,13 @@ "ChatCompletionInputFunctionDefinition", "ChatCompletionInputFunctionName", "ChatCompletionInputGrammarType", - "ChatCompletionInputGrammarTypeType", + "ChatCompletionInputJSONSchema", "ChatCompletionInputMessage", "ChatCompletionInputMessageChunk", "ChatCompletionInputMessageChunkType", + "ChatCompletionInputResponseFormatJSONObject", + "ChatCompletionInputResponseFormatJSONSchema", + "ChatCompletionInputResponseFormatText", "ChatCompletionInputStreamOptions", "ChatCompletionInputTool", "ChatCompletionInputToolCall", @@ -545,10 +548,13 @@ "ChatCompletionInputFunctionDefinition", "ChatCompletionInputFunctionName", "ChatCompletionInputGrammarType", - "ChatCompletionInputGrammarTypeType", + "ChatCompletionInputJSONSchema", "ChatCompletionInputMessage", "ChatCompletionInputMessageChunk", "ChatCompletionInputMessageChunkType", + "ChatCompletionInputResponseFormatJSONObject", + "ChatCompletionInputResponseFormatJSONSchema", + "ChatCompletionInputResponseFormatText", "ChatCompletionInputStreamOptions", "ChatCompletionInputTool", "ChatCompletionInputToolCall", @@ -1267,10 +1273,13 @@ def __dir__(): ChatCompletionInputFunctionDefinition, # noqa: F401 ChatCompletionInputFunctionName, # noqa: F401 ChatCompletionInputGrammarType, # noqa: F401 - ChatCompletionInputGrammarTypeType, # noqa: F401 + ChatCompletionInputJSONSchema, # noqa: F401 
ChatCompletionInputMessage, # noqa: F401 ChatCompletionInputMessageChunk, # noqa: F401 ChatCompletionInputMessageChunkType, # noqa: F401 + ChatCompletionInputResponseFormatJSONObject, # noqa: F401 + ChatCompletionInputResponseFormatJSONSchema, # noqa: F401 + ChatCompletionInputResponseFormatText, # noqa: F401 ChatCompletionInputStreamOptions, # noqa: F401 ChatCompletionInputTool, # noqa: F401 ChatCompletionInputToolCall, # noqa: F401 diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py index 92f286792b..63f6a653d6 100644 --- a/src/huggingface_hub/inference/_generated/types/__init__.py +++ b/src/huggingface_hub/inference/_generated/types/__init__.py @@ -24,10 +24,13 @@ ChatCompletionInputFunctionDefinition, ChatCompletionInputFunctionName, ChatCompletionInputGrammarType, - ChatCompletionInputGrammarTypeType, + ChatCompletionInputJSONSchema, ChatCompletionInputMessage, ChatCompletionInputMessageChunk, ChatCompletionInputMessageChunkType, + ChatCompletionInputResponseFormatJSONObject, + ChatCompletionInputResponseFormatJSONSchema, + ChatCompletionInputResponseFormatText, ChatCompletionInputStreamOptions, ChatCompletionInputTool, ChatCompletionInputToolCall, diff --git a/src/huggingface_hub/inference/_generated/types/chat_completion.py b/src/huggingface_hub/inference/_generated/types/chat_completion.py index 9978c0a5a9..fe455ee710 100644 --- a/src/huggingface_hub/inference/_generated/types/chat_completion.py +++ b/src/huggingface_hub/inference/_generated/types/chat_completion.py @@ -3,7 +3,7 @@ # See: # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks. -from typing import Any, List, Literal, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union from .base import BaseInferenceType, dataclass_with_extra @@ -45,17 +45,51 @@ class ChatCompletionInputMessage(BaseInferenceType): tool_calls: Optional[List[ChatCompletionInputToolCall]] = None -ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"] +@dataclass_with_extra +class ChatCompletionInputJSONSchema(BaseInferenceType): + name: str + """ + The name of the response format. + """ + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + schema: Optional[Dict[str, object]] = None + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. + """ @dataclass_with_extra -class ChatCompletionInputGrammarType(BaseInferenceType): - type: "ChatCompletionInputGrammarTypeType" - value: Any - """A string that represents a [JSON Schema](https://json-schema.org/). - JSON Schema is a declarative language that allows to annotate JSON documents - with types and descriptions. 
- """ +class ChatCompletionInputResponseFormatText(BaseInferenceType): + type: Literal["text"] + + +@dataclass_with_extra +class ChatCompletionInputResponseFormatJSONSchema(BaseInferenceType): + type: Literal["json_schema"] + json_schema: ChatCompletionInputJSONSchema + + +@dataclass_with_extra +class ChatCompletionInputResponseFormatJSONObject(BaseInferenceType): + type: Literal["json_object"] + + +ChatCompletionInputGrammarType = Union[ + ChatCompletionInputResponseFormatText, + ChatCompletionInputResponseFormatJSONSchema, + ChatCompletionInputResponseFormatJSONObject, +] @dataclass_with_extra diff --git a/src/huggingface_hub/inference/_providers/cerebras.py b/src/huggingface_hub/inference/_providers/cerebras.py index 12b1815832..a9b9c3aacb 100644 --- a/src/huggingface_hub/inference/_providers/cerebras.py +++ b/src/huggingface_hub/inference/_providers/cerebras.py @@ -1,4 +1,4 @@ -from huggingface_hub.inference._providers._common import BaseConversationalTask +from ._common import BaseConversationalTask class CerebrasConversationalTask(BaseConversationalTask): diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py index 0dc35c7e6c..a5e9191cae 100644 --- a/src/huggingface_hub/inference/_providers/cohere.py +++ b/src/huggingface_hub/inference/_providers/cohere.py @@ -1,6 +1,8 @@ -from huggingface_hub.inference._providers._common import ( - BaseConversationalTask, -) +from typing import Any, Dict, Optional + +from huggingface_hub.hf_api import InferenceProviderMapping + +from ._common import BaseConversationalTask _PROVIDER = "cohere" @@ -13,3 +15,18 @@ def __init__(self): def _prepare_route(self, mapped_model: str, api_key: str) -> str: return "/compatibility/v1/chat/completions" + + def _prepare_payload_as_dict( + self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping + ) -> Optional[Dict]: + payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info) + response_format = parameters.get("response_format") + if isinstance(response_format, dict) and response_format.get("type") == "json_schema": + json_schema_details = response_format.get("json_schema") + if isinstance(json_schema_details, dict) and "schema" in json_schema_details: + payload["response_format"] = { # type: ignore [index] + "type": "json_object", + "schema": json_schema_details["schema"], + } + + return payload diff --git a/src/huggingface_hub/inference/_providers/fireworks_ai.py b/src/huggingface_hub/inference/_providers/fireworks_ai.py index 9fc9aba806..b4cc19a570 100644 --- a/src/huggingface_hub/inference/_providers/fireworks_ai.py +++ b/src/huggingface_hub/inference/_providers/fireworks_ai.py @@ -1,3 +1,7 @@ +from typing import Any, Dict, Optional + +from huggingface_hub.hf_api import InferenceProviderMapping + from ._common import BaseConversationalTask @@ -7,3 +11,17 @@ def __init__(self): def _prepare_route(self, mapped_model: str, api_key: str) -> str: return "/inference/v1/chat/completions" + + def _prepare_payload_as_dict( + self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping + ) -> Optional[Dict]: + payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info) + response_format = parameters.get("response_format") + if isinstance(response_format, dict) and response_format.get("type") == "json_schema": + json_schema_details = response_format.get("json_schema") + if isinstance(json_schema_details, dict) and "schema" in json_schema_details: + 
                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 7923567be3..0b2cf1e7a3 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -96,13 +96,21 @@ def __init__(self):
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
+        payload = filter_none(parameters)
         mapped_model = provider_mapping_info.provider_id
         payload_model = parameters.get("model") or mapped_model

         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
-        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+        # Translate the OpenAI-style "json_schema" response format into the "json_object"/"value" shape used by HF Inference.
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            payload["response_format"] = {
+                "type": "json_object",
+                "value": response_format["json_schema"]["schema"],
+            }
+        return {**payload, "model": payload_model, "messages": inputs}

     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         base_url = (
diff --git a/src/huggingface_hub/inference/_providers/nebius.py b/src/huggingface_hub/inference/_providers/nebius.py
index eccc9ee83b..85ad67c4c8 100644
--- a/src/huggingface_hub/inference/_providers/nebius.py
+++ b/src/huggingface_hub/inference/_providers/nebius.py
@@ -30,6 +30,18 @@ class NebiusConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                # Nebius consumes the raw JSON schema through a "guided_json" field rather than "response_format".
+                payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        return payload
+

 class NebiusTextToImageTask(TaskProviderHelper):
     def __init__(self):
diff --git a/src/huggingface_hub/inference/_providers/sambanova.py b/src/huggingface_hub/inference/_providers/sambanova.py
index 92bc95daa4..ed96fb766c 100644
--- a/src/huggingface_hub/inference/_providers/sambanova.py
+++ b/src/huggingface_hub/inference/_providers/sambanova.py
@@ -9,6 +9,22 @@ class SambanovaConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="sambanova", base_url="https://api.sambanova.ai")

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        response_format_config = parameters.get("response_format")
+        if isinstance(response_format_config, dict) and response_format_config.get("type") == "json_schema":
+            json_schema_config = response_format_config.get("json_schema", {})
+            # Check the type before reading "strict" so a malformed value cannot raise,
+            # then force strict=False whether it was set to True or left unset.
+            if isinstance(json_schema_config, dict):
+                strict = json_schema_config.get("strict")
+                if strict is True or strict is None:
+                    json_schema_config["strict"] = False
+
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        return payload
+ class SambanovaFeatureExtractionTask(TaskProviderHelper): def __init__(self): diff --git a/src/huggingface_hub/inference/_providers/together.py b/src/huggingface_hub/inference/_providers/together.py index b27e332938..de166b7baf 100644 --- a/src/huggingface_hub/inference/_providers/together.py +++ b/src/huggingface_hub/inference/_providers/together.py @@ -51,6 +51,21 @@ class TogetherConversationalTask(BaseConversationalTask): def __init__(self): super().__init__(provider=_PROVIDER, base_url=_BASE_URL) + def _prepare_payload_as_dict( + self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping + ) -> Optional[Dict]: + payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info) + response_format = parameters.get("response_format") + if isinstance(response_format, dict) and response_format.get("type") == "json_schema": + json_schema_details = response_format.get("json_schema") + if isinstance(json_schema_details, dict) and "schema" in json_schema_details: + payload["response_format"] = { # type: ignore [index] + "type": "json_object", + "schema": json_schema_details["schema"], + } + + return payload + class TogetherTextToImageTask(TogetherTask): def __init__(self):