From 33eac4406659a3efad82bffe3ff6204d52b11609 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Mon, 21 Jul 2025 18:36:33 +0200 Subject: [PATCH] Add tools to config --- .gitignore | 5 +- examples/tool_filtering_demo/README.md | 55 ++++++++++++++++ examples/tool_filtering_demo/agent.json | 14 +++++ .../inference/_mcp/mcp_client.py | 63 ++++++++++++++++++- src/huggingface_hub/inference/_mcp/types.py | 8 +++ tiny_agents.md | 46 ++++++++++++++ 6 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 examples/tool_filtering_demo/README.md create mode 100644 examples/tool_filtering_demo/agent.json create mode 100644 tiny_agents.md diff --git a/.gitignore b/.gitignore index 705fdcc38b..f989a75311 100644 --- a/.gitignore +++ b/.gitignore @@ -139,4 +139,7 @@ dmypy.json # Spell checker config cspell.json -tmp* \ No newline at end of file +tmp* + +# Claude Code +CLAUDE.md \ No newline at end of file diff --git a/examples/tool_filtering_demo/README.md b/examples/tool_filtering_demo/README.md new file mode 100644 index 0000000000..c4c15f9ccd --- /dev/null +++ b/examples/tool_filtering_demo/README.md @@ -0,0 +1,55 @@ +# Tool Filtering Demo + +This example demonstrates the new tool filtering feature for tiny agents. + +## Configuration + +The `agent.json` shows how to filter tools from MCP servers: + +```json +{ + "servers": [ + { + "type": "stdio", + "command": "npx", + "args": ["@playwright/mcp@latest"], + "tools": { + "include": ["browser_click", "browser_close"] + } + } + ] +} +``` + +## Tool Filtering Options + +### Include only specific tools +```json +"tools": { + "include": ["tool1", "tool2", "tool3"] +} +``` + +### Exclude specific tools +```json +"tools": { + "exclude": ["unwanted_tool1", "unwanted_tool2"] +} +``` + +### Combine both (exclude takes precedence) +```json +"tools": { + "include": ["tool1", "tool2", "tool3"], + "exclude": ["tool2"] +} +``` +Result: Only `tool1` and `tool3` will be available. 
+ +## Running the Example + +```bash +tiny-agents run examples/tool_filtering_demo +``` + +This agent will have access to only the `browser_click` and `browser_close` tools from Playwright, instead of all 30+ tools that Playwright provides by default. \ No newline at end of file diff --git a/examples/tool_filtering_demo/agent.json b/examples/tool_filtering_demo/agent.json new file mode 100644 index 0000000000..db4ba88cb2 --- /dev/null +++ b/examples/tool_filtering_demo/agent.json @@ -0,0 +1,14 @@ +{ + "model": "meta-llama/Meta-Llama-3-8B-Instruct", + "provider": "auto", + "servers": [ + { + "type": "stdio", + "command": "npx", + "args": ["@playwright/mcp@latest"], + "tools": { + "include": ["browser_click", "browser_close"] + } + } + ] +} \ No newline at end of file diff --git a/src/huggingface_hub/inference/_mcp/mcp_client.py b/src/huggingface_hub/inference/_mcp/mcp_client.py index 2712dea121..a5d22c97f6 100644 --- a/src/huggingface_hub/inference/_mcp/mcp_client.py +++ b/src/huggingface_hub/inference/_mcp/mcp_client.py @@ -139,21 +139,27 @@ async def add_mcp_server(self, type: ServerType, **params: Any): - args (List[str], optional): Arguments for the command - env (Dict[str, str], optional): Environment variables for the command - cwd (Union[str, Path, None], optional): Working directory for the command + - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists - For SSE servers: - url (str): The URL of the SSE server - headers (Dict[str, Any], optional): Headers for the SSE connection - timeout (float, optional): Connection timeout - sse_read_timeout (float, optional): SSE read timeout + - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists - For StreamableHTTP servers: - url (str): The URL of the StreamableHTTP server - headers (Dict[str, Any], optional): Headers for the StreamableHTTP connection - timeout (timedelta, optional): Connection timeout - sse_read_timeout (timedelta, 
optional): SSE read timeout - terminate_on_close (bool, optional): Whether to terminate on close + - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists """ from mcp import ClientSession, StdioServerParameters from mcp import types as mcp_types + # Extract tools configuration if provided + tools_config = params.pop("tools", None) + # Determine server type and create appropriate parameters if type == "stdio": # Handle stdio server @@ -209,9 +215,18 @@ async def add_mcp_server(self, type: ServerType, **params: Any): # List available tools response = await session.list_tools() - logger.debug("Connected to server with tools:", [tool.name for tool in response.tools]) + all_tool_names = [tool.name for tool in response.tools] + logger.debug("Connected to server with tools:", all_tool_names) + + # Filter tools based on configuration + filtered_tools = self._filter_tools(response.tools, tools_config, all_tool_names) + + if tools_config: + logger.info( + f"Tool filtering applied. Using {len(filtered_tools)} of {len(response.tools)} available tools: {[tool.name for tool in filtered_tools]}" + ) - for tool in response.tools: + for tool in filtered_tools: if tool.name in self.sessions: logger.warning(f"Tool '{tool.name}' already defined by another server. Skipping.") continue @@ -233,6 +248,50 @@ async def add_mcp_server(self, type: ServerType, **params: Any): ) ) + def _filter_tools( + self, tools: List[Any], tools_config: Optional[Dict[str, Any]], all_tool_names: List[str] + ) -> List[Any]: + """Filter tools based on include/exclude configuration. 
+ + Args: + tools: List of MCP tool objects + tools_config: Optional tools configuration dict with 'include' and/or 'exclude' keys + all_tool_names: List of all available tool names for validation + + Returns: + Filtered list of tools + """ + if not tools_config: + return tools + + include_list = tools_config.get("include") + exclude_list = tools_config.get("exclude") + + # Validate that specified tools exist + if include_list: + missing_tools = set(include_list) - set(all_tool_names) + if missing_tools: + logger.warning(f"Tools specified in 'include' list not found on server: {list(missing_tools)}") + + if exclude_list: + missing_tools = set(exclude_list) - set(all_tool_names) + if missing_tools: + logger.warning(f"Tools specified in 'exclude' list not found on server: {list(missing_tools)}") + + filtered_tools = [] + for tool in tools: + # If include list is specified, only include tools in that list + if include_list and tool.name not in include_list: + continue + + # If exclude list is specified, exclude tools in that list + if exclude_list and tool.name in exclude_list: + continue + + filtered_tools.append(tool) + + return filtered_tools + async def process_single_turn_with_tools( self, messages: List[Union[Dict, ChatCompletionInputMessage]], diff --git a/src/huggingface_hub/inference/_mcp/types.py b/src/huggingface_hub/inference/_mcp/types.py index cfb5e0eac9..7305b0a56b 100644 --- a/src/huggingface_hub/inference/_mcp/types.py +++ b/src/huggingface_hub/inference/_mcp/types.py @@ -10,24 +10,32 @@ class InputConfig(TypedDict, total=False): password: bool +class ToolsConfig(TypedDict, total=False): + include: NotRequired[List[str]] + exclude: NotRequired[List[str]] + + class StdioServerConfig(TypedDict): type: Literal["stdio"] command: str args: List[str] env: Dict[str, str] cwd: str + tools: NotRequired[ToolsConfig] class HTTPServerConfig(TypedDict): type: Literal["http"] url: str headers: Dict[str, str] + tools: NotRequired[ToolsConfig] class 
SSEServerConfig(TypedDict): type: Literal["sse"] url: str headers: Dict[str, str] + tools: NotRequired[ToolsConfig] ServerConfig = Union[StdioServerConfig, HTTPServerConfig, SSEServerConfig] diff --git a/tiny_agents.md b/tiny_agents.md new file mode 100644 index 0000000000..00fe9b9e84 --- /dev/null +++ b/tiny_agents.md @@ -0,0 +1,46 @@ +Tiny agents (https://huggingface.co/blog/python-tiny-agents) is a minimalistic framework for running AI agents. When running a tiny agent with `huggingface_hub` using the `tiny-agents run agent` command, the command will look for an `agent.json` file, which defines the configuration of the agent. Each agent is defined by an LLM (powered by Hugging Face Inference Providers, which is similar to the OpenAI API) as well as a set of MCP servers, whose tools will be provided to the LLM. Currently, one can just add certain MCP servers to the config, such as the one below: + +```json +{ + "model": "Qwen/Qwen2.5-72B-Instruct", + "provider": "nebius", + "inputs": [ + { + "type": "promptString", + "id": "github-personal-access-token", + "description": "Github Personal Access Token (read-only)", + "password": true + } + ], + "servers": [ + { + "type": "stdio", + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + "GITHUB_PERSONAL_ACCESS_TOKEN", + "-e", + "GITHUB_TOOLSETS=repos,issues,pull_requests", + "ghcr.io/github/github-mcp-server" + ], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "${input:github-personal-access-token}" + } + }, + { + "type": "stdio", + "command": "npx", + "args": [ + "@playwright/mcp@latest" + ] + } + ] +} +``` + +However, it would be nice to have a feature that allows users to define which tools to enable/disable in the config JSON file. For example, for the Playwright MCP server (which by default has more than 30 tools), I actually only need the `browser_click` and `browser_close` tools. Enabling only a handful of tools makes AI agents much more reliable. + +Would you be able to implement this feature? 
\ No newline at end of file