Skip to content

Added Audio to FastMCP #1130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/mcp/server/fastmcp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from importlib.metadata import version

from .server import Context, FastMCP
from .utilities.types import Image
from .utilities.types import Audio, Image

__version__ = version("mcp")
__all__ = ["FastMCP", "Context", "Image"]
__all__ = ["FastMCP", "Context", "Image", "Audio"]
5 changes: 4 additions & 1 deletion src/mcp/server/fastmcp/utilities/func_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from mcp.server.fastmcp.exceptions import InvalidSignature
from mcp.server.fastmcp.utilities.logging import get_logger
from mcp.server.fastmcp.utilities.types import Image
from mcp.server.fastmcp.utilities.types import Audio, Image
from mcp.types import ContentBlock, TextContent

logger = get_logger(__name__)
Expand Down Expand Up @@ -506,6 +506,9 @@ def _convert_to_content(
if isinstance(result, Image):
return [result.to_image_content()]

if isinstance(result, Audio):
return [result.to_audio_content()]

if isinstance(result, list | tuple):
return list(
chain.from_iterable(
Expand Down
51 changes: 50 additions & 1 deletion src/mcp/server/fastmcp/utilities/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import base64
from pathlib import Path

from mcp.types import ImageContent
from mcp.types import AudioContent, ImageContent


class Image:
Expand Down Expand Up @@ -52,3 +52,52 @@ def to_image_content(self) -> ImageContent:
raise ValueError("No image data available")

return ImageContent(type="image", data=data, mimeType=self._mime_type)


class Audio:
    """Helper class for returning audio from tools.

    Wraps either an audio file on disk (``path``) or in-memory bytes
    (``data``) and converts it to an MCP ``AudioContent`` block on demand.

    Args:
        path: Location of an audio file. Mutually exclusive with ``data``.
        data: Raw audio bytes. Mutually exclusive with ``path``.
        format: Optional audio format used to derive the MIME type, e.g.
            ``"wav"``, ``".mp3"`` or a full type such as ``"audio/ogg"``.
            When omitted, the MIME type is guessed from the file extension,
            or defaults to ``audio/wav`` for raw bytes.

    Raises:
        ValueError: If neither or both of ``path`` and ``data`` are given,
            or if ``path`` is an empty string.
    """

    # Extension -> MIME type for the formats this helper recognises. Shared by
    # the explicit-format and the guess-from-suffix code paths so that both
    # agree (e.g. "mp3" and "song.mp3" both map to audio/mpeg).
    _MIME_TYPES = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }

    def __init__(
        self,
        path: str | Path | None = None,
        data: bytes | None = None,
        format: str | None = None,
    ):
        if path is None and data is None:
            raise ValueError("Either path or data must be provided")
        if path is not None and data is not None:
            raise ValueError("Only one of path or data can be provided")
        if isinstance(path, str) and not path:
            # An empty string is not a usable path. Reject it here instead of
            # building an object that has neither a path nor data and only
            # fails later when it is converted.
            raise ValueError("Either path or data must be provided")

        self.path = Path(path) if path is not None else None
        self.data = data
        self._format = format
        self._mime_type = self._get_mime_type()

    def _get_mime_type(self) -> str:
        """Get MIME type from format or guess from file extension."""
        # Normalise the explicit format: tolerate surrounding whitespace,
        # upper case and a leading dot (".WAV" -> "wav").
        fmt = (self._format or "").strip().lower()
        if fmt.startswith("audio/"):
            # Already a complete audio MIME type; use it unchanged.
            return fmt
        fmt = fmt.lstrip(".")
        if fmt:
            # Known formats use their registered type; anything else falls
            # back to the generic "audio/<format>" form.
            return self._MIME_TYPES.get(f".{fmt}", f"audio/{fmt}")

        if self.path is not None:
            return self._MIME_TYPES.get(
                self.path.suffix.lower(), "application/octet-stream"
            )
        return "audio/wav"  # default for raw binary data

    def to_audio_content(self) -> AudioContent:
        """Convert to MCP AudioContent.

        Reads the file (when constructed from ``path``) or uses the in-memory
        bytes, and base64-encodes the payload.

        Raises:
            ValueError: If the instance holds neither a path nor data.
        """
        if self.path is not None:
            raw = self.path.read_bytes()
        elif self.data is not None:
            raw = self.data
        else:
            # Only reachable if the public attributes were cleared after
            # construction.
            raise ValueError("No audio data available")

        encoded = base64.b64encode(raw).decode()
        return AudioContent(type="audio", data=encoded, mimeType=self._mime_type)
68 changes: 67 additions & 1 deletion tests/server/fastmcp/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from mcp.server.fastmcp import Context, FastMCP
from mcp.server.fastmcp.prompts.base import Message, UserMessage
from mcp.server.fastmcp.resources import FileResource, FunctionResource
from mcp.server.fastmcp.utilities.types import Image
from mcp.server.fastmcp.utilities.types import Audio, Image
from mcp.shared.exceptions import McpError
from mcp.shared.memory import (
create_connected_server_and_client_session as client_session,
Expand Down Expand Up @@ -194,6 +194,10 @@ def image_tool_fn(path: str) -> Image:
return Image(path)


def audio_tool_fn(path: str) -> Audio:
    """Tool fixture: wrap the file at ``path`` in an ``Audio`` helper."""
    audio = Audio(path)
    return audio


def mixed_content_tool_fn() -> list[ContentBlock]:
return [
TextContent(type="text", text="Hello"),
Expand Down Expand Up @@ -299,6 +303,27 @@ async def test_tool_image_helper(self, tmp_path: Path):
# Check structured content - Image return type should NOT have structured output
assert result.structuredContent is None

@pytest.mark.anyio
async def test_tool_audio_helper(self, tmp_path: Path):
    """A tool returning ``Audio`` yields a single base64 ``AudioContent``."""
    # Write a small stand-in audio file for the tool to load
    wav_file = tmp_path / "test.wav"
    wav_file.write_bytes(b"fake wav data")

    server = FastMCP()
    server.add_tool(audio_tool_fn)
    async with client_session(server._mcp_server) as client:
        tool_args = {"path": str(wav_file)}
        result = await client.call_tool("audio_tool_fn", tool_args)
        assert len(result.content) == 1
        audio_block = result.content[0]
        assert isinstance(audio_block, AudioContent)
        assert audio_block.type == "audio"
        assert audio_block.mimeType == "audio/wav"
        # The payload must round-trip through base64 unchanged
        raw_bytes = base64.b64decode(audio_block.data)
        assert raw_bytes == b"fake wav data"
        # Audio return type should NOT have structured output
        assert result.structuredContent is None

@pytest.mark.anyio
async def test_tool_mixed_content(self):
mcp = FastMCP()
Expand Down Expand Up @@ -371,6 +396,47 @@ def mixed_list_fn() -> list:
# Check structured content - untyped list with Image objects should NOT have structured output
assert result.structuredContent is None

@pytest.mark.anyio
async def test_tool_mixed_list_with_audio(self, tmp_path: Path):
    """Lists mixing Audio objects with other values convert item by item."""
    # Write a small stand-in audio file for the tool to load
    wav_file = tmp_path / "test.wav"
    wav_file.write_bytes(b"test audio data")

    def mixed_list_fn() -> list:
        # One entry per conversion path: str, Audio, dict, ready-made content
        items = [
            "text message",
            Audio(wav_file),
            {"key": "value"},
            TextContent(type="text", text="direct content"),
        ]
        return items

    server = FastMCP()
    server.add_tool(mixed_list_fn)
    async with client_session(server._mcp_server) as client:
        result = await client.call_tool("mixed_list_fn", {})
        assert len(result.content) == 4
        text_block, audio_block, dict_block, direct_block = result.content

        # Plain string -> TextContent
        assert isinstance(text_block, TextContent)
        assert text_block.text == "text message"

        # Audio helper -> base64-encoded AudioContent
        assert isinstance(audio_block, AudioContent)
        assert audio_block.mimeType == "audio/wav"
        assert base64.b64decode(audio_block.data) == b"test audio data"

        # Dict -> JSON text inside TextContent
        assert isinstance(dict_block, TextContent)
        assert '"key": "value"' in dict_block.text

        # Ready-made TextContent is passed through
        assert isinstance(direct_block, TextContent)
        assert direct_block.text == "direct content"

        # Untyped list with Audio objects should NOT have structured output
        assert result.structuredContent is None

@pytest.mark.anyio
async def test_tool_structured_output_basemodel(self):
"""Test tool with structured output returning BaseModel"""
Expand Down
Loading