Fix native function calling in adapters (#8479)

chenmoneygithub · web-flow · commit c0506d18a4ba · 2025-07-05T10:23:11.000-04:00
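Summary of the squashed change:
- `use_native_function_calling` is now an `Adapter.__init__` option (default False; `JSONAdapter` defaults it to True) instead of a `_call_preprocess` argument.
- With native function calling enabled, the tool input field is removed from the signature sent to the LM, in addition to the tool-call output field.
- `_call_postprocess` parses with the processed signature and sets any output field of the original signature that is missing from the parsed value to None.
- `JSONAdapter` uses `json_object` mode instead of structured outputs when native function calling is disabled and the signature has a `dspy.ToolCalls` output field.
- `TwoStepAdapter.__init__` forwards extra keyword arguments to `Adapter.__init__`.

* fix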

* fix test

* better test

* fix

* fix tests

* lint fix

* better test
diff --git a/dspy/adapters/base.py b/dspy/adapters/base.py
@@ -17,8 +17,9 @@
 
 
 class Adapter:
-    def __init__(self, callbacks: list[BaseCallback] | None = None):
+    def __init__(self, callbacks: list[BaseCallback] | None = None, use_native_function_calling: bool = False):
         self.callbacks = callbacks or []
+        self.use_native_function_calling = use_native_function_calling
 
     def __init_subclass__(cls, **kwargs) -> None:
         super().__init_subclass__(**kwargs)
@@ -33,9 +34,8 @@ def _call_preprocess(
         lm_kwargs: dict[str, Any],
         signature: Type[Signature],
         inputs: dict[str, Any],
-        use_native_function_calling: bool = False,
     ) -> dict[str, Any]:
-        if use_native_function_calling:
+        if self.use_native_function_calling:
             tool_call_input_field_name = self._get_tool_call_input_field_name(signature)
             tool_call_output_field_name = self._get_tool_call_output_field_name(signature)
 
@@ -57,19 +57,23 @@ def _call_preprocess(
                 lm_kwargs["tools"] = litellm_tools
 
                 signature_for_native_function_calling = signature.delete(tool_call_output_field_name)
+                signature_for_native_function_calling = signature_for_native_function_calling.delete(
+                    tool_call_input_field_name
+                )
 
                 return signature_for_native_function_calling
 
         return signature
 
     def _call_postprocess(
         self,
-        signature: Type[Signature],
+        processed_signature: Type[Signature],
+        original_signature: Type[Signature],
         outputs: list[dict[str, Any]],
     ) -> list[dict[str, Any]]:
         values = []
 
-        tool_call_output_field_name = self._get_tool_call_output_field_name(signature)
+        tool_call_output_field_name = self._get_tool_call_output_field_name(original_signature)
 
         for output in outputs:
             output_logprobs = None
@@ -82,10 +86,14 @@ def _call_postprocess(
                 tool_calls = output.get("tool_calls")
 
             if text:
-                value = self.parse(signature, text)
+                value = self.parse(processed_signature, text)
+                for field_name in original_signature.output_fields.keys():
+                    if field_name not in value:
+                        # We need to set the field not present in the processed signature to None for consistency.
+                        value[field_name] = None
             else:
                 value = {}
-                for field_name in signature.output_fields.keys():
+                for field_name in original_signature.output_fields.keys():
                     value[field_name] = None
 
             if tool_calls and tool_call_output_field_name:
@@ -117,7 +125,7 @@ def __call__(
         inputs = self.format(processed_signature, demos, inputs)
 
         outputs = lm(messages=inputs, **lm_kwargs)
-        return self._call_postprocess(signature, outputs)
+        return self._call_postprocess(processed_signature, signature, outputs)
 
     async def acall(
         self,
@@ -131,7 +139,7 @@ async def acall(
         inputs = self.format(processed_signature, demos, inputs)
 
         outputs = await lm.acall(messages=inputs, **lm_kwargs)
-        return self._call_postprocess(signature, outputs)
+        return self._call_postprocess(processed_signature, signature, outputs)
 
     def format(
         self,
diff --git a/dspy/adapters/chat_adapter.py b/dspy/adapters/chat_adapter.py
@@ -15,7 +15,6 @@
 )
 from dspy.clients.lm import LM
 from dspy.signatures.signature import Signature
-from dspy.utils.callback import BaseCallback
 from dspy.utils.exceptions import AdapterParseError
 
 field_header_pattern = re.compile(r"\[\[ ## (\w+) ## \]\]")
@@ -27,9 +26,6 @@ class FieldInfoWithName(NamedTuple):
 
 
 class ChatAdapter(Adapter):
-    def __init__(self, callbacks: list[BaseCallback] | None = None):
-        super().__init__(callbacks)
-
     def __call__(
         self,
         lm: LM,
diff --git a/dspy/adapters/json_adapter.py b/dspy/adapters/json_adapter.py
@@ -9,6 +9,7 @@
 from pydantic.fields import FieldInfo
 
 from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName
+from dspy.adapters.types.tool import ToolCalls
 from dspy.adapters.utils import (
     format_field_value,
     get_annotation_name,
@@ -18,6 +19,7 @@
 )
 from dspy.clients.lm import LM
 from dspy.signatures.signature import Signature, SignatureMeta
+from dspy.utils.callback import BaseCallback
 from dspy.utils.exceptions import AdapterParseError
 
 logger = logging.getLogger(__name__)
@@ -37,6 +39,10 @@ def _has_open_ended_mapping(signature: SignatureMeta) -> bool:
 
 
 class JSONAdapter(ChatAdapter):
+    def __init__(self, callbacks: list[BaseCallback] | None = None, use_native_function_calling: bool = True):
+        # JSONAdapter uses native function calling by default.
+        super().__init__(callbacks=callbacks, use_native_function_calling=use_native_function_calling)
+
     def _json_adapter_call_common(self, lm, lm_kwargs, signature, demos, inputs, call_fn):
         """Common call logic to be used for both sync and async calls."""
         provider = lm.model.split("/", 1)[0] or "openai"
@@ -45,7 +51,10 @@ def _json_adapter_call_common(self, lm, lm_kwargs, signature, demos, inputs, cal
         if not params or "response_format" not in params:
             return call_fn(lm, lm_kwargs, signature, demos, inputs)
 
-        if _has_open_ended_mapping(signature):
+        has_tool_calls = any(field.annotation == ToolCalls for field in signature.output_fields.values())
+        if _has_open_ended_mapping(signature) or (not self.use_native_function_calling and has_tool_calls):
+            # We found that structured output mode doesn't work well with dspy.ToolCalls as output field.
+            # So we fall back to json mode if native function calling is disabled and ToolCalls is present.
             lm_kwargs["response_format"] = {"type": "json_object"}
             return call_fn(lm, lm_kwargs, signature, demos, inputs)
 
@@ -62,7 +71,9 @@ def __call__(
             return result
 
         try:
-            structured_output_model = _get_structured_outputs_response_format(signature)
+            structured_output_model = _get_structured_outputs_response_format(
+                signature, self.use_native_function_calling
+            )
             lm_kwargs["response_format"] = structured_output_model
             return super().__call__(lm, lm_kwargs, signature, demos, inputs)
         except Exception:
@@ -91,16 +102,6 @@ async def acall(
             lm_kwargs["response_format"] = {"type": "json_object"}
             return await super().acall(lm, lm_kwargs, signature, demos, inputs)
 
-    def _call_preprocess(
-        self,
-        lm: "LM",
-        lm_kwargs: dict[str, Any],
-        signature: Type[Signature],
-        inputs: dict[str, Any],
-        use_native_function_calling: bool = True,
-    ) -> dict[str, Any]:
-        return super()._call_preprocess(lm, lm_kwargs, signature, inputs, use_native_function_calling)
-
     def format_field_structure(self, signature: Type[Signature]) -> str:
         parts = []
         parts.append("All interactions will be structured in the following way, with the appropriate values filled in.")
@@ -206,7 +207,10 @@ def format_finetune_data(
         raise NotImplementedError
 
 
-def _get_structured_outputs_response_format(signature: SignatureMeta) -> type[pydantic.BaseModel]:
+def _get_structured_outputs_response_format(
+    signature: SignatureMeta,
+    use_native_function_calling: bool = True,
+) -> type[pydantic.BaseModel]:
     """
     Builds a Pydantic model from a DSPy signature's output_fields and ensures the generated JSON schema
     is compatible with OpenAI Structured Outputs (all objects have a "required" key listing every property,
@@ -227,6 +231,9 @@ def _get_structured_outputs_response_format(signature: SignatureMeta) -> type[py
     fields = {}
     for name, field in signature.output_fields.items():
         annotation = field.annotation
+        if use_native_function_calling and annotation == ToolCalls:
+            # Skip ToolCalls field if native function calling is enabled.
+            continue
         default = field.default if hasattr(field, "default") else ...
         fields[name] = (annotation, default)
 
diff --git a/dspy/adapters/two_step_adapter.py b/dspy/adapters/two_step_adapter.py
@@ -39,7 +39,8 @@ class TwoStepAdapter(Adapter):
     ```
     """
 
-    def __init__(self, extraction_model: LM):
+    def __init__(self, extraction_model: LM, **kwargs):
+        super().__init__(**kwargs)
         if not isinstance(extraction_model, LM):
             raise ValueError("extraction_model must be an instance of LM")
         self.extraction_model = extraction_model
diff --git a/tests/adapters/test_chat_adapter.py b/tests/adapters/test_chat_adapter.py
@@ -3,7 +3,7 @@
 
 import pydantic
 import pytest
-from litellm.utils import Choices, Message, ModelResponse
+from litellm.utils import ChatCompletionMessageToolCall, Choices, Function, Message, ModelResponse
 
 import dspy
 
@@ -422,3 +422,70 @@ async def test_chat_adapter_fallback_to_json_adapter_on_exception_async():
         # The parse should succeed
         result = await adapter.acall(lm, {}, signature, [], {"question": "What is the capital of France?"})
         assert result == [{"answer": "Paris"}]
+
+
+def test_chat_adapter_toolcalls_native_function_calling():
+    class MySignature(dspy.Signature):
+        question: str = dspy.InputField()
+        tools: list[dspy.Tool] = dspy.InputField()
+        answer: str = dspy.OutputField()
+        tool_calls: dspy.ToolCalls = dspy.OutputField()
+
+    def get_weather(city: str) -> str:
+        return f"The weather in {city} is sunny"
+
+    tools = [dspy.Tool(get_weather)]
+
+    adapter = dspy.ChatAdapter(use_native_function_calling=True)
+
+    # Case 1: The response carries native tool calls and no text content (content is None).
+    with mock.patch("litellm.completion") as mock_completion:
+        mock_completion.return_value = ModelResponse(
+            choices=[
+                Choices(
+                    finish_reason="tool_calls",
+                    index=0,
+                    message=Message(
+                        content=None,
+                        role="assistant",
+                        tool_calls=[
+                            ChatCompletionMessageToolCall(
+                                function=Function(arguments='{"city":"Paris"}', name="get_weather"),
+                                id="call_pQm8ajtSMxgA0nrzK2ivFmxG",
+                                type="function",
+                            )
+                        ],
+                    ),
+                ),
+            ],
+            model="openai/gpt-4o-mini",
+        )
+        result = adapter(
+            dspy.LM(model="openai/gpt-4o-mini", cache=False),
+            {},
+            MySignature,
+            [],
+            {"question": "What is the weather in Paris?", "tools": tools},
+        )
+
+        assert result[0]["tool_calls"] == dspy.ToolCalls(
+            tool_calls=[dspy.ToolCalls.ToolCall(name="get_weather", args={"city": "Paris"})]
+        )
+        # No text content was returned, so the adapter fills `answer` with None.
+        assert result[0]["answer"] is None
+
+    # Case 2: No tool calls in the response; the content uses ChatAdapter's `[[ ## field ## ]]` header format.
+    with mock.patch("litellm.completion") as mock_completion:
+        mock_completion.return_value = ModelResponse(
+            choices=[Choices(message=Message(content="[[ ## answer ## ]]\nParis"))],
+            model="openai/gpt-4o-mini",
+        )
+        result = adapter(
+            dspy.LM(model="openai/gpt-4o-mini", cache=False),
+            {},
+            MySignature,
+            [],
+            {"question": "What is the weather in Paris?", "tools": tools},
+        )
+        assert result[0]["answer"] == "Paris"
+        assert result[0]["tool_calls"] is None
diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py
@@ -2,7 +2,7 @@
 
 import pydantic
 import pytest
-from litellm.utils import Choices, Message, ModelResponse
+from litellm.utils import ChatCompletionMessageToolCall, Choices, Function, Message, ModelResponse
 
 import dspy
 
@@ -650,3 +650,102 @@ class TestSignature(dspy.Signature):
                 await program.acall(question="Dummy question!")
 
             assert "ValueError!" in str(error.value)
+
+
+def test_json_adapter_toolcalls_native_function_calling():
+    class MySignature(dspy.Signature):
+        question: str = dspy.InputField()
+        tools: list[dspy.Tool] = dspy.InputField()
+        answer: str = dspy.OutputField()
+        tool_calls: dspy.ToolCalls = dspy.OutputField()
+
+    def get_weather(city: str) -> str:
+        return f"The weather in {city} is sunny"
+
+    tools = [dspy.Tool(get_weather)]
+
+    adapter = dspy.JSONAdapter(use_native_function_calling=True)
+
+    # Case 1: Tool calls are present in the response, while content is None.
+    with mock.patch("litellm.completion") as mock_completion:
+        mock_completion.return_value = ModelResponse(
+            choices=[
+                Choices(
+                    finish_reason="tool_calls",
+                    index=0,
+                    message=Message(
+                        content=None,
+                        role="assistant",
+                        tool_calls=[
+                            ChatCompletionMessageToolCall(
+                                function=Function(arguments='{"city":"Paris"}', name="get_weather"),
+                                id="call_pQm8ajtSMxgA0nrzK2ivFmxG",
+                                type="function",
+                            )
+                        ],
+                    ),
+                ),
+            ],
+            model="openai/gpt-4o-mini",
+        )
+        result = adapter(
+            dspy.LM(model="openai/gpt-4o-mini", cache=False),
+            {},
+            MySignature,
+            [],
+            {"question": "What is the weather in Paris?", "tools": tools},
+        )
+
+        assert result[0]["tool_calls"] == dspy.ToolCalls(
+            tool_calls=[dspy.ToolCalls.ToolCall(name="get_weather", args={"city": "Paris"})]
+        )
+        # `answer` is not present, so we set it to None
+        assert result[0]["answer"] is None
+
+    # Case 2: Tool calls are not present in the response, while content is present.
+    with mock.patch("litellm.completion") as mock_completion:
+        mock_completion.return_value = ModelResponse(
+            choices=[Choices(message=Message(content="{'answer': 'Paris'}"))],
+            model="openai/gpt-4o-mini",
+        )
+        result = adapter(
+            dspy.LM(model="openai/gpt-4o-mini", cache=False),
+            {},
+            MySignature,
+            [],
+            {"question": "What is the weather in Paris?", "tools": tools},
+        )
+        assert result[0]["answer"] == "Paris"
+        assert result[0]["tool_calls"] is None
+
+
+def test_json_adapter_toolcalls_no_native_function_calling():
+    class MySignature(dspy.Signature):
+        question: str = dspy.InputField()
+        tools: list[dspy.Tool] = dspy.InputField()
+        answer: str = dspy.OutputField()
+        tool_calls: dspy.ToolCalls = dspy.OutputField()
+
+    def get_weather(city: str) -> str:
+        return f"The weather in {city} is sunny"
+
+    tools = [dspy.Tool(get_weather)]
+
+    # Patch _get_structured_outputs_response_format to track calls
+    with mock.patch("dspy.adapters.json_adapter._get_structured_outputs_response_format") as mock_structured:
+        # Patch litellm.completion to return a dummy response
+        with mock.patch("litellm.completion") as mock_completion:
+            mock_completion.return_value = ModelResponse(
+                choices=[Choices(message=Message(content="{'answer': 'sunny', 'tool_calls': {'tool_calls': []}}"))],
+                model="openai/gpt-4o-mini",
+            )
+            adapter = dspy.JSONAdapter(use_native_function_calling=False)
+            lm = dspy.LM(model="openai/gpt-4o-mini", cache=False)
+            adapter(lm, {}, MySignature, [], {"question": "What is the weather in Tokyo?", "tools": tools})
+
+        # _get_structured_outputs_response_format is not called because without using native function calling,
+        # JSONAdapter falls back to json mode for stable quality.
+        mock_structured.assert_not_called()
+        mock_completion.assert_called_once()
+        _, call_kwargs = mock_completion.call_args
+        assert call_kwargs["response_format"] == {"type": "json_object"}