Stop double counting retries and reset on success

DouweM · DouweM · commit 84cd9547a14e · 2025-06-27T04:16:10.000Z
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -12,7 +12,6 @@
 from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Union, cast
 
 from opentelemetry.trace import Tracer
-from pydantic import ValidationError
 from typing_extensions import TypeGuard, TypeVar, assert_never
 
 from pydantic_ai._function_schema import _takes_ctx as is_takes_ctx  # type: ignore
@@ -610,7 +609,11 @@ async def process_function_tools(  # noqa: C901
         else:
             try:
                 result_data = await _call_tool(toolset, call, run_context)
+            except exceptions.UnexpectedModelBehavior as e:
+                ctx.state.increment_retries(ctx.deps.max_result_retries, e)
+                raise e
             except _output.ToolRetryError as e:
+                ctx.state.increment_retries(ctx.deps.max_result_retries, e)
                 yield _messages.FunctionToolCallEvent(call)
                 parts.append(e.tool_retry)
                 yield _messages.FunctionToolResultEvent(e.tool_retry, tool_call_id=call.tool_call_id)
@@ -792,26 +795,8 @@ async def _call_tool(
     toolset: AbstractToolset[DepsT], tool_call: _messages.ToolCallPart, run_context: RunContext[DepsT]
 ) -> Any:
     run_context = dataclasses.replace(run_context, tool_call_id=tool_call.tool_call_id)
-
-    try:
-        args_dict = toolset.validate_tool_args(run_context, tool_call.tool_name, tool_call.args)
-        response_content = await toolset.call_tool(run_context, tool_call.tool_name, args_dict)
-    except (ValidationError, exceptions.ModelRetry) as e:
-        if isinstance(e, ValidationError):
-            m = _messages.RetryPromptPart(
-                tool_name=tool_call.tool_name,
-                content=e.errors(include_url=False, include_context=False),
-                tool_call_id=tool_call.tool_call_id,
-            )
-        else:
-            m = _messages.RetryPromptPart(
-                tool_name=tool_call.tool_name,
-                content=e.message,
-                tool_call_id=tool_call.tool_call_id,
-            )
-        raise _output.ToolRetryError(m)
-
-    return response_content
+    args_dict = toolset.validate_tool_args(run_context, tool_call.tool_name, tool_call.args)
+    return await toolset.call_tool(run_context, tool_call.tool_name, args_dict)
 
 
 async def _validate_output(
diff --git a/pydantic_ai_slim/pydantic_ai/agent.py b/pydantic_ai_slim/pydantic_ai/agent.py
@@ -364,6 +364,7 @@ def __init__(
         self._function_toolset = FunctionToolset[AgentDepsT](tools, max_retries=retries)
 
         # This will raise errors for any name conflicts
+        # TODO: Also include toolsets (not mcp_servers as we won't have tool defs yet)
         CombinedToolset[AgentDepsT]([self._output_toolset, self._function_toolset])
 
         # TODO: Set max_retries on MCPServer
diff --git a/pydantic_ai_slim/pydantic_ai/mcp.py b/pydantic_ai_slim/pydantic_ai/mcp.py
@@ -182,14 +182,14 @@ async def list_tool_defs(self) -> list[ToolDefinition]:
             for mcp_tool in mcp_tools
         ]
 
-    def get_tool_args_validator(self, ctx: RunContext[Any], name: str) -> pydantic_core.SchemaValidator:
+    def _get_tool_args_validator(self, ctx: RunContext[Any], name: str) -> pydantic_core.SchemaValidator:
         return pydantic_core.SchemaValidator(
             schema=pydantic_core.core_schema.dict_schema(
                 pydantic_core.core_schema.str_schema(), pydantic_core.core_schema.any_schema()
             )
         )
 
-    def max_retries_for_tool(self, name: str) -> int:
+    def _max_retries_for_tool(self, name: str) -> int:
         return 1
 
     def set_mcp_sampling_model(self, model: models.Model) -> None:
diff --git a/pydantic_ai_slim/pydantic_ai/toolset.py b/pydantic_ai_slim/pydantic_ai/toolset.py
@@ -2,19 +2,20 @@
 
 import asyncio
 from abc import ABC, abstractmethod
-from collections.abc import Awaitable, Sequence
-from contextlib import AsyncExitStack
+from collections.abc import Awaitable, Iterator, Sequence
+from contextlib import AsyncExitStack, contextmanager
 from dataclasses import dataclass, field, replace
 from functools import partial
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Protocol, overload
+from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Protocol, assert_never, overload
 
 from pydantic import ValidationError
 from pydantic.json_schema import GenerateJsonSchema
 from pydantic_core import SchemaValidator
-from typing_extensions import Never, Self
+from typing_extensions import Self
 
-from ._output import BaseOutputSchema, OutputValidator
+from . import messages as _messages
+from ._output import BaseOutputSchema, OutputValidator, ToolRetryError
 from ._run_context import AgentDepsT, RunContext
 from .exceptions import ModelRetry, UnexpectedModelBehavior, UserError
 from .tools import (
@@ -70,21 +71,21 @@ def tool_names(self) -> list[str]:
         return [tool_def.name for tool_def in self.tool_defs]
 
     @abstractmethod
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
         raise NotImplementedError()
 
     def validate_tool_args(
         self, ctx: RunContext[AgentDepsT], name: str, args: str | dict[str, Any] | None, allow_partial: bool = False
     ) -> dict[str, Any]:
         pyd_allow_partial: Literal['off', 'trailing-strings'] = 'trailing-strings' if allow_partial else 'off'
-        validator = self.get_tool_args_validator(ctx, name)
+        validator = self._get_tool_args_validator(ctx, name)
         if isinstance(args, str):
             return validator.validate_json(args or '{}', allow_partial=pyd_allow_partial)
         else:
             return validator.validate_python(args or {}, allow_partial=pyd_allow_partial)
 
     @abstractmethod
-    def max_retries_for_tool(self, name: str) -> int:
+    def _max_retries_for_tool(self, name: str) -> int:
         raise NotImplementedError()
 
     @abstractmethod
@@ -273,10 +274,10 @@ async def _prepare_tool_def(self, ctx: RunContext[AgentDepsT], tool_def: ToolDef
     def tool_defs(self) -> list[ToolDefinition]:
         return [tool.tool_def for tool in self.tools.values()]
 
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
         return self.tools[name].function_schema.validator
 
-    def max_retries_for_tool(self, name: str) -> int:
+    def _max_retries_for_tool(self, name: str) -> int:
         tool = self.tools[name]
         return tool.max_retries if tool.max_retries is not None else self.max_retries
 
@@ -298,10 +299,10 @@ class OutputToolset(AbstractToolset[AgentDepsT]):
     def tool_defs(self) -> list[ToolDefinition]:
         return [tool.tool_def for tool in self.output_schema.tools.values()]
 
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
         return self.output_schema.tools[name].processor.validator
 
-    def max_retries_for_tool(self, name: str) -> int:
+    def _max_retries_for_tool(self, name: str) -> int:
         return self.max_retries
 
     async def call_tool(
@@ -365,16 +366,16 @@ def tool_defs(self) -> list[ToolDefinition]:
     def tool_names(self) -> list[str]:
         return list(self._toolset_per_tool_name.keys())
 
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
-        return self._toolset_for_tool_name(name).get_tool_args_validator(ctx, name)
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+        return self._toolset_for_tool_name(name)._get_tool_args_validator(ctx, name)
 
     def validate_tool_args(
         self, ctx: RunContext[AgentDepsT], name: str, args: str | dict[str, Any] | None, allow_partial: bool = False
     ) -> dict[str, Any]:
         return self._toolset_for_tool_name(name).validate_tool_args(ctx, name, args, allow_partial)
 
-    def max_retries_for_tool(self, name: str) -> int:
-        return self._toolset_for_tool_name(name).max_retries_for_tool(name)
+    def _max_retries_for_tool(self, name: str) -> int:
+        return self._toolset_for_tool_name(name)._max_retries_for_tool(name)
 
     async def call_tool(
         self, ctx: RunContext[AgentDepsT], name: str, tool_args: dict[str, Any], *args: Any, **kwargs: Any
@@ -419,11 +420,11 @@ async def __aexit__(
     def tool_defs(self) -> list[ToolDefinition]:
         return self.wrapped.tool_defs
 
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
-        return self.wrapped.get_tool_args_validator(ctx, name)
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+        return self.wrapped._get_tool_args_validator(ctx, name)
 
-    def max_retries_for_tool(self, name: str) -> int:
-        return self.wrapped.max_retries_for_tool(name)
+    def _max_retries_for_tool(self, name: str) -> int:
+        return self.wrapped._max_retries_for_tool(name)
 
     async def call_tool(
         self, ctx: RunContext[AgentDepsT], name: str, tool_args: dict[str, Any], *args: Any, **kwargs: Any
@@ -452,11 +453,11 @@ async def prepare_for_run(self, ctx: RunContext[AgentDepsT]) -> RunToolset[Agent
     def tool_defs(self) -> list[ToolDefinition]:
         return [replace(tool_def, name=self._prefixed_tool_name(tool_def.name)) for tool_def in super().tool_defs]
 
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
-        return super().get_tool_args_validator(ctx, self._unprefixed_tool_name(name))
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+        return super()._get_tool_args_validator(ctx, self._unprefixed_tool_name(name))
 
-    def max_retries_for_tool(self, name: str) -> int:
-        return super().max_retries_for_tool(self._unprefixed_tool_name(name))
+    def _max_retries_for_tool(self, name: str) -> int:
+        return super()._max_retries_for_tool(self._unprefixed_tool_name(name))
 
     async def call_tool(
         self, ctx: RunContext[AgentDepsT], name: str, tool_args: dict[str, Any], *args: Any, **kwargs: Any
@@ -519,11 +520,11 @@ async def prepare_for_run(self, ctx: RunContext[AgentDepsT]) -> RunToolset[Agent
     def tool_defs(self) -> list[ToolDefinition]:
         return self._tool_defs
 
-    def get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
-        return super().get_tool_args_validator(ctx, self._map_name(name))
+    def _get_tool_args_validator(self, ctx: RunContext[AgentDepsT], name: str) -> SchemaValidator:
+        return super()._get_tool_args_validator(ctx, self._map_name(name))
 
-    def max_retries_for_tool(self, name: str) -> int:
-        return super().max_retries_for_tool(self._map_name(name))
+    def _max_retries_for_tool(self, name: str) -> int:
+        return super()._max_retries_for_tool(self._map_name(name))
 
     async def call_tool(
         self, ctx: RunContext[AgentDepsT], name: str, tool_args: dict[str, Any], *args: Any, **kwargs: Any
@@ -660,40 +661,66 @@ def tool_names(self) -> list[str]:
     def validate_tool_args(
         self, ctx: RunContext[AgentDepsT], name: str, args: str | dict[str, Any] | None, allow_partial: bool = False
     ) -> dict[str, Any]:
-        try:
-            self._validate_tool_name(name)
-
-            ctx = replace(ctx, tool_name=name, retry=self._retries.get(name, 0))
+        with self._with_retry(name, ctx) as ctx:
             return super().validate_tool_args(ctx, name, args, allow_partial)
-        except ValidationError as e:
-            return self._on_error(name, e)
 
     async def call_tool(
         self, ctx: RunContext[AgentDepsT], name: str, tool_args: dict[str, Any], *args: Any, **kwargs: Any
     ) -> Any:
+        with self._with_retry(name, ctx) as ctx:
+            try:
+                output = await super().call_tool(ctx, name, tool_args, *args, **kwargs)
+            except Exception as e:
+                raise e
+            else:
+                self._retries.pop(name, None)
+                return output
+
+    @contextmanager
+    def _with_retry(self, name: str, ctx: RunContext[AgentDepsT]) -> Iterator[RunContext[AgentDepsT]]:
         try:
-            self._validate_tool_name(name)
-
-            ctx = replace(ctx, tool_name=name, retry=self._retries.get(name, 0))
-            return await super().call_tool(ctx, name, tool_args, *args, **kwargs)
-        except ModelRetry as e:
-            return self._on_error(name, e)
-
-    def _on_error(self, name: str, e: Exception) -> Never:
-        max_retries = self.max_retries_for_tool(name)
-        current_retry = self._retries.get(name, 0)
-        if current_retry == max_retries:
-            raise UnexpectedModelBehavior(f'Tool {name!r} exceeded max retries count of {max_retries}') from e
-        else:
-            self._retries[name] = current_retry + 1  # TODO: Reset on successful call!
-            raise e
+            if name not in self.tool_names:
+                if self.tool_names:
+                    msg = f'Available tools: {", ".join(self.tool_names)}'
+                else:
+                    msg = 'No tools available.'
+                raise ModelRetry(f'Unknown tool name: {name!r}. {msg}')
+
+            ctx = replace(ctx, tool_name=name, retry=self._retries.get(name, 0), retries={})
+            yield ctx
+        except (ValidationError, ModelRetry, UnexpectedModelBehavior, ToolRetryError) as e:
+            if isinstance(e, ToolRetryError):
+                pass
+            elif isinstance(e, ValidationError):
+                if ctx.tool_call_id:
+                    m = _messages.RetryPromptPart(
+                        tool_name=name,
+                        content=e.errors(include_url=False, include_context=False),
+                        tool_call_id=ctx.tool_call_id,
+                    )
+                    e = ToolRetryError(m)
+            elif isinstance(e, ModelRetry):
+                if ctx.tool_call_id:
+                    m = _messages.RetryPromptPart(
+                        tool_name=name,
+                        content=e.message,
+                        tool_call_id=ctx.tool_call_id,
+                    )
+                    e = ToolRetryError(m)
+            elif isinstance(e, UnexpectedModelBehavior):
+                if e.__cause__ is not None:
+                    e = e.__cause__
+            else:
+                assert_never(e)
 
-    def _validate_tool_name(self, name: str) -> None:
-        if name in self.tool_names:
-            return
+            try:
+                max_retries = self._max_retries_for_tool(name)
+            except Exception:
+                max_retries = 1
+            current_retry = self._retries.get(name, 0)
 
-        if self.tool_names:
-            msg = f'Available tools: {", ".join(self.tool_names)}'
-        else:
-            msg = 'No tools available.'
-        raise ModelRetry(f'Unknown tool name: {name!r}. {msg}')
+            if current_retry == max_retries:
+                raise UnexpectedModelBehavior(f'Tool {name!r} exceeded max retries count of {max_retries}') from e
+            else:
+                self._retries[name] = current_retry + 1
+                raise e
diff --git a/tests/models/test_model_test.py b/tests/models/test_model_test.py
@@ -4,6 +4,7 @@
 
 import asyncio
 import dataclasses
+import re
 from datetime import timezone
 from typing import Annotated, Any, Literal
 
@@ -157,7 +158,7 @@ def validate_output(ctx: RunContext[None], output: OutputModel) -> OutputModel:
         call_count += 1
         raise ModelRetry('Fail')
 
-    with pytest.raises(UnexpectedModelBehavior, match="Tool 'final_result' exceeded max retries count of 2"):
+    with pytest.raises(UnexpectedModelBehavior, match=re.escape('Exceeded maximum retries (2) for result validation')):
         agent.run_sync('Hello', model=TestModel())
 
     assert call_count == 3
@@ -200,7 +201,7 @@ class ResultModel(BaseModel):
 
     agent = Agent('test', output_type=ResultModel, retries=2)
 
-    with pytest.raises(UnexpectedModelBehavior, match="Tool 'final_result' exceeded max retries count of 2"):
+    with pytest.raises(UnexpectedModelBehavior, match=r'Exceeded maximum retries \(2\) for result validation'):
         agent.run_sync('Hello', model=TestModel(custom_output_args={'foo': 'a', 'bar': 1}))
 
 
diff --git a/tests/test_examples.py b/tests/test_examples.py
@@ -270,10 +270,10 @@ async def __aexit__(self, *args: Any) -> None:
     def tool_defs(self) -> list[ToolDefinition]:
         return []
 
-    def get_tool_args_validator(self, ctx: RunContext[Any], name: str) -> SchemaValidator:
+    def _get_tool_args_validator(self, ctx: RunContext[Any], name: str) -> SchemaValidator:
         return SchemaValidator(core_schema.any_schema())  # pragma: lax no cover
 
-    def max_retries_for_tool(self, name: str) -> int:
+    def _max_retries_for_tool(self, name: str) -> int:
         return 0  # pragma: lax no cover
 
     async def call_tool(

Original file line number	Diff line number	Diff line change
`@@ -182,14 +182,14 @@ async def list_tool_defs(self) -> list[ToolDefinition]:`
`182`	`182`	`for mcp_tool in mcp_tools`
`183`	`183`	`]`
`184`	`184`
`185`		`- def get_tool_args_validator(self, ctx: RunContext[Any], name: str) -> pydantic_core.SchemaValidator:`
	`185`	`+ def _get_tool_args_validator(self, ctx: RunContext[Any], name: str) -> pydantic_core.SchemaValidator:`
`186`	`186`	`return pydantic_core.SchemaValidator(`
`187`	`187`	`schema=pydantic_core.core_schema.dict_schema(`
`188`	`188`	`pydantic_core.core_schema.str_schema(), pydantic_core.core_schema.any_schema()`
`189`	`189`	`)`
`190`	`190`	`)`
`191`	`191`
`192`		`- def max_retries_for_tool(self, name: str) -> int:`
	`192`	`+ def _max_retries_for_tool(self, name: str) -> int:`
`193`	`193`	`return 1`
`194`	`194`
`195`	`195`	`def set_mcp_sampling_model(self, model: models.Model) -> None:`