Skip to content

Commit 9e8c636

Browse files
fixing toolsets to tools
1 parent 0b4a78b commit 9e8c636

File tree

8 files changed

+21
-15
lines changed

8 files changed

+21
-15
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ The format follows the principles of [Keep a Changelog](https://keepachangelog.c
77
## [Unreleased](https://github.com/PabloCabaleiro/pondera/tree/main)
88

99

10+
## [v0.6.1](https://github.com/PabloCabaleiro/pondera/releases/tag/v0.6.1) - 2025-10-15
11+
12+
<!-- markdownlint-disable-next-line MD024 -->
13+
### Fixed
14+
15+
- Renamed the `toolsets` parameter to `tools` in the `Judge` constructor and its internal implementation, for consistency with the PydanticAI API
16+
17+
1018
## [v0.6.0](https://github.com/PabloCabaleiro/pondera/releases/tag/v0.6.0) - 2025-10-13
1119

1220
<!-- markdownlint-disable-next-line MD024 -->

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ class ConstantJudge(JudgeProtocol):
101101

102102
```bash
103103
# Using uv (recommended)
104-
uv add 'git+ssh://git@github.com/PabloCabaleiro/pondera.git@v0.6.0'
104+
uv add 'git+ssh://git@github.com/PabloCabaleiro/pondera.git@v0.6.1'
105105
# or install directly from the Git repository with uv pip (non-editable)
106-
uv pip install 'git+ssh://git@github.com/PabloCabaleiro/pondera.git@v0.6.0'
106+
uv pip install 'git+ssh://git@github.com/PabloCabaleiro/pondera.git@v0.6.1'
107107
```
108108

109109
The judge uses the pydantic-ai ecosystem. Configure provider credentials via env vars (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `AZURE_OPENAI_API_KEY`, etc.) plus optional `PONDERA_` settings.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "pondera"
3-
version = "0.6.0"
3+
version = "0.6.1"
44
description = "YAML-first, pluggable runners & LLM-as-a-judge evaluation framework."
55
readme = "README.md"
66
requires-python = ">=3.10"

src/pondera/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@
2222
"ValidationError",
2323
]
2424

25-
__version__ = "0.6.0"
25+
__version__ = "0.6.1"

src/pondera/judge/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ def __init__(
1919
model: str | None = None,
2020
rubric: list[RubricCriterion] | None = None,
2121
system_append: str = "",
22-
toolsets: tuple[Any, ...] = (),
22+
tools: tuple[Any, ...] = (),
2323
) -> None:
2424
self._default_rubric = rubric or default_rubric()
2525
self._system_append = system_append
2626
self._model = model
27-
self._toolsets = toolsets
27+
self._tools = tools
2828

2929
async def judge(
3030
self,
@@ -44,7 +44,7 @@ async def judge(
4444
use_system = self._system_prompt(
4545
rb, self._system_append + ("\n" + system_append if system_append else "")
4646
)
47-
agent = get_agent(system_prompt=use_system, output_type=Judgment, toolsets=self._toolsets)
47+
agent = get_agent(system_prompt=use_system, output_type=Judgment, tools=self._tools)
4848

4949
files_section = "\n".join(f"- {p}" for p in (files or [])) or "(none)"
5050

src/pondera/judge/pydantic_ai.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ def get_agent(
163163
instructions: str | None = None,
164164
system_prompt: str | tuple[str, ...] = (),
165165
tools: tuple[Any, ...] = (),
166-
toolsets: tuple[Any, ...] = (),
167166
model_settings: ModelSettings | None = None,
168167
output_type: Any = str,
169168
deps_type: type = NoneType,
@@ -184,7 +183,6 @@ def get_agent(
184183
deps_type=deps_type,
185184
model_settings=model_settings,
186185
tools=tools,
187-
toolsets=toolsets,
188186
instrument=True,
189187
)
190188
return agent

tests/judge/test_base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,10 @@ async def test_judge_user_prompt_format(
9898
@patch("pondera.judge.base.run_agent")
9999
@patch("pondera.judge.base.default_rubric")
100100
@pytest.mark.asyncio
101-
async def test_judge_calls_get_agent_with_toolsets(
101+
async def test_judge_calls_get_agent_with_tools(
102102
mock_default_rubric: Any, mock_run_agent: Any, mock_get_agent: Any
103103
) -> None:
104-
"""Test that judge passes toolsets to get_agent."""
104+
"""Test that judge passes tools to get_agent."""
105105

106106
def sample_tool(x: int) -> int:
107107
"""Sample tool that doubles input."""
@@ -116,7 +116,7 @@ def sample_tool(x: int) -> int:
116116
score=90, evaluation_passed=True, reasoning="Excellent", criteria_scores={"accuracy": 90}
117117
)
118118
mock_run_agent.return_value = (expected_judgment, [])
119-
judge = Judge(toolsets=(sample_tool,))
119+
judge = Judge(tools=(sample_tool,))
120120

121121
await judge.judge(
122122
question="What is 2+2?",
@@ -127,7 +127,7 @@ def sample_tool(x: int) -> int:
127127

128128
mock_get_agent.assert_called_once()
129129
call_kwargs = mock_get_agent.call_args.kwargs
130-
assert "toolsets" in call_kwargs
131-
assert call_kwargs["toolsets"] == (sample_tool,)
130+
assert "tools" in call_kwargs
131+
assert call_kwargs["tools"] == (sample_tool,)
132132
assert call_kwargs["output_type"] == Judgment
133133
mock_run_agent.assert_called_once()

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)