Skip to content

Commit 88ad258

Browse files
authored
Adding ModelResponse.usage (#1647)
1 parent 6e6fee9 commit 88ad258

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+923
-118
lines changed

docs/agents.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ async def main():
145145
CallToolsNode(
146146
model_response=ModelResponse(
147147
parts=[TextPart(content='Paris', part_kind='text')],
148+
usage=Usage(
149+
requests=1,
150+
request_tokens=56,
151+
response_tokens=1,
152+
total_tokens=57,
153+
details=None,
154+
),
148155
model_name='gpt-4o',
149156
timestamp=datetime.datetime(...),
150157
kind='response',
@@ -209,6 +216,13 @@ async def main():
209216
CallToolsNode(
210217
model_response=ModelResponse(
211218
parts=[TextPart(content='Paris', part_kind='text')],
219+
usage=Usage(
220+
requests=1,
221+
request_tokens=56,
222+
response_tokens=1,
223+
total_tokens=57,
224+
details=None,
225+
),
212226
model_name='gpt-4o',
213227
timestamp=datetime.datetime(...),
214228
kind='response',
@@ -805,6 +819,13 @@ with capture_run_messages() as messages: # (2)!
805819
part_kind='tool-call',
806820
)
807821
],
822+
usage=Usage(
823+
requests=1,
824+
request_tokens=62,
825+
response_tokens=4,
826+
total_tokens=66,
827+
details=None,
828+
),
808829
model_name='gpt-4o',
809830
timestamp=datetime.datetime(...),
810831
kind='response',
@@ -831,6 +852,13 @@ with capture_run_messages() as messages: # (2)!
831852
part_kind='tool-call',
832853
)
833854
],
855+
usage=Usage(
856+
requests=1,
857+
request_tokens=72,
858+
response_tokens=8,
859+
total_tokens=80,
860+
details=None,
861+
),
834862
model_name='gpt-4o',
835863
timestamp=datetime.datetime(...),
836864
kind='response',

docs/message-history.md

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ and [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult] (returned by [`A
2727

2828
Example of accessing methods on a [`RunResult`][pydantic_ai.agent.AgentRunResult] :
2929

30-
```python {title="run_result_messages.py" hl_lines="10 28"}
30+
```python {title="run_result_messages.py" hl_lines="10"}
3131
from pydantic_ai import Agent
3232

3333
agent = Agent('openai:gpt-4o', system_prompt='Be a helpful assistant.')
@@ -64,6 +64,13 @@ print(result.all_messages())
6464
part_kind='text',
6565
)
6666
],
67+
usage=Usage(
68+
requests=1,
69+
request_tokens=60,
70+
response_tokens=12,
71+
total_tokens=72,
72+
details=None,
73+
),
6774
model_name='gpt-4o',
6875
timestamp=datetime.datetime(...),
6976
kind='response',
@@ -75,7 +82,7 @@ _(This example is complete, it can be run "as is")_
7582

7683
Example of accessing methods on a [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult] :
7784

78-
```python {title="streamed_run_result_messages.py" hl_lines="9 31"}
85+
```python {title="streamed_run_result_messages.py" hl_lines="9 40"}
7986
from pydantic_ai import Agent
8087

8188
agent = Agent('openai:gpt-4o', system_prompt='Be a helpful assistant.')
@@ -142,6 +149,13 @@ async def main():
142149
part_kind='text',
143150
)
144151
],
152+
usage=Usage(
153+
requests=0,
154+
request_tokens=50,
155+
response_tokens=12,
156+
total_tokens=62,
157+
details=None,
158+
),
145159
model_name='gpt-4o',
146160
timestamp=datetime.datetime(...),
147161
kind='response',
@@ -201,6 +215,13 @@ print(result2.all_messages())
201215
part_kind='text',
202216
)
203217
],
218+
usage=Usage(
219+
requests=1,
220+
request_tokens=60,
221+
response_tokens=12,
222+
total_tokens=72,
223+
details=None,
224+
),
204225
model_name='gpt-4o',
205226
timestamp=datetime.datetime(...),
206227
kind='response',
@@ -223,6 +244,13 @@ print(result2.all_messages())
223244
part_kind='text',
224245
)
225246
],
247+
usage=Usage(
248+
requests=1,
249+
request_tokens=61,
250+
response_tokens=26,
251+
total_tokens=87,
252+
details=None,
253+
),
226254
model_name='gpt-4o',
227255
timestamp=datetime.datetime(...),
228256
kind='response',
@@ -285,7 +313,7 @@ The message format is independent of the model used, so you can use messages in
285313

286314
In the example below, we reuse the message from the first agent run, which uses the `openai:gpt-4o` model, in a second agent run using the `google-gla:gemini-1.5-pro` model.
287315

288-
```python {title="Reusing messages with a different model" hl_lines="11"}
316+
```python {title="Reusing messages with a different model" hl_lines="17"}
289317
from pydantic_ai import Agent
290318

291319
agent = Agent('openai:gpt-4o', system_prompt='Be a helpful assistant.')
@@ -329,6 +357,13 @@ print(result2.all_messages())
329357
part_kind='text',
330358
)
331359
],
360+
usage=Usage(
361+
requests=1,
362+
request_tokens=60,
363+
response_tokens=12,
364+
total_tokens=72,
365+
details=None,
366+
),
332367
model_name='gpt-4o',
333368
timestamp=datetime.datetime(...),
334369
kind='response',
@@ -351,6 +386,13 @@ print(result2.all_messages())
351386
part_kind='text',
352387
)
353388
],
389+
usage=Usage(
390+
requests=1,
391+
request_tokens=61,
392+
response_tokens=26,
393+
total_tokens=87,
394+
details=None,
395+
),
354396
model_name='gemini-1.5-pro',
355397
timestamp=datetime.datetime(...),
356398
kind='response',

docs/testing.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ from pydantic_ai.messages import (
9797
UserPromptPart,
9898
ModelRequest,
9999
)
100+
from pydantic_ai.usage import Usage
100101

101102
from fake_database import DatabaseConn
102103
from weather_app import run_weather_forecast, weather_agent
@@ -140,6 +141,13 @@ async def test_forecast():
140141
tool_call_id=IsStr(),
141142
)
142143
],
144+
usage=Usage(
145+
requests=1,
146+
request_tokens=71,
147+
response_tokens=7,
148+
total_tokens=78,
149+
details=None,
150+
),
143151
model_name='test',
144152
timestamp=IsNow(tz=timezone.utc),
145153
),
@@ -159,6 +167,13 @@ async def test_forecast():
159167
content='{"weather_forecast":"Sunny with a chance of rain"}',
160168
)
161169
],
170+
usage=Usage(
171+
requests=1,
172+
request_tokens=77,
173+
response_tokens=16,
174+
total_tokens=93,
175+
details=None,
176+
),
162177
model_name='test',
163178
timestamp=IsNow(tz=timezone.utc),
164179
),

docs/tools.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ print(dice_result.all_messages())
9696
part_kind='tool-call',
9797
)
9898
],
99+
usage=Usage(
100+
requests=1,
101+
request_tokens=90,
102+
response_tokens=2,
103+
total_tokens=92,
104+
details=None,
105+
),
99106
model_name='gemini-1.5-flash',
100107
timestamp=datetime.datetime(...),
101108
kind='response',
@@ -122,6 +129,13 @@ print(dice_result.all_messages())
122129
part_kind='tool-call',
123130
)
124131
],
132+
usage=Usage(
133+
requests=1,
134+
request_tokens=91,
135+
response_tokens=4,
136+
total_tokens=95,
137+
details=None,
138+
),
125139
model_name='gemini-1.5-flash',
126140
timestamp=datetime.datetime(...),
127141
kind='response',
@@ -146,6 +160,13 @@ print(dice_result.all_messages())
146160
part_kind='text',
147161
)
148162
],
163+
usage=Usage(
164+
requests=1,
165+
request_tokens=92,
166+
response_tokens=12,
167+
total_tokens=104,
168+
details=None,
169+
),
149170
model_name='gemini-1.5-flash',
150171
timestamp=datetime.datetime(...),
151172
kind='response',

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,16 +301,15 @@ async def _stream(
301301
ctx.state.message_history, model_settings, model_request_parameters
302302
) as streamed_response:
303303
self._did_stream = True
304-
ctx.state.usage.incr(_usage.Usage(), requests=1)
304+
ctx.state.usage.requests += 1
305305
yield streamed_response
306306
# In case the user didn't manually consume the full stream, ensure it is fully consumed here,
307307
# otherwise usage won't be properly counted:
308308
async for _ in streamed_response:
309309
pass
310310
model_response = streamed_response.get()
311-
request_usage = streamed_response.usage()
312311

313-
self._finish_handling(ctx, model_response, request_usage)
312+
self._finish_handling(ctx, model_response)
314313
assert self._result is not None # this should be set by the previous line
315314

316315
async def _make_request(
@@ -321,12 +320,12 @@ async def _make_request(
321320

322321
model_settings, model_request_parameters = await self._prepare_request(ctx)
323322
model_request_parameters = ctx.deps.model.customize_request_parameters(model_request_parameters)
324-
model_response, request_usage = await ctx.deps.model.request(
323+
model_response = await ctx.deps.model.request(
325324
ctx.state.message_history, model_settings, model_request_parameters
326325
)
327-
ctx.state.usage.incr(_usage.Usage(), requests=1)
326+
ctx.state.usage.incr(_usage.Usage())
328327

329-
return self._finish_handling(ctx, model_response, request_usage)
328+
return self._finish_handling(ctx, model_response)
330329

331330
async def _prepare_request(
332331
self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
@@ -348,10 +347,9 @@ def _finish_handling(
348347
self,
349348
ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
350349
response: _messages.ModelResponse,
351-
usage: _usage.Usage,
352350
) -> CallToolsNode[DepsT, NodeRunEndT]:
353351
# Update usage
354-
ctx.state.usage.incr(usage, requests=0)
352+
ctx.state.usage.incr(response.usage)
355353
if ctx.deps.usage_limits:
356354
ctx.deps.usage_limits.check_tokens(ctx.state.usage)
357355

pydantic_ai_slim/pydantic_ai/agent.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,13 @@ async def main():
551551
CallToolsNode(
552552
model_response=ModelResponse(
553553
parts=[TextPart(content='Paris', part_kind='text')],
554+
usage=Usage(
555+
requests=1,
556+
request_tokens=56,
557+
response_tokens=1,
558+
total_tokens=57,
559+
details=None,
560+
),
554561
model_name='gpt-4o',
555562
timestamp=datetime.datetime(...),
556563
kind='response',
@@ -1715,6 +1722,13 @@ async def main():
17151722
CallToolsNode(
17161723
model_response=ModelResponse(
17171724
parts=[TextPart(content='Paris', part_kind='text')],
1725+
usage=Usage(
1726+
requests=1,
1727+
request_tokens=56,
1728+
response_tokens=1,
1729+
total_tokens=57,
1730+
details=None,
1731+
),
17181732
model_name='gpt-4o',
17191733
timestamp=datetime.datetime(...),
17201734
kind='response',
@@ -1853,6 +1867,13 @@ async def main():
18531867
CallToolsNode(
18541868
model_response=ModelResponse(
18551869
parts=[TextPart(content='Paris', part_kind='text')],
1870+
usage=Usage(
1871+
requests=1,
1872+
request_tokens=56,
1873+
response_tokens=1,
1874+
total_tokens=57,
1875+
details=None,
1876+
),
18561877
model_name='gpt-4o',
18571878
timestamp=datetime.datetime(...),
18581879
kind='response',

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from ._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc
1616
from .exceptions import UnexpectedModelBehavior
17+
from .usage import Usage
1718

1819
AudioMediaType: TypeAlias = Literal['audio/wav', 'audio/mpeg']
1920
ImageMediaType: TypeAlias = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
@@ -554,6 +555,12 @@ class ModelResponse:
554555
parts: list[ModelResponsePart]
555556
"""The parts of the model message."""
556557

558+
usage: Usage = field(default_factory=Usage)
559+
"""Usage information for the request.
560+
561+
This has a default to make tests easier, and to support loading old messages where usage will be missing.
562+
"""
563+
557564
model_name: str | None = None
558565
"""The name of the model that generated the response."""
559566

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ async def request(
278278
messages: list[ModelMessage],
279279
model_settings: ModelSettings | None,
280280
model_request_parameters: ModelRequestParameters,
281-
) -> tuple[ModelResponse, Usage]:
281+
) -> ModelResponse:
282282
"""Make a request to the model."""
283283
raise NotImplementedError()
284284

@@ -365,7 +365,10 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
365365
def get(self) -> ModelResponse:
366366
"""Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
367367
return ModelResponse(
368-
parts=self._parts_manager.get_parts(), model_name=self.model_name, timestamp=self.timestamp
368+
parts=self._parts_manager.get_parts(),
369+
model_name=self.model_name,
370+
timestamp=self.timestamp,
371+
usage=self.usage(),
369372
)
370373

371374
def usage(self) -> Usage:

0 commit comments

Comments (0)