Skip to content

Commit 74046ed

Browse files
authored
Add include_binary_content flag to InstrumentationSettings, rename OTel attribute key from content to binary_content for BinaryParts (#1739)
1 parent 222bec4 commit 74046ed

File tree

5 files changed

+112
-58
lines changed

5 files changed

+112
-58
lines changed

docs/logfire.md

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,11 @@ print(result.output)
238238
#> Paris
239239
```
240240

241-
## Data format
241+
## Advanced usage
242242

243-
PydanticAI follows the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/), with one caveat. The semantic conventions specify that messages should be captured as individual events (logs) that are children of the request span. By default, PydanticAI instead collects these events into a JSON array which is set as a single large attribute called `events` on the request span. To change this, use [`InstrumentationSettings(event_mode='logs')`][pydantic_ai.agent.InstrumentationSettings].
243+
### Configuring data format
244+
245+
PydanticAI follows the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/), with one caveat. The semantic conventions specify that messages should be captured as individual events (logs) that are children of the request span. By default, PydanticAI instead collects these events into a JSON array, which is set as a single large attribute called `events` on the request span. To change this, use `event_mode='logs'`:
244246

245247
```python {title="instrumentation_settings_event_mode.py"}
246248
import logfire
@@ -261,23 +263,27 @@ If you have very long conversations, the `events` span attribute may be truncate
261263

262264
Note that the OpenTelemetry Semantic Conventions are still experimental and are likely to change.
263265

264-
## Setting OpenTelemetry SDK providers
266+
### Setting OpenTelemetry SDK providers
265267

266268
By default, the global `TracerProvider` and `EventLoggerProvider` are used. These are set automatically by `logfire.configure()`. They can also be set by the `set_tracer_provider` and `set_event_logger_provider` functions in the OpenTelemetry Python SDK. You can set custom providers with [`InstrumentationSettings`][pydantic_ai.models.instrumented.InstrumentationSettings].
267269

268270
```python {title="instrumentation_settings_providers.py"}
269271
from opentelemetry.sdk._events import EventLoggerProvider
270272
from opentelemetry.sdk.trace import TracerProvider
271273

272-
from pydantic_ai.agent import InstrumentationSettings
274+
from pydantic_ai.agent import Agent, InstrumentationSettings
273275

274276
instrumentation_settings = InstrumentationSettings(
275277
tracer_provider=TracerProvider(),
276278
event_logger_provider=EventLoggerProvider(),
277279
)
280+
281+
agent = Agent('gpt-4o', instrument=instrumentation_settings)
282+
# or to instrument all agents:
283+
Agent.instrument_all(instrumentation_settings)
278284
```
279285

280-
## Instrumenting a specific `Model`
286+
### Instrumenting a specific `Model`
281287

282288
```python {title="instrumented_model_example.py"}
283289
from pydantic_ai import Agent
@@ -287,3 +293,15 @@ settings = InstrumentationSettings()
287293
model = InstrumentedModel('gpt-4o', settings)
288294
agent = Agent(model)
289295
```
296+
297+
### Excluding binary content
298+
299+
```python {title="excluding_binary_content.py"}
300+
from pydantic_ai.agent import Agent, InstrumentationSettings
301+
302+
instrumentation_settings = InstrumentationSettings(include_binary_content=False)
303+
304+
agent = Agent('gpt-4o', instrument=instrumentation_settings)
305+
# or to instrument all agents:
306+
Agent.instrument_all(instrumentation_settings)
307+
```

pydantic_ai_slim/pydantic_ai/agent.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -654,8 +654,10 @@ async def main():
654654
usage_limits = usage_limits or _usage.UsageLimits()
655655

656656
if isinstance(model_used, InstrumentedModel):
657+
instrumentation_settings = model_used.settings
657658
tracer = model_used.settings.tracer
658659
else:
660+
instrumentation_settings = None
659661
tracer = NoOpTracer()
660662
agent_name = self.name or 'agent'
661663
run_span = tracer.start_span(
@@ -723,19 +725,18 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
723725
)
724726
finally:
725727
try:
726-
if run_span.is_recording():
727-
run_span.set_attributes(self._run_span_end_attributes(state, usage))
728+
if instrumentation_settings and run_span.is_recording():
729+
run_span.set_attributes(self._run_span_end_attributes(state, usage, instrumentation_settings))
728730
finally:
729731
run_span.end()
730732

731-
def _run_span_end_attributes(self, state: _agent_graph.GraphAgentState, usage: _usage.Usage):
733+
def _run_span_end_attributes(
734+
self, state: _agent_graph.GraphAgentState, usage: _usage.Usage, settings: InstrumentationSettings
735+
):
732736
return {
733737
**usage.opentelemetry_attributes(),
734738
'all_messages_events': json.dumps(
735-
[
736-
InstrumentedModel.event_to_dict(e)
737-
for e in InstrumentedModel.messages_to_otel_events(state.message_history)
738-
]
739+
[InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(state.message_history)]
739740
),
740741
'logfire.json_schema': json.dumps(
741742
{

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from dataclasses import dataclass, field, replace
77
from datetime import datetime
88
from mimetypes import guess_type
9-
from typing import Annotated, Any, Literal, Union, cast, overload
9+
from typing import TYPE_CHECKING, Annotated, Any, Literal, Union, cast, overload
1010

1111
import pydantic
1212
import pydantic_core
@@ -17,6 +17,10 @@
1717
from .exceptions import UnexpectedModelBehavior
1818
from .usage import Usage
1919

20+
if TYPE_CHECKING:
21+
from .models.instrumented import InstrumentationSettings
22+
23+
2024
AudioMediaType: TypeAlias = Literal['audio/wav', 'audio/mpeg']
2125
ImageMediaType: TypeAlias = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
2226
DocumentMediaType: TypeAlias = Literal[
@@ -68,7 +72,7 @@ class SystemPromptPart:
6872
part_kind: Literal['system-prompt'] = 'system-prompt'
6973
"""Part type identifier, this is available on all parts as a discriminator."""
7074

71-
def otel_event(self) -> Event:
75+
def otel_event(self, _settings: InstrumentationSettings) -> Event:
7276
return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'})
7377

7478

@@ -305,7 +309,7 @@ class UserPromptPart:
305309
part_kind: Literal['user-prompt'] = 'user-prompt'
306310
"""Part type identifier, this is available on all parts as a discriminator."""
307311

308-
def otel_event(self) -> Event:
312+
def otel_event(self, settings: InstrumentationSettings) -> Event:
309313
content: str | list[dict[str, Any] | str]
310314
if isinstance(self.content, str):
311315
content = self.content
@@ -317,8 +321,10 @@ def otel_event(self) -> Event:
317321
elif isinstance(part, (ImageUrl, AudioUrl, DocumentUrl, VideoUrl)):
318322
content.append({'kind': part.kind, 'url': part.url})
319323
elif isinstance(part, BinaryContent):
320-
base64_data = base64.b64encode(part.data).decode()
321-
content.append({'kind': part.kind, 'content': base64_data, 'media_type': part.media_type})
324+
converted_part = {'kind': part.kind, 'media_type': part.media_type}
325+
if settings.include_binary_content:
326+
converted_part['binary_content'] = base64.b64encode(part.data).decode()
327+
content.append(converted_part)
322328
else:
323329
content.append({'kind': part.kind})
324330
return Event('gen_ai.user.message', body={'content': content, 'role': 'user'})
@@ -361,7 +367,7 @@ def model_response_object(self) -> dict[str, Any]:
361367
else:
362368
return {'return_value': tool_return_ta.dump_python(self.content, mode='json')}
363369

364-
def otel_event(self) -> Event:
370+
def otel_event(self, _settings: InstrumentationSettings) -> Event:
365371
return Event(
366372
'gen_ai.tool.message',
367373
body={'content': self.content, 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name},
@@ -418,7 +424,7 @@ def model_response(self) -> str:
418424
description = f'{len(self.content)} validation errors: {json_errors.decode()}'
419425
return f'{description}\n\nFix the errors and try again.'
420426

421-
def otel_event(self) -> Event:
427+
def otel_event(self, _settings: InstrumentationSettings) -> Event:
422428
if self.tool_name is None:
423429
return Event('gen_ai.user.message', body={'content': self.model_response(), 'role': 'user'})
424430
else:

pydantic_ai_slim/pydantic_ai/models/instrumented.py

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,15 @@ class InstrumentationSettings:
7777
tracer: Tracer = field(repr=False)
7878
event_logger: EventLogger = field(repr=False)
7979
event_mode: Literal['attributes', 'logs'] = 'attributes'
80+
include_binary_content: bool = True
8081

8182
def __init__(
8283
self,
8384
*,
8485
event_mode: Literal['attributes', 'logs'] = 'attributes',
8586
tracer_provider: TracerProvider | None = None,
8687
event_logger_provider: EventLoggerProvider | None = None,
88+
include_binary_content: bool = True,
8789
):
8890
"""Create instrumentation options.
8991
@@ -97,6 +99,7 @@ def __init__(
9799
If not provided, the global event logger provider is used.
98100
Calling `logfire.configure()` sets the global event logger provider, so most users don't need this.
99101
This is only used if `event_mode='logs'`.
102+
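include_binary_content: Whether to include binary content in the instrumentation events. When `False`, binary parts of user prompts are still recorded with their `kind` and `media_type`, but the base64-encoded data is omitted.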
100103
"""
101104
from pydantic_ai import __version__
102105

@@ -105,6 +108,40 @@ def __init__(
105108
self.tracer = tracer_provider.get_tracer('pydantic-ai', __version__)
106109
self.event_logger = event_logger_provider.get_event_logger('pydantic-ai', __version__)
107110
self.event_mode = event_mode
111+
self.include_binary_content = include_binary_content
112+
113+
def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]:
114+
"""Convert a list of model messages to OpenTelemetry events.
115+
116+
Args:
117+
messages: The messages to convert.
118+
119+
Returns:
120+
A list of OpenTelemetry events.
121+
"""
122+
events: list[Event] = []
123+
instructions = InstrumentedModel._get_instructions(messages) # pyright: ignore [reportPrivateUsage]
124+
if instructions is not None:
125+
events.append(Event('gen_ai.system.message', body={'content': instructions, 'role': 'system'}))
126+
127+
for message_index, message in enumerate(messages):
128+
message_events: list[Event] = []
129+
if isinstance(message, ModelRequest):
130+
for part in message.parts:
131+
if hasattr(part, 'otel_event'):
132+
message_events.append(part.otel_event(self))
133+
elif isinstance(message, ModelResponse):
134+
message_events = message.otel_events()
135+
for event in message_events:
136+
event.attributes = {
137+
'gen_ai.message.index': message_index,
138+
**(event.attributes or {}),
139+
}
140+
events.extend(message_events)
141+
142+
for event in events:
143+
event.body = InstrumentedModel.serialize_any(event.body)
144+
return events
108145

109146

110147
GEN_AI_SYSTEM_ATTRIBUTE = 'gen_ai.system'
@@ -193,8 +230,8 @@ def finish(response: ModelResponse):
193230
if not span.is_recording():
194231
return
195232

196-
events = self.messages_to_otel_events(messages)
197-
for event in self.messages_to_otel_events([response]):
233+
events = self.settings.messages_to_otel_events(messages)
234+
for event in self.settings.messages_to_otel_events([response]):
198235
events.append(
199236
Event(
200237
'gen_ai.choice',
@@ -270,32 +307,6 @@ def event_to_dict(event: Event) -> dict[str, Any]:
270307
body = {'body': event.body}
271308
return {**body, **(event.attributes or {})}
272309

273-
@staticmethod
274-
def messages_to_otel_events(messages: list[ModelMessage]) -> list[Event]:
275-
events: list[Event] = []
276-
instructions = InstrumentedModel._get_instructions(messages)
277-
if instructions is not None:
278-
events.append(Event('gen_ai.system.message', body={'content': instructions, 'role': 'system'}))
279-
280-
for message_index, message in enumerate(messages):
281-
message_events: list[Event] = []
282-
if isinstance(message, ModelRequest):
283-
for part in message.parts:
284-
if hasattr(part, 'otel_event'):
285-
message_events.append(part.otel_event())
286-
elif isinstance(message, ModelResponse):
287-
message_events = message.otel_events()
288-
for event in message_events:
289-
event.attributes = {
290-
'gen_ai.message.index': message_index,
291-
**(event.attributes or {}),
292-
}
293-
events.extend(message_events)
294-
295-
for event in events:
296-
event.body = InstrumentedModel.serialize_any(event.body)
297-
return events
298-
299310
@staticmethod
300311
def serialize_any(value: Any) -> str:
301312
try:

tests/models/test_instrumented.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,8 @@ def __repr__(self):
675675
ModelRequest(parts=[ToolReturnPart('tool', Bar(), tool_call_id='return_tool_call_id')]),
676676
]
677677

678-
assert [InstrumentedModel.event_to_dict(e) for e in InstrumentedModel.messages_to_otel_events(messages)] == [
678+
settings = InstrumentationSettings()
679+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == [
679680
{
680681
'body': "{'role': 'assistant', 'tool_calls': [{'id': 'tool_call_id', 'type': 'function', 'function': {'name': 'tool', 'arguments': {'arg': Foo()}}}]}",
681682
'gen_ai.message.index': 0,
@@ -694,9 +695,8 @@ def test_messages_to_otel_events_instructions():
694695
ModelRequest(instructions='instructions', parts=[UserPromptPart('user_prompt')]),
695696
ModelResponse(parts=[TextPart('text1')]),
696697
]
697-
assert [
698-
InstrumentedModel.event_to_dict(e) for e in InstrumentedModel.messages_to_otel_events(messages)
699-
] == snapshot(
698+
settings = InstrumentationSettings()
699+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot(
700700
[
701701
{'content': 'instructions', 'role': 'system', 'event.name': 'gen_ai.system.message'},
702702
{'content': 'user_prompt', 'role': 'user', 'gen_ai.message.index': 0, 'event.name': 'gen_ai.user.message'},
@@ -716,9 +716,8 @@ def test_messages_to_otel_events_instructions_multiple_messages():
716716
ModelResponse(parts=[TextPart('text1')]),
717717
ModelRequest(instructions='instructions2', parts=[UserPromptPart('user_prompt2')]),
718718
]
719-
assert [
720-
InstrumentedModel.event_to_dict(e) for e in InstrumentedModel.messages_to_otel_events(messages)
721-
] == snapshot(
719+
settings = InstrumentationSettings()
720+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot(
722721
[
723722
{'content': 'instructions2', 'role': 'system', 'event.name': 'gen_ai.system.message'},
724723
{'content': 'user_prompt', 'role': 'user', 'gen_ai.message.index': 0, 'event.name': 'gen_ai.user.message'},
@@ -755,9 +754,8 @@ def test_messages_to_otel_events_image_url(document_content: BinaryContent):
755754
ModelRequest(parts=[UserPromptPart(content=['user_prompt6', document_content])]),
756755
ModelResponse(parts=[TextPart('text1')]),
757756
]
758-
assert [
759-
InstrumentedModel.event_to_dict(e) for e in InstrumentedModel.messages_to_otel_events(messages)
760-
] == snapshot(
757+
settings = InstrumentationSettings()
758+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot(
761759
[
762760
{
763761
'content': ['user_prompt', {'kind': 'image-url', 'url': 'https://example.com/image.png'}],
@@ -796,7 +794,10 @@ def test_messages_to_otel_events_image_url(document_content: BinaryContent):
796794
'event.name': 'gen_ai.user.message',
797795
},
798796
{
799-
'content': ['user_prompt6', {'kind': 'binary', 'content': IsStr(), 'media_type': 'application/pdf'}],
797+
'content': [
798+
'user_prompt6',
799+
{'kind': 'binary', 'binary_content': IsStr(), 'media_type': 'application/pdf'},
800+
],
800801
'role': 'user',
801802
'gen_ai.message.index': 5,
802803
'event.name': 'gen_ai.user.message',
@@ -809,3 +810,20 @@ def test_messages_to_otel_events_image_url(document_content: BinaryContent):
809810
},
810811
]
811812
)
813+
814+
815+
def test_messages_to_otel_events_without_binary_content(document_content: BinaryContent):
816+
messages: list[ModelMessage] = [
817+
ModelRequest(parts=[UserPromptPart(content=['user_prompt6', document_content])]),
818+
]
819+
settings = InstrumentationSettings(include_binary_content=False)
820+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot(
821+
[
822+
{
823+
'content': ['user_prompt6', {'kind': 'binary', 'media_type': 'application/pdf'}],
824+
'role': 'user',
825+
'gen_ai.message.index': 0,
826+
'event.name': 'gen_ai.user.message',
827+
}
828+
]
829+
)

0 commit comments

Comments
 (0)