Skip to content

Commit cbe0a92

Browse files
Scrubbing sensitive content (#2014)
Co-authored-by: Alex Hall <alex.mojaki@gmail.com>
1 parent 8039c20 commit cbe0a92

File tree

8 files changed

+172
-15
lines changed

8 files changed

+172
-15
lines changed

docs/logfire.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,22 @@ agent = Agent('gpt-4o', instrument=instrumentation_settings)
323323
# or to instrument all agents:
324324
Agent.instrument_all(instrumentation_settings)
325325
```
326+
327+
### Excluding prompts and completions
328+
329+
For privacy and security reasons, you may want to monitor your agent's behavior and performance without exposing sensitive user data or proprietary prompts in your observability platform. PydanticAI allows you to exclude the actual content from instrumentation events while preserving the structural information needed for debugging and monitoring.
330+
331+
When `include_content=False` is set, PydanticAI will exclude sensitive content from OpenTelemetry events: user prompts, model completions, tool call arguments and responses, and any other message content.
332+
333+
```python {title="excluding_sensitive_content.py"}
334+
from pydantic_ai.agent import Agent
335+
from pydantic_ai.models.instrumented import InstrumentationSettings
336+
337+
instrumentation_settings = InstrumentationSettings(include_content=False)
338+
339+
agent = Agent('gpt-4o', instrument=instrumentation_settings)
340+
# or to instrument all agents:
341+
Agent.instrument_all(instrumentation_settings)
342+
```
343+
344+
This setting is particularly useful in production environments where compliance requirements or data sensitivity concerns make it necessary to limit what content is sent to your observability platform.

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
if TYPE_CHECKING:
2626
from .mcp import MCPServer
27+
from .models.instrumented import InstrumentationSettings
2728

2829
__all__ = (
2930
'GraphAgentState',
@@ -112,6 +113,7 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]):
112113
default_retries: int
113114

114115
tracer: Tracer
116+
instrumentation_settings: InstrumentationSettings | None = None
115117

116118
prepare_tools: ToolsPrepareFunc[DepsT] | None = None
117119

@@ -712,6 +714,10 @@ async def process_function_tools( # noqa C901
712714

713715
user_parts: list[_messages.UserPromptPart] = []
714716

717+
include_content = (
718+
ctx.deps.instrumentation_settings is not None and ctx.deps.instrumentation_settings.include_content
719+
)
720+
715721
# Run all tool tasks in parallel
716722
results_by_index: dict[int, _messages.ModelRequestPart] = {}
717723
with ctx.deps.tracer.start_as_current_span(
@@ -722,7 +728,7 @@ async def process_function_tools( # noqa C901
722728
},
723729
):
724730
tasks = [
725-
asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer), name=call.tool_name)
731+
asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer, include_content), name=call.tool_name)
726732
for tool, call in calls_to_run
727733
]
728734

pydantic_ai_slim/pydantic_ai/agent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,7 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
719719
tracer=tracer,
720720
prepare_tools=self._prepare_tools,
721721
get_instructions=get_instructions,
722+
instrumentation_settings=instrumentation_settings,
722723
)
723724
start_node = _agent_graph.UserPromptNode[AgentDepsT](
724725
user_prompt=user_prompt,

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,11 @@ class SystemPromptPart:
7676
part_kind: Literal['system-prompt'] = 'system-prompt'
7777
"""Part type identifier, this is available on all parts as a discriminator."""
7878

79-
def otel_event(self, _settings: InstrumentationSettings) -> Event:
80-
return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'})
79+
def otel_event(self, settings: InstrumentationSettings) -> Event:
80+
return Event(
81+
'gen_ai.system.message',
82+
body={'role': 'system', **({'content': self.content} if settings.include_content else {})},
83+
)
8184

8285
__repr__ = _utils.dataclasses_no_defaults_repr
8386

@@ -362,12 +365,12 @@ def otel_event(self, settings: InstrumentationSettings) -> Event:
362365
content = []
363366
for part in self.content:
364367
if isinstance(part, str):
365-
content.append(part)
368+
content.append(part if settings.include_content else {'kind': 'text'})
366369
elif isinstance(part, (ImageUrl, AudioUrl, DocumentUrl, VideoUrl)):
367-
content.append({'kind': part.kind, 'url': part.url})
370+
content.append({'kind': part.kind, **({'url': part.url} if settings.include_content else {})})
368371
elif isinstance(part, BinaryContent):
369372
converted_part = {'kind': part.kind, 'media_type': part.media_type}
370-
if settings.include_binary_content:
373+
if settings.include_content and settings.include_binary_content:
371374
converted_part['binary_content'] = base64.b64encode(part.data).decode()
372375
content.append(converted_part)
373376
else:
@@ -414,10 +417,15 @@ def model_response_object(self) -> dict[str, Any]:
414417
else:
415418
return {'return_value': tool_return_ta.dump_python(self.content, mode='json')}
416419

417-
def otel_event(self, _settings: InstrumentationSettings) -> Event:
420+
def otel_event(self, settings: InstrumentationSettings) -> Event:
418421
return Event(
419422
'gen_ai.tool.message',
420-
body={'content': self.content, 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name},
423+
body={
424+
**({'content': self.content} if settings.include_content else {}),
425+
'role': 'tool',
426+
'id': self.tool_call_id,
427+
'name': self.tool_name,
428+
},
421429
)
422430

423431
__repr__ = _utils.dataclasses_no_defaults_repr
@@ -473,14 +481,14 @@ def model_response(self) -> str:
473481
description = f'{len(self.content)} validation errors: {json_errors.decode()}'
474482
return f'{description}\n\nFix the errors and try again.'
475483

476-
def otel_event(self, _settings: InstrumentationSettings) -> Event:
484+
def otel_event(self, settings: InstrumentationSettings) -> Event:
477485
if self.tool_name is None:
478486
return Event('gen_ai.user.message', body={'content': self.model_response(), 'role': 'user'})
479487
else:
480488
return Event(
481489
'gen_ai.tool.message',
482490
body={
483-
'content': self.model_response(),
491+
**({'content': self.model_response()} if settings.include_content else {}),
484492
'role': 'tool',
485493
'id': self.tool_call_id,
486494
'name': self.tool_name,
@@ -657,7 +665,7 @@ class ModelResponse:
657665
vendor_id: str | None = None
658666
"""Vendor ID as specified by the model provider. This can be used to track the specific request to the model."""
659667

660-
def otel_events(self) -> list[Event]:
668+
def otel_events(self, settings: InstrumentationSettings) -> list[Event]:
661669
"""Return OpenTelemetry events for the response."""
662670
result: list[Event] = []
663671

@@ -683,7 +691,8 @@ def new_event_body():
683691
elif isinstance(part, TextPart):
684692
if body.get('content'):
685693
body = new_event_body()
686-
body['content'] = part.content
694+
if settings.include_content:
695+
body['content'] = part.content
687696

688697
return result
689698

pydantic_ai_slim/pydantic_ai/models/instrumented.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def __init__(
9292
meter_provider: MeterProvider | None = None,
9393
event_logger_provider: EventLoggerProvider | None = None,
9494
include_binary_content: bool = True,
95+
include_content: bool = True,
9596
):
9697
"""Create instrumentation options.
9798
@@ -109,6 +110,8 @@ def __init__(
109110
Calling `logfire.configure()` sets the global event logger provider, so most users don't need this.
110111
This is only used if `event_mode='logs'`.
111112
include_binary_content: Whether to include binary content in the instrumentation events.
113+
include_content: Whether to include prompts, completions, and tool call arguments and responses
114+
in the instrumentation events.
112115
"""
113116
from pydantic_ai import __version__
114117

@@ -121,6 +124,7 @@ def __init__(
121124
self.event_logger = event_logger_provider.get_event_logger(scope_name, __version__)
122125
self.event_mode = event_mode
123126
self.include_binary_content = include_binary_content
127+
self.include_content = include_content
124128

125129
# As specified in the OpenTelemetry GenAI metrics spec:
126130
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclienttokenusage
@@ -161,7 +165,7 @@ def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]:
161165
if hasattr(part, 'otel_event'):
162166
message_events.append(part.otel_event(self))
163167
elif isinstance(message, ModelResponse): # pragma: no branch
164-
message_events = message.otel_events()
168+
message_events = message.otel_events(self)
165169
for event in message_events:
166170
event.attributes = {
167171
'gen_ai.message.index': message_index,

pydantic_ai_slim/pydantic_ai/tools.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ async def run(
327327
message: _messages.ToolCallPart,
328328
run_context: RunContext[AgentDepsT],
329329
tracer: Tracer,
330+
include_content: bool = False,
330331
) -> _messages.ToolReturnPart | _messages.RetryPromptPart:
331332
"""Run the tool function asynchronously.
332333
@@ -338,14 +339,14 @@ async def run(
338339
'gen_ai.tool.name': self.name,
339340
# NOTE: this means `gen_ai.tool.call.id` will be included even if it was generated by pydantic-ai
340341
'gen_ai.tool.call.id': message.tool_call_id,
341-
'tool_arguments': message.args_as_json_str(),
342+
**({'tool_arguments': message.args_as_json_str()} if include_content else {}),
342343
'logfire.msg': f'running tool: {self.name}',
343344
# add the JSON schema so these attributes are formatted nicely in Logfire
344345
'logfire.json_schema': json.dumps(
345346
{
346347
'type': 'object',
347348
'properties': {
348-
'tool_arguments': {'type': 'object'},
349+
**({'tool_arguments': {'type': 'object'}} if include_content else {}),
349350
'gen_ai.tool.name': {},
350351
'gen_ai.tool.call.id': {},
351352
},

tests/models/test_instrumented.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,3 +837,88 @@ def test_messages_to_otel_events_without_binary_content(document_content: Binary
837837
}
838838
]
839839
)
840+
841+
842+
def test_messages_without_content(document_content: BinaryContent):
843+
messages: list[ModelMessage] = [
844+
ModelRequest(parts=[SystemPromptPart('system_prompt')]),
845+
ModelResponse(parts=[TextPart('text1')]),
846+
ModelRequest(
847+
parts=[
848+
UserPromptPart(
849+
content=[
850+
'user_prompt1',
851+
VideoUrl('https://example.com/video.mp4'),
852+
ImageUrl('https://example.com/image.png'),
853+
AudioUrl('https://example.com/audio.mp3'),
854+
DocumentUrl('https://example.com/document.pdf'),
855+
document_content,
856+
]
857+
)
858+
]
859+
),
860+
ModelResponse(parts=[TextPart('text2'), ToolCallPart(tool_name='my_tool', args={'a': 13, 'b': 4})]),
861+
ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]),
862+
ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]),
863+
ModelRequest(parts=[UserPromptPart(content=['user_prompt2', document_content])]),
864+
]
865+
settings = InstrumentationSettings(include_content=False)
866+
assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot(
867+
[
868+
{
869+
'role': 'system',
870+
'gen_ai.message.index': 0,
871+
'event.name': 'gen_ai.system.message',
872+
},
873+
{
874+
'role': 'assistant',
875+
'gen_ai.message.index': 1,
876+
'event.name': 'gen_ai.assistant.message',
877+
},
878+
{
879+
'content': [
880+
{'kind': 'text'},
881+
{'kind': 'video-url'},
882+
{'kind': 'image-url'},
883+
{'kind': 'audio-url'},
884+
{'kind': 'document-url'},
885+
{'kind': 'binary', 'media_type': 'application/pdf'},
886+
],
887+
'role': 'user',
888+
'gen_ai.message.index': 2,
889+
'event.name': 'gen_ai.user.message',
890+
},
891+
{
892+
'role': 'assistant',
893+
'tool_calls': [
894+
{
895+
'id': IsStr(),
896+
'type': 'function',
897+
'function': {'name': 'my_tool', 'arguments': {'a': 13, 'b': 4}},
898+
}
899+
],
900+
'gen_ai.message.index': 3,
901+
'event.name': 'gen_ai.assistant.message',
902+
},
903+
{
904+
'role': 'tool',
905+
'id': 'tool_call_1',
906+
'name': 'tool',
907+
'gen_ai.message.index': 4,
908+
'event.name': 'gen_ai.tool.message',
909+
},
910+
{
911+
'role': 'tool',
912+
'id': 'tool_call_2',
913+
'name': 'tool',
914+
'gen_ai.message.index': 5,
915+
'event.name': 'gen_ai.tool.message',
916+
},
917+
{
918+
'content': [{'kind': 'text'}, {'kind': 'binary', 'media_type': 'application/pdf'}],
919+
'role': 'user',
920+
'gen_ai.message.index': 6,
921+
'event.name': 'gen_ai.user.message',
922+
},
923+
]
924+
)

tests/test_logfire.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,3 +525,35 @@ async def test_feedback(capfire: CaptureLogfire) -> None:
525525
},
526526
]
527527
)
528+
529+
530+
@pytest.mark.skipif(not logfire_installed, reason='logfire not installed')
531+
@pytest.mark.parametrize('include_content', [True, False])
532+
def test_include_tool_args_span_attributes(
533+
get_logfire_summary: Callable[[], LogfireSummary],
534+
include_content: bool,
535+
) -> None:
536+
"""Test that tool arguments are included/excluded in span attributes based on instrumentation settings."""
537+
538+
instrumentation_settings = InstrumentationSettings(include_content=include_content)
539+
test_model = TestModel(seed=42)
540+
my_agent = Agent(model=test_model, instrument=instrumentation_settings)
541+
542+
@my_agent.tool_plain
543+
async def add_numbers(x: int, y: int) -> int:
544+
"""Add two numbers together."""
545+
return x + y
546+
547+
result = my_agent.run_sync('Add 42 and 42')
548+
assert result.output == snapshot('{"add_numbers":84}')
549+
550+
summary = get_logfire_summary()
551+
552+
[tool_attributes] = [
553+
attributes for attributes in summary.attributes.values() if attributes.get('gen_ai.tool.name') == 'add_numbers'
554+
]
555+
556+
if include_content:
557+
assert tool_attributes['tool_arguments'] == snapshot('{"x":42,"y":42}')
558+
else:
559+
assert 'tool_arguments' not in tool_attributes

0 commit comments

Comments
 (0)