55
66from __future__ import annotations
77
8- from typing import TypedDict , get_args
8+ from collections .abc import Callable
9+ from copy import deepcopy
10+ from typing import Any , TypedDict , get_args
911
1012import pytest
1113
1214from mirascope import llm
1315
# Request headers whose values must never be written to a cassette.
# Names are kept lowercase; order matches the providers exercised by the tests.
SENSITIVE_HEADERS: list[str] = [
    # Common API authentication headers
    "authorization",
    "x-api-key",
    "x-goog-api-key",
    "anthropic-organization-id",
]
23+
1424PROVIDER_MODEL_ID_PAIRS : list [tuple [llm .Provider , llm .ModelId ]] = [
1525 ("anthropic" , "claude-sonnet-4-0" ),
1626 ("google" , "gemini-2.5-flash" ),
@@ -51,22 +61,36 @@ class VCRConfig(TypedDict):
5161 - 'headers': Request headers
5262 """
5363
54- filter_headers : list [ str ]
55- """Headers to filter out from recordings for security/privacy .
64+ before_record_request : Callable [[ Any ], Any ]
65+ """Callback to sanitize requests before saving to cassette .
5666
57- These headers will be removed from both recorded cassettes and
58- when matching requests during playback. Commonly used for:
59- - Authentication tokens
60- - API keys
61- - Organization identifiers
67+ This function is called AFTER the real HTTP request is sent (with valid auth),
68+ but BEFORE it's written to the cassette file. Use this to sanitize sensitive
69+ headers without affecting the actual HTTP requests.
6270 """
6371
64- filter_post_data_parameters : list [str ]
65- """POST data parameters to filter out from recordings.
6672
67- Similar to filter_headers but for form data and request body parameters.
68- Useful for removing sensitive data from request bodies.
def sanitize_request(request: Any) -> Any:  # noqa: ANN401
    """Return a copy of a VCR request with sensitive header values masked.

    Meant to be used as VCR's ``before_record_request`` hook so credentials
    are replaced with a placeholder in what gets stored. The incoming request
    is deep-copied first, so the object passed in is never mutated.

    Header names are compared case-insensitively against
    ``SENSITIVE_HEADERS``, so ``Authorization`` is masked even when
    ``request.headers`` is a plain, case-sensitive mapping.

    Args:
        request: VCR request object to sanitize (Any type since VCR doesn't
            provide typed request objects). It must expose a mutable
            ``headers`` mapping.

    Returns:
        Sanitized copy of the request safe for cassette storage.
    """
    sanitized = deepcopy(request)
    sensitive = {name.lower() for name in SENSITIVE_HEADERS}

    # Snapshot the keys first: values are reassigned while walking the mapping.
    for name in list(sanitized.headers):
        if isinstance(name, str) and name.lower() in sensitive:
            sanitized.headers[name] = ["<filtered>"]

    return sanitized
7094
7195
7296@pytest .fixture (scope = "session" )
@@ -79,19 +103,19 @@ def vcr_config() -> VCRConfig:
79103 - Google/Gemini (x-goog-api-key header)
80104 - Anthropic (x-api-key, anthropic-organization-id headers)
81105
106+ Note:
107+ We use before_record_request hook for sanitizing sensitive headers.
108+ This ensures the real HTTP requests (with valid auth) are sent
109+ successfully, but sensitive headers are replaced with placeholders
110+ in the cassette files.
111+
82112 Returns:
83113 VCRConfig: Dictionary with VCR.py configuration settings
84114 """
85115 return {
86116 "record_mode" : "once" ,
87117 "match_on" : ["method" , "uri" , "body" ],
88- "filter_headers" : [
89- "authorization" , # OpenAI Bearer tokens
90- "x-api-key" , # Anthropic API keys
91- "x-goog-api-key" , # Google/Gemini API keys
92- "anthropic-organization-id" , # Anthropic org identifiers
93- ],
94- "filter_post_data_parameters" : [],
118+ "before_record_request" : sanitize_request ,
95119 }
96120
97121
0 commit comments