77
88import hashlib
99import inspect
10+ import json
11+ import re
1012from collections .abc import Awaitable , Callable , Generator
1113from copy import deepcopy
12- from typing import TypedDict , get_args
14+ from typing import TYPE_CHECKING , TypedDict , get_args
1315from typing_extensions import TypeIs
1416
1517import httpx
2224
2325from mirascope import llm
2426
27+ if TYPE_CHECKING :
28+ from typing import Any
29+
30+ from vcr .response import Response as VCRResponse
31+
2532PROVIDER_MODEL_ID_PAIRS : list [tuple [llm .Provider , llm .ModelId ]] = [
2633 ("anthropic" , "claude-sonnet-4-0" ),
2734 ("anthropic-bedrock" , "us.anthropic.claude-haiku-4-5-20251001-v1:0" ),
35+ ("anthropic-vertex" , "claude-haiku-4-5@20251001" ),
2836 ("google" , "gemini-2.5-flash" ),
2937 ("openai:completions" , "gpt-4o" ),
3038 ("openai:responses" , "gpt-4o" ),
@@ -104,6 +112,15 @@ class VCRConfig(TypedDict, total=False):
104112 headers without affecting the actual HTTP requests.
105113 """
106114
115+ before_record_response : Callable [[dict [str , Any ]], dict [str , Any ]]
116+ """Callback to sanitize responses before saving to cassette.
117+
118+ This function is called AFTER the real HTTP response is received,
119+ but BEFORE it's written to the cassette file. Use this to sanitize sensitive
120+ data in response bodies (e.g., OAuth tokens) without affecting the actual
121+ HTTP responses received by the application.
122+ """
123+
107124 decode_compressed_response : bool
108125 """Whether to decode compressed responses.
109126
@@ -114,12 +131,15 @@ class VCRConfig(TypedDict, total=False):
114131
115132
116133def sanitize_request (request : VCRRequest ) -> VCRRequest :
117- """Sanitize sensitive headers in VCR request before recording to cassette .
134+ """Sanitize sensitive headers and OAuth tokens in VCR request before recording.
118135
119136 This hook is called AFTER the real HTTP request is sent (with valid auth),
120137 but BEFORE it's written to the cassette file. We deep copy the request
121138 and replace sensitive headers with placeholders.
122139
140+ Also sanitizes OAuth token refresh requests to Google's OAuth2 endpoint,
141+ which contain sensitive refresh_token, client_id, and client_secret.
142+
123143 Args:
124144 request: VCR request object to sanitize
125145
@@ -132,9 +152,79 @@ def sanitize_request(request: VCRRequest) -> VCRRequest:
132152 if header in request .headers :
133153 request .headers [header ] = ["<filtered>" ]
134154
155+ if "oauth2.googleapis.com/token" in request .uri and request .body :
156+ body_str = request .body .decode () if isinstance (request .body , bytes ) else request .body
157+ body_str = re .sub (
158+ r"refresh_token=[^&]+" ,
159+ "refresh_token=<filtered>" ,
160+ body_str
161+ )
162+ body_str = re .sub (
163+ r"client_secret=[^&]+" ,
164+ "client_secret=<filtered>" ,
165+ body_str
166+ )
167+ request .body = body_str
168+
135169 return request
136170
137171
# JSON field names whose values must never be written to a cassette.
_TOKEN_FIELDS = ("access_token", "id_token", "refresh_token")

# Fallback patterns for bodies that mention a token field but are not
# parseable as a JSON object/array (e.g. truncated or wrapped payloads).
_TOKEN_FIELD_PATTERNS = tuple(
    (re.compile(rf'"{field}":\s*"[^"]+"'), f'"{field}": "<filtered>"')
    for field in _TOKEN_FIELDS
)


def _redact_token_fields(node: Any) -> bool:
    """Replace token values in parsed JSON in place; return True if any changed."""
    redacted = False
    if isinstance(node, dict):
        for key, value in node.items():
            if key in _TOKEN_FIELDS:
                # Re-assigning an existing key is safe while iterating.
                node[key] = "<filtered>"
                redacted = True
            elif _redact_token_fields(value):
                redacted = True
    elif isinstance(node, list):
        for item in node:
            if _redact_token_fields(item):
                redacted = True
    return redacted


def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """Sanitize sensitive tokens in VCR response before recording to cassette.

    This hook is called AFTER the real HTTP response is received,
    but BEFORE it's written to the cassette file. We sanitize OAuth tokens
    (access_token, id_token, refresh_token) from response bodies such as
    Google OAuth2 token responses (used by AnthropicVertex).

    The body is only rewritten when a token was actually redacted, and the
    rewritten body keeps the type (``bytes`` or ``str``) of the original.

    Args:
        response: VCR response dict to sanitize

    Returns:
        Sanitized copy of the response safe for cassette storage. The input
        dict is never mutated.
    """
    response = deepcopy(response)

    body = response.get("body")
    if not isinstance(body, dict) or "string" not in body:
        return response

    raw_body = body["string"]
    was_bytes = isinstance(raw_body, bytes)
    if was_bytes:
        try:
            body_text = raw_body.decode()
        except UnicodeDecodeError:
            # Body is likely compressed (gzip) or binary data
            # Skip sanitization for these responses
            return response
    elif isinstance(raw_body, str):
        body_text = raw_body
    else:
        # Missing/None or otherwise non-textual body: nothing to sanitize.
        return response

    # Cheap substring check so unrelated bodies are never parsed or rewritten.
    if not any(field in body_text for field in _TOKEN_FIELDS):
        return response

    try:
        parsed = json.loads(body_text)
    except json.JSONDecodeError:
        parsed = None

    if isinstance(parsed, (dict, list)):
        if not _redact_token_fields(parsed):
            # A token name appeared only inside a value; keep the body as-is.
            return response
        sanitized = json.dumps(parsed)
    else:
        # Not structured JSON: fall back to textual substitution.
        sanitized = body_text
        for pattern, replacement in _TOKEN_FIELD_PATTERNS:
            sanitized = pattern.sub(replacement, sanitized)

    # Write back using the original body type so consumers of the response
    # dict still receive bytes when they recorded bytes.
    body["string"] = sanitized.encode() if was_bytes else sanitized
    return response
226+
227+
138228@pytest .fixture (scope = "session" )
139229def vcr_config () -> VCRConfig :
140230 """VCR configuration for all API tests.
@@ -144,13 +234,15 @@ def vcr_config() -> VCRConfig:
144234 - OpenAI (authorization header)
145235 - Google/Gemini (x-goog-api-key header)
146236 - Anthropic (x-api-key, anthropic-organization-id headers)
237+ - AnthropicVertex (Google OAuth tokens in request/response bodies)
147238 - AWS Bedrock (AWS SigV4 headers: authorization, x-amz-*)
148239
149240 Note:
150- We use before_record_request hook for sanitizing sensitive headers.
151- This ensures the real HTTP requests (with valid auth) are sent
152- successfully, but sensitive headers are replaced with placeholders
153- in the cassette files.
241+ We use before_record_request hook for sanitizing sensitive headers
242+ and OAuth request bodies. We use before_record_response hook for
243+ sanitizing OAuth tokens in response bodies (e.g., access_token, id_token).
244+ This ensures the real HTTP requests/responses (with valid auth) work
245+ correctly, but sensitive data is replaced with placeholders in cassettes.
154246
155247 We use 'raw_body' in match_on for exact binary matching and
156248 decode_compressed_response=False to preserve exact response bytes
@@ -165,6 +257,7 @@ def vcr_config() -> VCRConfig:
165257 "filter_headers" : [], # Don't filter here; use before_record_request
166258 "filter_post_data_parameters" : [],
167259 "before_record_request" : sanitize_request ,
260+ "before_record_response" : sanitize_response ,
168261 "decode_compressed_response" : False , # Preserve exact response bytes
169262 }
170263
0 commit comments