77
88import  hashlib 
99import  inspect 
10+ import  json 
11+ import  re 
1012from  collections .abc  import  Awaitable , Callable , Generator 
1113from  copy  import  deepcopy 
12- from  typing  import  TypedDict , get_args 
14+ from  typing  import  TYPE_CHECKING ,  TypedDict , get_args 
1315from  typing_extensions  import  TypeIs 
1416
1517import  httpx 
2123from  vcr .stubs  import  httpx_stubs 
2224
2325from  mirascope  import  llm 
26+ from  mirascope .llm .clients .anthropic_vertex  import  clients  as  anthropic_vertex_clients 
27+ 
28+ if  TYPE_CHECKING :
29+     from  typing  import  Any 
30+ 
2431
2532PROVIDER_MODEL_ID_PAIRS : list [tuple [llm .Provider , llm .ModelId ]] =  [
2633    ("anthropic" , "claude-sonnet-4-0" ),
2734    ("anthropic-bedrock" , "us.anthropic.claude-haiku-4-5-20251001-v1:0" ),
35+     ("anthropic-vertex" , "claude-haiku-4-5@20251001" ),
2836    ("google" , "gemini-2.5-flash" ),
2937    ("openai:completions" , "gpt-4o" ),
3038    ("openai:responses" , "gpt-4o" ),
@@ -104,6 +112,15 @@ class VCRConfig(TypedDict, total=False):
104112    headers without affecting the actual HTTP requests. 
105113    """ 
106114
115+     before_record_response : Callable [[dict [str , Any ]], dict [str , Any ]]
116+     """Callback to sanitize responses before saving to cassette. 
117+ 
118+     This function is called AFTER the real HTTP response is received, 
119+     but BEFORE it's written to the cassette file. Use this to sanitize sensitive 
120+     data in response bodies (e.g., OAuth tokens) without affecting the actual 
121+     HTTP responses received by the application. 
122+     """ 
123+ 
107124    decode_compressed_response : bool 
108125    """Whether to decode compressed responses. 
109126
@@ -114,12 +131,15 @@ class VCRConfig(TypedDict, total=False):
114131
115132
116133def  sanitize_request (request : VCRRequest ) ->  VCRRequest :
117-     """Sanitize sensitive headers in VCR request before recording to cassette . 
134+     """Sanitize sensitive headers and OAuth tokens  in VCR request before recording. 
118135
119136    This hook is called AFTER the real HTTP request is sent (with valid auth), 
120137    but BEFORE it's written to the cassette file. We deep copy the request 
121138    and replace sensitive headers with placeholders. 
122139
140+     Also sanitizes OAuth token refresh requests to Google's OAuth2 endpoint, 
141+     which contain sensitive refresh_token, client_id, and client_secret. 
142+ 
123143    Args: 
124144        request: VCR request object to sanitize 
125145
@@ -132,9 +152,81 @@ def sanitize_request(request: VCRRequest) -> VCRRequest:
132152        if  header  in  request .headers :
133153            request .headers [header ] =  ["<filtered>" ]
134154
155+     if  "oauth2.googleapis.com/token"  in  request .uri  and  request .body :
156+         body_str  =  (
157+             request .body .decode () if  isinstance (request .body , bytes ) else  request .body 
158+         )
159+         body_str  =  re .sub (r"refresh_token=[^&]+" , "refresh_token=<filtered>" , body_str )
160+         body_str  =  re .sub (r"client_secret=[^&]+" , "client_secret=<filtered>" , body_str )
161+         request .body  =  body_str 
162+ 
135163    return  request 
136164
137165
def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """Sanitize OAuth tokens in a VCR response before it is recorded.

    This hook is called AFTER the real HTTP response is received, but BEFORE
    it is written to the cassette file. It replaces the values of the
    top-level ``access_token``, ``id_token`` and ``refresh_token`` fields of
    a JSON response body with ``"<filtered>"``.

    The input is never mutated; a deep copy is sanitized and returned. Bodies
    that contain nothing to filter are returned exactly as received (no
    re-serialization), so recorded bytes are only altered when a secret was
    actually replaced.

    Args:
        response: VCR response dict to sanitize. The body is expected under
            ``response["body"]["string"]`` as ``bytes`` or ``str``.

    Returns:
        Sanitized copy of the response safe for cassette storage. When a
        token was replaced, the body string is written back as ``bytes``.
    """
    token_keys = ("access_token", "id_token", "refresh_token")
    response = deepcopy(response)

    body = response.get("body")
    if not isinstance(body, dict) or "string" not in body:
        return response

    raw = body["string"]
    if isinstance(raw, bytes):
        try:
            text = raw.decode()
        except UnicodeDecodeError:
            # Body is likely compressed (gzip) or binary data; nothing we can
            # safely inspect, so leave it untouched.
            return response
    elif isinstance(raw, str):
        text = raw
    else:
        # Missing (None) or otherwise non-textual body: nothing to sanitize.
        return response

    # Cheap pre-filter so ordinary API responses are never parsed.
    if not any(key in text for key in token_keys):
        return response

    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        # Not valid JSON (e.g. truncated): fall back to textual replacement.
        scrubbed = text
        for key in token_keys:
            scrubbed = re.sub(
                rf'"{key}":\s*"[^"]+"', f'"{key}": "<filtered>"', scrubbed
            )
        if scrubbed != text:
            body["string"] = scrubbed.encode()
        return response

    # Only JSON objects can carry the token fields. Lists, strings, numbers
    # etc. that merely mention a token name are left byte-for-byte intact.
    if not isinstance(payload, dict):
        return response

    present = [key for key in token_keys if key in payload]
    if not present:
        return response

    for key in present:
        payload[key] = "<filtered>"
    body["string"] = json.dumps(payload).encode()
    return response
218+ 
219+ 
@pytest.fixture(autouse=True)
def _clear_anthropic_vertex_cache() -> Generator[None, None, None]:
    """Reset the Anthropic Vertex client cache around every test.

    Being ``autouse``, this fixture wraps each test: the cache is cleared
    before the test body runs and cleared again afterwards, even when the
    test fails, so cached clients are not carried from one test into the
    next.
    """
    # Start the test from an empty cache.
    anthropic_vertex_clients.clear_cache()
    try:
        yield
    finally:
        # Always clean up, regardless of the test outcome.
        anthropic_vertex_clients.clear_cache()
228+ 
229+ 
138230@pytest .fixture (scope = "session" ) 
139231def  vcr_config () ->  VCRConfig :
140232    """VCR configuration for all API tests. 
@@ -144,13 +236,15 @@ def vcr_config() -> VCRConfig:
144236    - OpenAI (authorization header) 
145237    - Google/Gemini (x-goog-api-key header) 
146238    - Anthropic (x-api-key, anthropic-organization-id headers) 
239+     - AnthropicVertex (Google OAuth tokens in request/response bodies) 
147240    - AWS Bedrock (AWS SigV4 headers: authorization, x-amz-*) 
148241
149242    Note: 
150-         We use before_record_request hook for sanitizing sensitive headers. 
151-         This ensures the real HTTP requests (with valid auth) are sent 
152-         successfully, but sensitive headers are replaced with placeholders 
153-         in the cassette files. 
243+         We use before_record_request hook for sanitizing sensitive headers 
244+         and OAuth request bodies. We use before_record_response hook for 
245+         sanitizing OAuth tokens in response bodies (e.g., access_token, id_token). 
246+         This ensures the real HTTP requests/responses (with valid auth) work 
247+         correctly, but sensitive data is replaced with placeholders in cassettes. 
154248
155249        We use 'raw_body' in match_on for exact binary matching and 
156250        decode_compressed_response=False to preserve exact response bytes 
@@ -165,6 +259,7 @@ def vcr_config() -> VCRConfig:
165259        "filter_headers" : [],  # Don't filter here; use before_record_request 
166260        "filter_post_data_parameters" : [],
167261        "before_record_request" : sanitize_request ,
262+         "before_record_response" : sanitize_response ,
168263        "decode_compressed_response" : False ,  # Preserve exact response bytes 
169264    }
170265
0 commit comments