77
88import  hashlib 
99import  inspect 
10+ import  json 
11+ import  re 
1012from  collections .abc  import  Awaitable , Callable , Generator 
1113from  copy  import  deepcopy 
12- from  typing  import  TypedDict , get_args 
14+ from  typing  import  TYPE_CHECKING ,  TypedDict , get_args 
1315from  typing_extensions  import  TypeIs 
1416
1517import  httpx 
2224
2325from  mirascope  import  llm 
2426
27+ if  TYPE_CHECKING :
28+     from  typing  import  Any 
29+ 
30+     from  vcr .response  import  Response  as  VCRResponse 
31+ 
2532PROVIDER_MODEL_ID_PAIRS : list [tuple [llm .Provider , llm .ModelId ]] =  [
2633    ("anthropic" , "claude-sonnet-4-0" ),
2734    ("anthropic-bedrock" , "us.anthropic.claude-haiku-4-5-20251001-v1:0" ),
35+     ("anthropic-vertex" , "claude-haiku-4-5@20251001" ),
2836    ("google" , "gemini-2.5-flash" ),
2937    ("openai:completions" , "gpt-4o" ),
3038    ("openai:responses" , "gpt-4o" ),
@@ -104,6 +112,15 @@ class VCRConfig(TypedDict, total=False):
104112    headers without affecting the actual HTTP requests. 
105113    """ 
106114
115+     before_record_response : Callable [[dict [str , Any ]], dict [str , Any ]]
116+     """Callback to sanitize responses before saving to cassette. 
117+ 
118+     This function is called AFTER the real HTTP response is received, 
119+     but BEFORE it's written to the cassette file. Use this to sanitize sensitive 
120+     data in response bodies (e.g., OAuth tokens) without affecting the actual 
121+     HTTP responses received by the application. 
122+     """ 
123+ 
107124    decode_compressed_response : bool 
108125    """Whether to decode compressed responses. 
109126
@@ -114,12 +131,15 @@ class VCRConfig(TypedDict, total=False):
114131
115132
116133def  sanitize_request (request : VCRRequest ) ->  VCRRequest :
117-     """Sanitize sensitive headers in VCR request before recording to cassette . 
134+     """Sanitize sensitive headers and OAuth tokens  in VCR request before recording. 
118135
119136    This hook is called AFTER the real HTTP request is sent (with valid auth), 
120137    but BEFORE it's written to the cassette file. We deep copy the request 
121138    and replace sensitive headers with placeholders. 
122139
140+     Also sanitizes OAuth token refresh requests to Google's OAuth2 endpoint, 
141+     which contain sensitive refresh_token, client_id, and client_secret. 
142+ 
123143    Args: 
124144        request: VCR request object to sanitize 
125145
@@ -132,9 +152,79 @@ def sanitize_request(request: VCRRequest) -> VCRRequest:
132152        if  header  in  request .headers :
133153            request .headers [header ] =  ["<filtered>" ]
134154
155+     if  "oauth2.googleapis.com/token"  in  request .uri  and  request .body :
156+         body_str  =  request .body .decode () if  isinstance (request .body , bytes ) else  request .body 
157+         body_str  =  re .sub (
158+             r"refresh_token=[^&]+" ,
159+             "refresh_token=<filtered>" ,
160+             body_str 
161+         )
162+         body_str  =  re .sub (
163+             r"client_secret=[^&]+" ,
164+             "client_secret=<filtered>" ,
165+             body_str 
166+         )
167+         request .body  =  body_str 
168+ 
135169    return  request 
136170
137171
def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """Sanitize OAuth tokens in a VCR response before recording to cassette.

    This hook is called AFTER the real HTTP response is received, but BEFORE
    it is written to the cassette file. It replaces the values of top-level
    ``access_token``, ``id_token`` and ``refresh_token`` fields in the
    response body with ``"<filtered>"``.

    The input is never mutated; a deep copy is sanitized and returned. The
    body is only rewritten when a token was actually replaced, so responses
    that merely mention a token name keep their exact recorded bytes. The
    body keeps its original type (``bytes`` in, ``bytes`` out).

    Args:
        response: VCR response dict to sanitize. The body text is expected
            under ``response["body"]["string"]`` as ``str`` or ``bytes``.

    Returns:
        Sanitized copy of the response safe for cassette storage. Responses
        without a textual body (missing, ``None``, compressed or otherwise
        non-UTF-8 bytes) are returned as an unmodified copy.
    """
    token_keys = ("access_token", "id_token", "refresh_token")
    placeholder = "<filtered>"

    response = deepcopy(response)

    body = response.get("body")
    if not isinstance(body, dict) or "string" not in body:
        return response

    raw = body["string"]
    was_bytes = isinstance(raw, bytes)
    if was_bytes:
        try:
            text = raw.decode()
        except UnicodeDecodeError:
            # Body is likely compressed (gzip) or binary data; skip it.
            return response
    elif isinstance(raw, str):
        text = raw
    else:
        # No textual body (e.g. None): nothing to sanitize.
        return response

    # Cheap pre-check so ordinary API responses are never parsed.
    if not any(key in text for key in token_keys):
        return response

    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        # Not valid JSON: fall back to textual replacement of token fields.
        sanitized = text
        for key in token_keys:
            sanitized = re.sub(
                rf'"{key}":\s*"[^"]+"',
                f'"{key}": "{placeholder}"',
                sanitized,
            )
    else:
        if not isinstance(payload, dict):
            # Lists/strings/numbers have no top-level token fields.
            return response
        present = [key for key in token_keys if key in payload]
        if not present:
            # Token name only appears inside some value; keep bytes intact.
            return response
        for key in present:
            payload[key] = placeholder
        sanitized = json.dumps(payload)

    if sanitized != text:
        # Preserve the body's original type for downstream serialization.
        body["string"] = sanitized.encode() if was_bytes else sanitized

    return response
226+ 
227+ 
138228@pytest .fixture (scope = "session" ) 
139229def  vcr_config () ->  VCRConfig :
140230    """VCR configuration for all API tests. 
@@ -144,13 +234,15 @@ def vcr_config() -> VCRConfig:
144234    - OpenAI (authorization header) 
145235    - Google/Gemini (x-goog-api-key header) 
146236    - Anthropic (x-api-key, anthropic-organization-id headers) 
237+     - AnthropicVertex (Google OAuth tokens in request/response bodies) 
147238    - AWS Bedrock (AWS SigV4 headers: authorization, x-amz-*) 
148239
149240    Note: 
150-         We use before_record_request hook for sanitizing sensitive headers. 
151-         This ensures the real HTTP requests (with valid auth) are sent 
152-         successfully, but sensitive headers are replaced with placeholders 
153-         in the cassette files. 
241+         We use before_record_request hook for sanitizing sensitive headers 
242+         and OAuth request bodies. We use before_record_response hook for 
243+         sanitizing OAuth tokens in response bodies (e.g., access_token, id_token). 
244+         This ensures the real HTTP requests/responses (with valid auth) work 
245+         correctly, but sensitive data is replaced with placeholders in cassettes. 
154246
155247        We use 'raw_body' in match_on for exact binary matching and 
156248        decode_compressed_response=False to preserve exact response bytes 
@@ -165,6 +257,7 @@ def vcr_config() -> VCRConfig:
165257        "filter_headers" : [],  # Don't filter here; use before_record_request 
166258        "filter_post_data_parameters" : [],
167259        "before_record_request" : sanitize_request ,
260+         "before_record_response" : sanitize_response ,
168261        "decode_compressed_response" : False ,  # Preserve exact response bytes 
169262    }
170263
0 commit comments