Skip to content

Commit b40a7f4

Browse files
committed
Add unittest for mirrored provider clients (AnthropicVertex)
1 parent 550f5fa commit b40a7f4

File tree

83 files changed

+14197
-24
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+14197
-24
lines changed

python/tests/e2e/conftest.py

Lines changed: 99 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
import hashlib
99
import inspect
10+
import json
11+
import re
1012
from collections.abc import Awaitable, Callable, Generator
1113
from copy import deepcopy
12-
from typing import TypedDict, get_args
14+
from typing import TYPE_CHECKING, TypedDict, get_args
1315
from typing_extensions import TypeIs
1416

1517
import httpx
@@ -22,9 +24,15 @@
2224

2325
from mirascope import llm
2426

27+
if TYPE_CHECKING:
28+
from typing import Any
29+
30+
from vcr.response import Response as VCRResponse
31+
2532
PROVIDER_MODEL_ID_PAIRS: list[tuple[llm.Provider, llm.ModelId]] = [
2633
("anthropic", "claude-sonnet-4-0"),
2734
("anthropic-bedrock", "us.anthropic.claude-haiku-4-5-20251001-v1:0"),
35+
("anthropic-vertex", "claude-haiku-4-5@20251001"),
2836
("google", "gemini-2.5-flash"),
2937
("openai:completions", "gpt-4o"),
3038
("openai:responses", "gpt-4o"),
@@ -104,6 +112,15 @@ class VCRConfig(TypedDict, total=False):
104112
headers without affecting the actual HTTP requests.
105113
"""
106114

115+
before_record_response: Callable[[dict[str, Any]], dict[str, Any]]
116+
"""Callback to sanitize responses before saving to cassette.
117+
118+
This function is called AFTER the real HTTP response is received,
119+
but BEFORE it's written to the cassette file. Use this to sanitize sensitive
120+
data in response bodies (e.g., OAuth tokens) without affecting the actual
121+
HTTP responses received by the application.
122+
"""
123+
107124
decode_compressed_response: bool
108125
"""Whether to decode compressed responses.
109126
@@ -114,12 +131,15 @@ class VCRConfig(TypedDict, total=False):
114131

115132

116133
def sanitize_request(request: VCRRequest) -> VCRRequest:
117-
"""Sanitize sensitive headers in VCR request before recording to cassette.
134+
"""Sanitize sensitive headers and OAuth tokens in VCR request before recording.
118135
119136
This hook is called AFTER the real HTTP request is sent (with valid auth),
120137
but BEFORE it's written to the cassette file. We deep copy the request
121138
and replace sensitive headers with placeholders.
122139
140+
Also sanitizes OAuth token refresh requests to Google's OAuth2 endpoint,
141+
which contain sensitive refresh_token, client_id, and client_secret.
142+
123143
Args:
124144
request: VCR request object to sanitize
125145
@@ -132,9 +152,79 @@ def sanitize_request(request: VCRRequest) -> VCRRequest:
132152
if header in request.headers:
133153
request.headers[header] = ["<filtered>"]
134154

155+
if "oauth2.googleapis.com/token" in request.uri and request.body:
156+
body_str = request.body.decode() if isinstance(request.body, bytes) else request.body
157+
body_str = re.sub(
158+
r"refresh_token=[^&]+",
159+
"refresh_token=<filtered>",
160+
body_str
161+
)
162+
body_str = re.sub(
163+
r"client_secret=[^&]+",
164+
"client_secret=<filtered>",
165+
body_str
166+
)
167+
request.body = body_str
168+
135169
return request
136170

137171

172+
def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """Sanitize OAuth tokens in a VCR response before recording to cassette.

    This hook is called AFTER the real HTTP response is received,
    but BEFORE it's written to the cassette file. It replaces the values of
    `access_token`, `id_token`, and `refresh_token` with `<filtered>`.

    The input is never mutated; a deep copy is sanitized and returned. The
    body is only rewritten when a token was actually redacted, and it keeps
    the type (`bytes` or `str`) it arrived with.

    Args:
        response: VCR response dict to sanitize

    Returns:
        Sanitized copy of the response safe for cassette storage
    """
    response = deepcopy(response)

    body = response.get("body")
    if not isinstance(body, dict):
        return response

    raw_body = body.get("string")
    if isinstance(raw_body, bytes):
        try:
            text = raw_body.decode()
        except UnicodeDecodeError:
            # Body is likely compressed (gzip) or binary data;
            # skip sanitization for these responses.
            return response
    elif isinstance(raw_body, str):
        text = raw_body
    else:
        # Missing/None body (or an unexpected type): nothing to sanitize.
        return response

    token_keys = ("access_token", "id_token", "refresh_token")
    # Cheap substring pre-check so ordinary responses are never parsed.
    if not any(key in text for key in token_keys):
        return response

    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        # Not valid JSON (e.g. truncated or streamed): fall back to a
        # textual replacement of `"<key>": "<value>"` pairs.
        sanitized = text
        for key in token_keys:
            sanitized = re.sub(
                rf'"{key}":\s*"[^"]+"',
                f'"{key}": "<filtered>"',
                sanitized,
            )
    else:
        if not _redact_token_fields(payload, token_keys):
            # The token names only appear inside values (e.g. model output
            # mentioning "access_token"); keep the original bytes untouched.
            return response
        sanitized = json.dumps(payload)

    # Write back with the same type the body arrived in, so in-memory
    # consumers that expect bytes keep working.
    body["string"] = sanitized.encode() if isinstance(raw_body, bytes) else sanitized
    return response


def _redact_token_fields(node: object, token_keys: tuple[str, ...]) -> int:
    """Redact token values in a parsed JSON tree in place; return the count."""
    replaced = 0
    if isinstance(node, dict):
        for key, value in node.items():
            if key in token_keys:
                # Re-assigning an existing key is safe while iterating.
                node[key] = "<filtered>"
                replaced += 1
            else:
                replaced += _redact_token_fields(value, token_keys)
    elif isinstance(node, list):
        for item in node:
            replaced += _redact_token_fields(item, token_keys)
    return replaced
226+
227+
138228
@pytest.fixture(scope="session")
139229
def vcr_config() -> VCRConfig:
140230
"""VCR configuration for all API tests.
@@ -144,13 +234,15 @@ def vcr_config() -> VCRConfig:
144234
- OpenAI (authorization header)
145235
- Google/Gemini (x-goog-api-key header)
146236
- Anthropic (x-api-key, anthropic-organization-id headers)
237+
- AnthropicVertex (Google OAuth tokens in request/response bodies)
147238
- AWS Bedrock (AWS SigV4 headers: authorization, x-amz-*)
148239
149240
Note:
150-
We use before_record_request hook for sanitizing sensitive headers.
151-
This ensures the real HTTP requests (with valid auth) are sent
152-
successfully, but sensitive headers are replaced with placeholders
153-
in the cassette files.
241+
We use before_record_request hook for sanitizing sensitive headers
242+
and OAuth request bodies. We use before_record_response hook for
243+
sanitizing OAuth tokens in response bodies (e.g., access_token, id_token).
244+
This ensures the real HTTP requests/responses (with valid auth) work
245+
correctly, but sensitive data is replaced with placeholders in cassettes.
154246
155247
We use 'raw_body' in match_on for exact binary matching and
156248
decode_compressed_response=False to preserve exact response bytes
@@ -165,6 +257,7 @@ def vcr_config() -> VCRConfig:
165257
"filter_headers": [], # Don't filter here; use before_record_request
166258
"filter_post_data_parameters": [],
167259
"before_record_request": sanitize_request,
260+
"before_record_response": sanitize_response,
168261
"decode_compressed_response": False, # Preserve exact response bytes
169262
}
170263

0 commit comments

Comments
 (0)