Skip to content

Commit 0126dbf

Browse files
committed
Add unittest for mirrored provider clients (AnthropicVertex)
1 parent 28be452 commit 0126dbf

File tree

85 files changed

+17692
-10
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+17692
-10
lines changed

python/tests/e2e/conftest.py

Lines changed: 101 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
import hashlib
99
import inspect
10+
import json
11+
import re
1012
from collections.abc import Awaitable, Callable, Generator
1113
from copy import deepcopy
12-
from typing import TypedDict, get_args
14+
from typing import TYPE_CHECKING, TypedDict, get_args
1315
from typing_extensions import TypeIs
1416

1517
import httpx
@@ -21,10 +23,16 @@
2123
from vcr.stubs import httpx_stubs
2224

2325
from mirascope import llm
26+
from mirascope.llm.clients.anthropic_vertex import clients as anthropic_vertex_clients
27+
28+
if TYPE_CHECKING:
29+
from typing import Any
30+
2431

2532
PROVIDER_MODEL_ID_PAIRS: list[tuple[llm.Provider, llm.ModelId]] = [
2633
("anthropic", "claude-sonnet-4-0"),
2734
("anthropic-bedrock", "us.anthropic.claude-haiku-4-5-20251001-v1:0"),
35+
("anthropic-vertex", "claude-haiku-4-5@20251001"),
2836
("google", "gemini-2.5-flash"),
2937
("openai:completions", "gpt-4o"),
3038
("openai:responses", "gpt-4o"),
@@ -104,6 +112,15 @@ class VCRConfig(TypedDict, total=False):
104112
headers without affecting the actual HTTP requests.
105113
"""
106114

115+
before_record_response: Callable[[dict[str, Any]], dict[str, Any]]
116+
"""Callback to sanitize responses before saving to cassette.
117+
118+
This function is called AFTER the real HTTP response is received,
119+
but BEFORE it's written to the cassette file. Use this to sanitize sensitive
120+
data in response bodies (e.g., OAuth tokens) without affecting the actual
121+
HTTP responses received by the application.
122+
"""
123+
107124
decode_compressed_response: bool
108125
"""Whether to decode compressed responses.
109126
@@ -114,12 +131,15 @@ class VCRConfig(TypedDict, total=False):
114131

115132

116133
def sanitize_request(request: VCRRequest) -> VCRRequest:
117-
"""Sanitize sensitive headers in VCR request before recording to cassette.
134+
"""Sanitize sensitive headers and OAuth tokens in VCR request before recording.
118135
119136
This hook is called AFTER the real HTTP request is sent (with valid auth),
120137
but BEFORE it's written to the cassette file. We deep copy the request
121138
and replace sensitive headers with placeholders.
122139
140+
Also sanitizes OAuth token refresh requests to Google's OAuth2 endpoint,
141+
which contain sensitive refresh_token, client_id, and client_secret.
142+
123143
Args:
124144
request: VCR request object to sanitize
125145
@@ -132,9 +152,81 @@ def sanitize_request(request: VCRRequest) -> VCRRequest:
132152
if header in request.headers:
133153
request.headers[header] = ["<filtered>"]
134154

155+
if "oauth2.googleapis.com/token" in request.uri and request.body:
156+
body_str = (
157+
request.body.decode() if isinstance(request.body, bytes) else request.body
158+
)
159+
body_str = re.sub(r"refresh_token=[^&]+", "refresh_token=<filtered>", body_str)
160+
body_str = re.sub(r"client_secret=[^&]+", "client_secret=<filtered>", body_str)
161+
request.body = body_str
162+
135163
return request
136164

137165

166+
def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """Sanitize sensitive tokens in VCR response before recording to cassette.

    This hook is called AFTER the real HTTP response is received,
    but BEFORE it's written to the cassette file. We sanitize OAuth tokens
    from Google OAuth2 responses (used by AnthropicVertex).

    The ``access_token``, ``id_token`` and ``refresh_token`` fields are
    replaced with ``<filtered>``. A body in which nothing was redacted is
    left byte-for-byte untouched, and the body keeps its original type
    (``bytes`` stays ``bytes``, ``str`` stays ``str``).

    Args:
        response: VCR response dict to sanitize

    Returns:
        Sanitized copy of the response safe for cassette storage
    """
    token_keys = ("access_token", "id_token", "refresh_token")
    placeholder = "<filtered>"

    # Work on a copy so the caller's response object is never modified.
    response = deepcopy(response)

    body = response.get("body")
    if not isinstance(body, dict) or "string" not in body:
        return response

    raw = body["string"]
    if isinstance(raw, bytes):
        try:
            text = raw.decode()
        except UnicodeDecodeError:
            # Body is likely compressed (gzip) or binary data
            # Skip sanitization for these responses
            return response
    elif isinstance(raw, str):
        text = raw
    else:
        # None or an unexpected body type: nothing we can scan.
        return response

    # Cheap gate: most responses never mention any token field at all.
    if not any(key in text for key in token_keys):
        return response

    sanitized = text

    # Preferred path: a JSON object with token fields at the top level.
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        payload = None

    if isinstance(payload, dict):
        present = [key for key in token_keys if key in payload]
        if present:
            for key in present:
                payload[key] = placeholder
            sanitized = json.dumps(payload)

    # Fallback for bodies that are not valid JSON, are not JSON objects, or
    # carry the token fields somewhere other than the top level.
    if sanitized == text:
        for key in token_keys:
            sanitized = re.sub(
                rf'"{key}":\s*"[^"]+"',
                f'"{key}": "{placeholder}"',
                sanitized,
            )

    # Only write back when something was actually redacted; re-serializing an
    # unrelated body would alter the recorded bytes for no benefit.
    if sanitized != text:
        body["string"] = sanitized.encode() if isinstance(raw, bytes) else sanitized

    return response
218+
219+
220+
@pytest.fixture(autouse=True)
def _clear_anthropic_vertex_cache() -> Generator[None, None, None]:
    """Ensure Anthropic Vertex clients do not reuse cached credentials between tests.

    Autouse fixture: calls ``anthropic_vertex_clients.clear_cache()`` once
    before yielding to the test and once more afterwards.
    """
    # Clear before the test so it does not start with previously cached state.
    anthropic_vertex_clients.clear_cache()
    try:
        yield
    finally:
        # The finally clause makes this second clear run even if an exception
        # is raised at the yield point.
        anthropic_vertex_clients.clear_cache()
228+
229+
138230
@pytest.fixture(scope="session")
139231
def vcr_config() -> VCRConfig:
140232
"""VCR configuration for all API tests.
@@ -144,13 +236,15 @@ def vcr_config() -> VCRConfig:
144236
- OpenAI (authorization header)
145237
- Google/Gemini (x-goog-api-key header)
146238
- Anthropic (x-api-key, anthropic-organization-id headers)
239+
- AnthropicVertex (Google OAuth tokens in request/response bodies)
147240
- AWS Bedrock (AWS SigV4 headers: authorization, x-amz-*)
148241
149242
Note:
150-
We use before_record_request hook for sanitizing sensitive headers.
151-
This ensures the real HTTP requests (with valid auth) are sent
152-
successfully, but sensitive headers are replaced with placeholders
153-
in the cassette files.
243+
We use before_record_request hook for sanitizing sensitive headers
244+
and OAuth request bodies. We use before_record_response hook for
245+
sanitizing OAuth tokens in response bodies (e.g., access_token, id_token).
246+
This ensures the real HTTP requests/responses (with valid auth) work
247+
correctly, but sensitive data is replaced with placeholders in cassettes.
154248
155249
We use 'raw_body' in match_on for exact binary matching and
156250
decode_compressed_response=False to preserve exact response bytes
@@ -165,6 +259,7 @@ def vcr_config() -> VCRConfig:
165259
"filter_headers": [], # Don't filter here; use before_record_request
166260
"filter_post_data_parameters": [],
167261
"before_record_request": sanitize_request,
262+
"before_record_response": sanitize_response,
168263
"decode_compressed_response": False, # Preserve exact response bytes
169264
}
170265

0 commit comments

Comments
 (0)