Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions python/mirascope/llm/clients/anthropic_vertex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
"""Anthropic Vertex AI client implementations."""

from .clients import (
AnthropicVertexClient,
client,
get_client,
)
from .clients import AnthropicVertexClient, clear_cache, client, get_client

__all__ = [
"AnthropicVertexClient",
"clear_cache",
"client",
"get_client",
]
2 changes: 2 additions & 0 deletions python/mirascope/llm/clients/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .anthropic import clear_cache as clear_anthropic_cache
from .anthropic_bedrock import clear_cache as clear_bedrock_cache
from .anthropic_vertex import clear_cache as clear_vertex_cache
from .azure_openai.completions import clear_cache as clear_azure_completions_cache
from .azure_openai.responses import clear_cache as clear_azure_responses_cache
from .google import clear_cache as clear_google_cache
Expand All @@ -18,6 +19,7 @@ def clear_all_client_caches() -> None:

clear_anthropic_cache()
clear_bedrock_cache()
clear_vertex_cache()
clear_azure_completions_cache()
clear_azure_responses_cache()
clear_google_cache()
Expand Down
96 changes: 90 additions & 6 deletions python/tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

import hashlib
import inspect
import json
import re
from collections.abc import Awaitable, Callable, Generator
from copy import deepcopy
from typing import TypedDict, get_args
from typing import TYPE_CHECKING, Any, TypedDict, get_args
from typing_extensions import TypeIs

import httpx
Expand All @@ -23,9 +25,14 @@
from mirascope import llm
from mirascope.llm.clients import clear_all_client_caches

if TYPE_CHECKING:
from typing import Any


PROVIDER_MODEL_ID_PAIRS: list[tuple[llm.Provider, llm.ModelId]] = [
("anthropic", "claude-sonnet-4-0"),
("anthropic-bedrock", "us.anthropic.claude-haiku-4-5-20251001-v1:0"),
("anthropic-vertex", "claude-haiku-4-5@20251001"),
("google", "gemini-2.5-flash"),
("openai:completions", "gpt-4o"),
("openai:responses", "gpt-4o"),
Expand Down Expand Up @@ -105,6 +112,15 @@ class VCRConfig(TypedDict, total=False):
headers without affecting the actual HTTP requests.
"""

before_record_response: Callable[[dict[str, Any]], dict[str, Any]]
"""Callback to sanitize responses before saving to cassette.

This function is called AFTER the real HTTP response is received,
but BEFORE it's written to the cassette file. Use this to sanitize sensitive
data in response bodies (e.g., OAuth tokens) without affecting the actual
HTTP responses received by the application.
"""

decode_compressed_response: bool
"""Whether to decode compressed responses.

Expand All @@ -115,12 +131,15 @@ class VCRConfig(TypedDict, total=False):


def sanitize_request(request: VCRRequest) -> VCRRequest:
"""Sanitize sensitive headers in VCR request before recording to cassette.
"""Sanitize sensitive headers and OAuth tokens in VCR request before recording.

This hook is called AFTER the real HTTP request is sent (with valid auth),
but BEFORE it's written to the cassette file. We deep copy the request
and replace sensitive headers with placeholders.

Also sanitizes OAuth token refresh requests to Google's OAuth2 endpoint,
which contain sensitive refresh_token, client_id, and client_secret.

Args:
request: VCR request object to sanitize

Expand All @@ -133,9 +152,71 @@ def sanitize_request(request: VCRRequest) -> VCRRequest:
if header in request.headers:
request.headers[header] = ["<filtered>"]

if "oauth2.googleapis.com/token" in request.uri and request.body:
body_str = (
request.body.decode() if isinstance(request.body, bytes) else request.body
)
body_str = re.sub(r"refresh_token=[^&]+", "refresh_token=<filtered>", body_str)
body_str = re.sub(r"client_secret=[^&]+", "client_secret=<filtered>", body_str)
request.body = body_str

return request


def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """Sanitize sensitive OAuth tokens in a VCR response before recording.

    This hook is called AFTER the real HTTP response is received, but BEFORE
    it's written to the cassette file. We filter ``access_token``,
    ``id_token``, and ``refresh_token`` from Google OAuth2 responses (used by
    AnthropicVertex) so real credentials never land in a cassette.

    Args:
        response: VCR response dict to sanitize.

    Returns:
        A sanitized deep copy of the response safe for cassette storage; the
        input dict is never mutated.
    """
    response = deepcopy(response)

    body = response.get("body")
    # Guard against non-dict bodies: the original `"string" in response["body"]`
    # check would raise TypeError for e.g. a list body.
    if not isinstance(body, dict) or "string" not in body:
        return response

    raw = body["string"]
    # Track the stored type so we write back the same one. The cassette
    # config matches on raw_body, so silently converting str -> bytes (as the
    # previous version did) would change what gets matched on replay.
    was_bytes = isinstance(raw, bytes)
    if was_bytes:
        try:
            body_str = raw.decode()
        except UnicodeDecodeError:
            # Body is likely compressed (gzip) or binary data; skip rather
            # than corrupt it. NOTE(review): a compressed body containing
            # secrets is NOT sanitized here — to cover that we would need to
            # decompress, filter, and re-compress. Confirm whether any OAuth
            # endpoint actually returns compressed token responses.
            return response
    else:
        body_str = raw

    # Include refresh_token in the cheap presence check: the previous guard
    # only looked for access_token/id_token, so a refresh_token-only body
    # slipped through unsanitized.
    if not any(
        key in body_str for key in ("access_token", "id_token", "refresh_token")
    ):
        return response

    try:
        body_json = json.loads(body_str)
    except json.JSONDecodeError:
        # Not valid JSON (or token text embedded in something else): fall
        # back to regex filtering of each sensitive field.
        for key in ("access_token", "id_token", "refresh_token"):
            body_str = re.sub(
                rf'"{key}":\s*"[^"]+"', f'"{key}": "<filtered>"', body_str
            )
    else:
        if isinstance(body_json, dict):
            for key in ("access_token", "id_token", "refresh_token"):
                if key in body_json:
                    body_json[key] = "<filtered>"
        body_str = json.dumps(body_json)

    body["string"] = body_str.encode() if was_bytes else body_str
    return response


@pytest.fixture(autouse=True)
def _clear_client_caches() -> Generator[None, None, None]:
"""Ensure cached LLM client singletons do not bleed across e2e tests."""
Expand All @@ -155,13 +236,15 @@ def vcr_config() -> VCRConfig:
- OpenAI (authorization header)
- Google/Gemini (x-goog-api-key header)
- Anthropic (x-api-key, anthropic-organization-id headers)
- AnthropicVertex (Google OAuth tokens in request/response bodies)
- AWS Bedrock (AWS SigV4 headers: authorization, x-amz-*)

Note:
We use before_record_request hook for sanitizing sensitive headers.
This ensures the real HTTP requests (with valid auth) are sent
successfully, but sensitive headers are replaced with placeholders
in the cassette files.
We use before_record_request hook for sanitizing sensitive headers
and OAuth request bodies. We use before_record_response hook for
sanitizing OAuth tokens in response bodies (e.g., access_token, id_token).
This ensures the real HTTP requests/responses (with valid auth) work
correctly, but sensitive data is replaced with placeholders in cassettes.

We use 'raw_body' in match_on for exact binary matching and
decode_compressed_response=False to preserve exact response bytes
Expand All @@ -176,6 +259,7 @@ def vcr_config() -> VCRConfig:
"filter_headers": [], # Don't filter here; use before_record_request
"filter_post_data_parameters": [],
"before_record_request": sanitize_request,
"before_record_response": sanitize_response,
"decode_compressed_response": False, # Preserve exact response bytes
}

Expand Down
Loading
Loading