Skip to content

fix: Anthropic prompt caching on GCP Vertex AI #9605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions litellm/llms/anthropic/chat/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,15 @@ def get_anthropic_headers(
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)

# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
# Handle beta headers for Vertex AI
# We allow prompt caching beta header for Vertex, but exclude other beta headers that might cause issues
if is_vertex_request is True:
pass
vertex_safe_betas = set()
# Allow prompt caching beta header for Vertex
if "prompt-caching-2024-07-31" in betas:
vertex_safe_betas.add("prompt-caching-2024-07-31")
if len(vertex_safe_betas) > 0:
headers["anthropic-beta"] = ",".join(vertex_safe_betas)
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)

Expand Down
137 changes: 137 additions & 0 deletions tests/litellm/llms/vertex_ai/test_vertex_anthropic_prompt_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import os
import sys
from unittest.mock import MagicMock, patch

import pytest

sys.path.insert(
0, os.path.abspath("../../../..")
) # Adds the parent directory to the system path

from litellm.llms.anthropic.chat.transformation import AnthropicConfig


def test_anthropic_prompt_caching_headers_for_vertex():
    """
    Verify prompt-caching beta header handling for Vertex AI requests with
    Anthropic models: the header is emitted when cache control is present in
    the messages, and omitted when it is not.
    """
    config = AnthropicConfig()

    # --- Case 1: Vertex request where a message carries cache_control ---
    cached_messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant.",
            "cache_control": {"type": "ephemeral"},
        },
        {"role": "user", "content": "Tell me about the solar system."},
    ]

    # Cache control on the message itself should be detected
    caching_detected = config.is_cache_control_set(messages=cached_messages)
    assert caching_detected is True, "Cache control should be detected in messages"

    # Build headers for a Vertex AI request with caching enabled
    vertex_headers = config.get_anthropic_headers(
        api_key="test-api-key",
        prompt_caching_set=caching_detected,
        is_vertex_request=True,
    )

    # The prompt-caching beta must survive the Vertex header filtering
    assert "anthropic-beta" in vertex_headers, "anthropic-beta header should be present"
    assert (
        "prompt-caching-2024-07-31" in vertex_headers["anthropic-beta"]
    ), "prompt-caching-2024-07-31 should be in the beta header"

    # --- Case 2: Vertex request with no cache_control anywhere ---
    plain_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me about the solar system."},
    ]

    caching_detected = config.is_cache_control_set(messages=plain_messages)
    assert caching_detected is False, "Cache control should not be detected in messages"

    vertex_headers = config.get_anthropic_headers(
        api_key="test-api-key",
        prompt_caching_set=caching_detected,
        is_vertex_request=True,
    )

    # No caching requested -> no beta header at all for Vertex
    assert (
        "anthropic-beta" not in vertex_headers
    ), "anthropic-beta header should not be present"


def test_anthropic_prompt_caching_with_content_blocks():
    """
    Verify that cache control nested inside a message's content blocks (rather
    than on the message object itself) is detected, and that the prompt-caching
    beta header is then emitted for a Vertex AI request.
    """
    config = AnthropicConfig()

    # cache_control lives on an individual text content block here
    block_messages = [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are a helpful assistant.",
                    "cache_control": {"type": "ephemeral"},
                }
            ],
        },
        {"role": "user", "content": "Tell me about the solar system."},
    ]

    # Detection must recurse into content blocks
    caching_detected = config.is_cache_control_set(messages=block_messages)
    assert caching_detected is True, "Cache control should be detected in content blocks"

    vertex_headers = config.get_anthropic_headers(
        api_key="test-api-key",
        prompt_caching_set=caching_detected,
        is_vertex_request=True,
    )

    # The prompt-caching beta must be present in the Vertex headers
    assert "anthropic-beta" in vertex_headers, "anthropic-beta header should be present"
    assert (
        "prompt-caching-2024-07-31" in vertex_headers["anthropic-beta"]
    ), "prompt-caching-2024-07-31 should be in the beta header"


def test_anthropic_vertex_other_beta_headers():
    """
    Verify that for Vertex AI requests only the prompt-caching beta survives:
    other beta features (computer tool use, PDFs) must be filtered out of the
    anthropic-beta header even when requested.
    """
    config = AnthropicConfig()

    # Request several beta features at once; Vertex should keep only caching
    vertex_headers = config.get_anthropic_headers(
        api_key="test-api-key",
        prompt_caching_set=True,
        computer_tool_used=True,  # This should be excluded for Vertex
        pdf_used=True,  # This should be excluded for Vertex
        is_vertex_request=True,
    )

    # Exactly the prompt-caching beta, nothing else
    assert "anthropic-beta" in vertex_headers, "anthropic-beta header should be present"
    assert (
        vertex_headers["anthropic-beta"] == "prompt-caching-2024-07-31"
    ), "Only prompt-caching should be in the beta header"
    assert (
        "computer-use-2024-10-22" not in vertex_headers["anthropic-beta"]
    ), "computer-use beta should not be included"
    assert (
        "pdfs-2024-09-25" not in vertex_headers["anthropic-beta"]
    ), "pdfs beta should not be included"
Loading