Commit 7cb95bc
[Bug Fix] caching does not account for thinking or reasoning_effort config (#10140)
* _get_litellm_supported_chat_completion_kwargs
* test caching with thinking
1 parent: 104e4cb

2 files changed: +75 −3 lines

litellm/litellm_core_utils/model_param_helper.py

Lines changed: 14 additions & 3 deletions
@@ -75,18 +75,29 @@ def _get_all_llm_api_params() -> Set[str]:
         combined_kwargs = combined_kwargs.difference(exclude_kwargs)
         return combined_kwargs

+    @staticmethod
+    def get_litellm_provider_specific_params_for_chat_params() -> Set[str]:
+        return set(["thinking"])
+
     @staticmethod
     def _get_litellm_supported_chat_completion_kwargs() -> Set[str]:
         """
         Get the litellm supported chat completion kwargs

         This follows the OpenAI API Spec
         """
-        all_chat_completion_kwargs = set(
+        non_streaming_params: Set[str] = set(
             getattr(CompletionCreateParamsNonStreaming, "__annotations__", {}).keys()
-        ).union(
-            set(getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys())
         )
+        streaming_params: Set[str] = set(
+            getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys()
+        )
+        litellm_provider_specific_params: Set[str] = (
+            ModelParamHelper.get_litellm_provider_specific_params_for_chat_params()
+        )
+        all_chat_completion_kwargs: Set[str] = non_streaming_params.union(
+            streaming_params
+        ).union(litellm_provider_specific_params)
         return all_chat_completion_kwargs

     @staticmethod
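A quick sanity check of this change, assuming a litellm checkout that includes this commit: the new provider-specific param should now appear in the supported chat-completion kwargs that the caching layer consults (per the commit title).

```python
# Minimal sanity check, assuming a litellm checkout with this commit applied.
from litellm.litellm_core_utils.model_param_helper import ModelParamHelper

# "thinking" should now be part of the supported chat-completion kwargs.
supported = ModelParamHelper._get_litellm_supported_chat_completion_kwargs()
assert "thinking" in supported

# The new helper returns exactly the provider-specific additions.
assert ModelParamHelper.get_litellm_provider_specific_params_for_chat_params() == {"thinking"}
```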

tests/local_testing/test_caching.py

Lines changed: 61 additions & 0 deletions
@@ -2608,3 +2608,64 @@ def test_caching_with_reasoning_content():
     print(f"response 2: {response_2.model_dump_json(indent=4)}")
     assert response_2._hidden_params["cache_hit"] == True
     assert response_2.choices[0].message.reasoning_content is not None
+
+
+def test_caching_reasoning_args_miss():  # test in memory cache
+    try:
+        # litellm._turn_on_debug()
+        litellm.set_verbose = True
+        litellm.cache = Cache(
+        )
+        response1 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, reasoning_effort="low", mock_response="My response")
+        response2 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, mock_response="My response")
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        assert response1.id != response2.id
+    except Exception as e:
+        print(f"error occurred: {traceback.format_exc()}")
+        pytest.fail(f"Error occurred: {e}")
+
+def test_caching_reasoning_args_hit():  # test in memory cache
+    try:
+        # litellm._turn_on_debug()
+        litellm.set_verbose = True
+        litellm.cache = Cache(
+        )
+        response1 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, reasoning_effort="low", mock_response="My response")
+        response2 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, reasoning_effort="low", mock_response="My response")
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        assert response1.id == response2.id
+    except Exception as e:
+        print(f"error occurred: {traceback.format_exc()}")
+        pytest.fail(f"Error occurred: {e}")
+
+def test_caching_thinking_args_miss():  # test in memory cache
+    try:
+        # litellm._turn_on_debug()
+        litellm.set_verbose = True
+        litellm.cache = Cache(
+        )
+        response1 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, thinking={"type": "enabled", "budget_tokens": 1024}, mock_response="My response")
+        response2 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, mock_response="My response")
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        assert response1.id != response2.id
+    except Exception as e:
+        print(f"error occurred: {traceback.format_exc()}")
+        pytest.fail(f"Error occurred: {e}")
+
+def test_caching_thinking_args_hit():  # test in memory cache
+    try:
+        # litellm._turn_on_debug()
+        litellm.set_verbose = True
+        litellm.cache = Cache(
+        )
+        response1 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, thinking={"type": "enabled", "budget_tokens": 1024}, mock_response="My response")
+        response2 = completion(model="claude-3-7-sonnet-latest", messages=messages, caching=True, thinking={"type": "enabled", "budget_tokens": 1024}, mock_response="My response")
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        assert response1.id == response2.id
+    except Exception as e:
+        print(f"error occurred: {traceback.format_exc()}")
+        pytest.fail(f"Error occurred: {e}")
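The four tests encode the expected cache semantics: calls that differ only in reasoning_effort or thinking must miss, identical calls must hit. The failure mode they guard against can be sketched in a few lines. This is illustrative only, not litellm's actual cache implementation; make_cache_key and the filter sets below are hypothetical stand-ins for how a supported-kwargs set feeds cache-key generation.

```python
# Illustrative sketch of the bug, not litellm's real cache code: if "thinking"
# is absent from the supported-kwargs filter, two calls that differ only in
# their thinking config hash to the same cache key and wrongly share a hit.
import hashlib
import json
from typing import Any, Dict, Set


def make_cache_key(call_kwargs: Dict[str, Any], supported: Set[str]) -> str:
    # Keep only the params that count toward the key, in a stable order.
    relevant = {k: call_kwargs[k] for k in sorted(supported) if k in call_kwargs}
    return hashlib.sha256(json.dumps(relevant, sort_keys=True).encode()).hexdigest()


base = {"model": "claude-3-7-sonnet-latest",
        "messages": [{"role": "user", "content": "hi"}]}
with_thinking = {**base, "thinking": {"type": "enabled", "budget_tokens": 1024}}

old_supported = {"model", "messages"}              # before the fix: "thinking" dropped
new_supported = {"model", "messages", "thinking"}  # after the fix: "thinking" included

# Before: keys collide, so the thinking call wrongly hits the plain call's cache.
assert make_cache_key(with_thinking, old_supported) == make_cache_key(base, old_supported)
# After: the thinking config is part of the key, so the calls miss as expected.
assert make_cache_key(with_thinking, new_supported) != make_cache_key(base, new_supported)
```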
