Commit f2a7eda
fix(proxy_server.py): Fix "Circular reference detected" error when max_parallel_requests = 0 (#9671)

* fix(proxy_server.py): remove non-functional parent backoff/retry on /chat/completion (causes circular reference error)
* fix(http_parsing_utils.py): safely return parsed body - don't allow mutation of cached request body by client functions (root cause fix for circular reference error)
* Revert "fix: Anthropic prompt caching on GCP Vertex AI (#9605)" (#9670). This reverts commit a867324.
* add type hints for AnthropicMessagesResponse
* define types for response from AnthropicMessagesResponse
* fix response typing
* allow using litellm.messages.acreate and litellm.messages.create
* fix anthropic_messages implementation
* add clear type hints to litellm.messages.create functions
* fix anthropic_messages
* working anthropic API tests
* fixes - anthropic messages interface
* use new anthropic interface
* fix code quality check
* docs anthropic messages endpoint
* add namespace_packages = True to mypy
* fix mypy lint errors
* docs anthropic messages interface
* test: fix unit test
* test(test_http_parsing_utils.py): update tests

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>

1 parent 136f1d6 · commit f2a7eda
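For context, "Circular reference detected" is the ValueError that Python's json module raises when asked to serialize a dict that (indirectly) contains itself. A minimal standalone sketch of the failure mode this commit fixes, not LiteLLM's actual code (the read_body helper and its cache are illustrative only):

import json

_cache: dict = {}

def read_body(request_id: str) -> dict:
    # Hypothetical cache that hands out the cached dict itself, so every
    # caller shares one mutable object (roughly the pre-fix behavior).
    return _cache.setdefault(request_id, {"model": "gpt-4"})

body = read_body("req-1")
# A caller attaches metadata that points back at the body itself.
body["proxy_server_request"] = {"body": body}

try:
    # A later read returns the same polluted object; serializing it fails.
    json.dumps(read_body("req-1"))
except ValueError as e:
    print(e)  # ValueError: Circular reference detected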

File tree: 4 files changed, +55 -16 lines

litellm/proxy/common_request_processing.py

Lines changed: 1 addition & 0 deletions

@@ -123,6 +123,7 @@ async def base_process_llm_request(
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
+
         verbose_proxy_logger.debug(
             "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
         )

litellm/proxy/common_utils/http_parsing_utils.py

Lines changed: 8 additions & 3 deletions

@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
 def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
     if request is None:
         return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
     return None


@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
     try:
         if request is None:
             return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
     except Exception as e:
         verbose_proxy_logger.debug(
             "Unexpected error setting request parsed body - {}".format(e)

litellm/proxy/proxy_server.py

Lines changed: 0 additions & 9 deletions

@@ -3308,15 +3308,6 @@ async def model_list(
     tags=["chat/completions"],
     responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
 )  # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
 async def chat_completion(  # noqa: PLR0915
     request: Request,
     fastapi_response: Response,
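The deleted decorator retried the whole endpoint on any exception; per the commit message it was non-functional and implicated in the circular reference error, presumably because each retry re-entered the handler and re-read the cached (already mutated) request body. A minimal sketch of how backoff.on_exception re-invokes a decorated function (the handler below is illustrative, not the proxy's):

import backoff

calls = {"count": 0}

@backoff.on_exception(backoff.expo, ValueError, max_tries=3)
def handler() -> str:
    # Every retry re-enters the function from the top, so any shared
    # state a previous attempt mutated is observed again.
    calls["count"] += 1
    if calls["count"] < 3:
        raise ValueError("transient failure")
    return "ok"

assert handler() == "ok"
assert calls["count"] == 3  # two failures, then one success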

tests/litellm/proxy/common_utils/test_http_parsing_utils.py

Lines changed: 46 additions & 4 deletions

@@ -39,7 +39,7 @@ async def test_request_body_caching():
     result1 = await _read_request_body(mock_request)
     assert result1 == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})

     # Verify the body was read once
     mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@ async def test_request_body_caching():

     # Second call should use the cached body
     result2 = await _read_request_body(mock_request)
-    assert result2 == test_data
+    assert result2 == {"key": "value"}

     # Verify the body was not read again
     mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
     # Verify the form data was correctly parsed
     assert result == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (
+        ("name", "message"),
+        {"name": "test_user", "message": "hello world"},
+    )

     # Verify form() was called
     mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
     # Verify an empty dict is returned
     assert result == {}
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == {}
+    assert mock_request.scope["parsed_body"] == ((), {})

     # Verify the body was read
     mock_request.body.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_circular_reference_handling():
+    """
+    Test that the cached request body isn't modified when the returned result is modified.
+    Demonstrates the mutable dictionary reference issue.
+    """
+    # Create a mock request with initial data
+    mock_request = MagicMock()
+    initial_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "Hello"}],
+    }
+
+    mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
+    mock_request.headers = {"content-type": "application/json"}
+    mock_request.scope = {}
+
+    # First parse
+    result = await _read_request_body(mock_request)
+
+    # Verify initial parse
+    assert result["model"] == "gpt-4"
+    assert result["messages"] == [{"role": "user", "content": "Hello"}]
+
+    # Modify the result by adding proxy_server_request
+    result["proxy_server_request"] = {
+        "url": "http://0.0.0.0:4000/v1/chat/completions",
+        "method": "POST",
+        "headers": {"content-type": "application/json"},
+        "body": result,  # Creates circular reference
+    }
+
+    # Second parse using the same request - reads through the cache
+    result2 = await _read_request_body(mock_request)
+    assert (
+        "proxy_server_request" not in result2
+    )  # Passes now that the cached body can no longer be polluted
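Assuming a standard pytest setup (the async tests above rely on pytest-asyncio), the new regression test can be run in isolation with:

pytest tests/litellm/proxy/common_utils/test_http_parsing_utils.py::test_circular_reference_handling -q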
