Commit 2c64a30

avidelatm authored and copybara-github committed
fix: make LiteLLM streaming truly asynchronous
Merge google#1451

## Description

Fixes google#1306 by using `async for` with `await self.llm_client.acompletion()` instead of a synchronous `for` loop.

## Changes

- Updated test mocks to properly handle async streaming by creating an async generator
- Ensured proper parameter handling to avoid a duplicate `stream` parameter

## Testing Plan

- All unit tests now pass with the async streaming implementation
- Verified with `pytest tests/unittests/models/test_litellm.py` that all streaming tests pass
- Manually tested with a sample agent using LiteLLM to confirm streaming works properly

# Test Evidence: https://youtu.be/hSp3otI79DM

Let me know if you need anything else from me for this PR.

COPYBARA_INTEGRATE_REVIEW=google#1451 from avidelatm:fix/litellm-async-streaming d35b9dc
PiperOrigin-RevId: 774835130
1 parent 53de35a commit 2c64a30
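
For context on the bug being fixed: inside a coroutine, a plain `for` over a blocking generator never yields control back to the event loop, so every other task stalls until the stream is exhausted. Below is a minimal, self-contained sketch of that failure mode; all names (`blocking_stream`, `heartbeat`) are illustrative and not from the ADK codebase.

```python
# Sketch of why the synchronous loop was a problem: consume_sync never
# awaits, so the heartbeat task is starved until the stream finishes.
import asyncio
import time


def blocking_stream():
  for i in range(3):
    time.sleep(1)  # stands in for a blocking network read per chunk
    yield i


async def heartbeat() -> None:
  for _ in range(3):
    await asyncio.sleep(0.5)
    print("heartbeat")  # delayed while the sync loop below holds the loop


async def consume_sync() -> None:
  for chunk in blocking_stream():  # blocks the event loop between chunks
    print("chunk", chunk)


async def main() -> None:
  await asyncio.gather(heartbeat(), consume_sync())


asyncio.run(main())
```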

File tree

2 files changed: +21 −4


src/google/adk/models/lite_llm.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -679,7 +679,7 @@ async def generate_content_async(
       aggregated_llm_response_with_tool_call = None
       usage_metadata = None
       fallback_index = 0
-      for part in self.llm_client.completion(**completion_args):
+      async for part in await self.llm_client.acompletion(**completion_args):
         for chunk, finish_reason in _model_response_to_chunk(part):
           if isinstance(chunk, FunctionChunk):
             index = chunk.index or fallback_index
```
tests/unittests/models/test_litellm.py

Lines changed: 20 additions & 3 deletions
```diff
@@ -416,9 +416,26 @@ def __init__(self, acompletion_mock, completion_mock):
     self.completion_mock = completion_mock
 
   async def acompletion(self, model, messages, tools, **kwargs):
-    return await self.acompletion_mock(
-        model=model, messages=messages, tools=tools, **kwargs
-    )
+    if kwargs.get("stream", False):
+      kwargs_copy = dict(kwargs)
+      kwargs_copy.pop("stream", None)
+
+      async def stream_generator():
+        stream_data = self.completion_mock(
+            model=model,
+            messages=messages,
+            tools=tools,
+            stream=True,
+            **kwargs_copy,
+        )
+        for item in stream_data:
+          yield item
+
+      return stream_generator()
+    else:
+      return await self.acompletion_mock(
+          model=model, messages=messages, tools=tools, **kwargs
+      )
 
   def completion(self, model, messages, tools, stream, **kwargs):
     return self.completion_mock(
```