Commit 64881c7

Allow reusing the stream listener (#8461)
* Allow reusing the stream listener
* split out
1 parent 097f857 commit 64881c7

File tree

dspy/streaming/streaming_listener.py
tests/streaming/test_streaming.py

2 files changed: +76 −3 lines changed


dspy/streaming/streaming_listener.py

Lines changed: 20 additions & 3 deletions
@@ -17,14 +17,22 @@
 class StreamListener:
     """Class that listens to the stream to capture the streaming of a specific output field of a predictor."""

-    def __init__(self, signature_field_name: str, predict: Any = None, predict_name: str | None = None):
+    def __init__(
+        self,
+        signature_field_name: str,
+        predict: Any = None,
+        predict_name: str | None = None,
+        allow_reuse: bool = False,
+    ):
         """
         Args:
             signature_field_name: The name of the field to listen to.
             predict: The predictor to listen to. If None, when calling `streamify()` it will automatically look for
                 the predictor that has the `signature_field_name` in its signature.
             predict_name: The name of the predictor to listen to. If None, when calling `streamify()` it will
                 automatically look for the predictor that has the `signature_field_name` in its signature.
+            allow_reuse: If True, the stream listener can be reused for multiple streams. Note that this can
+                hurt performance because the same stream chunk is sent to multiple listeners.
         """
         self.signature_field_name = signature_field_name
         self.predict = predict

@@ -35,6 +43,7 @@ def __init__(self, signature_field_name: str, predict: Any = None, predict_name:
         self.stream_start = False
         self.stream_end = False
         self.cache_hit = False
+        self.allow_reuse = allow_reuse

         self.json_adapter_start_identifier = f'"{self.signature_field_name}":'
         self.json_adapter_end_identifier = re.compile(r"\w*\"(,|\s*})")

@@ -53,7 +62,7 @@ def receive(self, chunk: ModelResponseStream):
             start_identifier = self.json_adapter_start_identifier
             end_identifier = self.json_adapter_end_identifier

-            start_indicator = "{"
+            start_indicator = '"'
         elif isinstance(settings.adapter, ChatAdapter) or settings.adapter is None:
             start_identifier = self.chat_adapter_start_identifier
             end_identifier = self.chat_adapter_end_identifier

@@ -66,7 +75,15 @@ def receive(self, chunk: ModelResponseStream):
             )

         if self.stream_end:
-            return
+            if self.allow_reuse:
+                # Clear up the state for the next stream.
+                self.stream_end = False
+                self.cache_hit = False
+                self.field_start_queue = []
+                self.field_end_queue = Queue()
+                self.stream_start = False
+            else:
+                return

         try:
             chunk_message = chunk.choices[0].delta.content
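
In effect, a listener constructed with allow_reuse=True resets itself after each completed stream instead of going silent, so one listener can serve a program that invokes the same predictor more than once. A minimal usage sketch, mirroring the new test below — the module, question text, and model name are illustrative, and a configured OpenAI API key is assumed:

import asyncio

import dspy

class MyProgram(dspy.Module):
    # Illustrative module: the same predictor runs twice, producing two streams.
    def __init__(self):
        super().__init__()
        self.predict = dspy.Predict("question->answer")

    def forward(self, question, **kwargs):
        self.predict(question=question, **kwargs)  # first stream
        return self.predict(question=question, **kwargs)  # second stream, same listener

dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))  # assumes an OpenAI key is set

program = dspy.streamify(
    MyProgram(),
    stream_listeners=[
        # Without allow_reuse=True, the listener would stop after the first stream ends.
        dspy.streaming.StreamListener(signature_field_name="answer", allow_reuse=True),
    ],
)

async def main():
    output = program(question="why did a chicken cross the kitchen?")
    async for value in output:
        if isinstance(value, dspy.streaming.StreamResponse):
            print(value.chunk, end="")  # "answer" chunks from both predictor calls

asyncio.run(main())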

tests/streaming/test_streaming.py

Lines changed: 56 additions & 0 deletions
@@ -707,3 +707,59 @@ async def aforward(self, question, **kwargs):
     # There should be ~1 second delay between the tool start and end messages because we explicitly sleep for 1 second
     # in the tool.
     assert timestamps[1] - timestamps[0] >= 1
+
+
+@pytest.mark.anyio
+async def test_stream_listener_allow_reuse():
+    class MyProgram(dspy.Module):
+        def __init__(self):
+            super().__init__()
+            self.predict = dspy.Predict("question->answer")
+
+        def forward(self, question, **kwargs):
+            self.predict(question=question, **kwargs)
+            return self.predict(question=question, **kwargs)
+
+    program = dspy.streamify(
+        MyProgram(),
+        stream_listeners=[
+            dspy.streaming.StreamListener(signature_field_name="answer", allow_reuse=True),
+        ],
+    )
+
+    async def gpt_4o_mini_stream(*args, **kwargs):
+        # Recorded streaming from openai/gpt-4o-mini
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content="[["))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" ##"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" answer"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" ##"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" ]]\n\n"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content="To"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" get"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" to"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" the"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" other"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" side"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content="!"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content="\n\n"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content="[[ ##"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" completed"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" ##"))])
+        yield ModelResponseStream(model="gpt-4o-mini", choices=[StreamingChoices(delta=Delta(content=" ]]"))])
+
+    stream_generators = [gpt_4o_mini_stream, gpt_4o_mini_stream]
+
+    async def completion_side_effect(*args, **kwargs):
+        return stream_generators.pop(0)()  # return new async generator instance
+
+    with mock.patch("litellm.acompletion", side_effect=completion_side_effect):
+        with dspy.context(lm=dspy.LM("openai/gpt-4o-mini", cache=False)):
+            output = program(question="why did a chicken cross the kitchen?")
+            all_chunks = []
+            async for value in output:
+                if isinstance(value, dspy.streaming.StreamResponse):
+                    all_chunks.append(value)
+
+    concat_message = "".join([chunk.chunk for chunk in all_chunks])
+    # The listener functions twice.
+    assert concat_message == "To get to the other side!To get to the other side!"
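
A note on the design: state is reset lazily inside receive(), at the moment a finished listener sees the first chunk of the next stream, so callers never have to rebuild listeners between runs. The inline branch added above is equivalent to a hypothetical helper like this — not part of the commit, shown only to name the pieces of state involved:

from queue import Queue  # same Queue used by StreamListener's buffers

def _reset_for_next_stream(self) -> None:
    # Hypothetical refactor of the inline branch in receive(): restore the
    # listener to its pre-stream state so the next chunk starts a fresh capture.
    self.stream_end = False
    self.stream_start = False
    self.cache_hit = False
    self.field_start_queue = []     # token buffer used for field-start detection
    self.field_end_queue = Queue()  # token buffer used for field-end detection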
