|
18 | 18 | import warnings
|
19 | 19 | from collections import defaultdict
|
20 | 20 | from timeit import default_timer
|
21 |
| -from typing import Any, Optional |
| 21 | +from typing import Any, Optional, AsyncGenerator, Callable, List |
| 22 | +import asyncio |
22 | 23 |
|
23 | 24 | from neo4j_graphrag.utils.logging import prettify
|
24 | 25 |
|
|
47 | 48 | )
|
48 | 49 | from neo4j_graphrag.experimental.pipeline.types.orchestration import RunResult
|
49 | 50 | from neo4j_graphrag.experimental.pipeline.types.context import RunContext
|
50 |
| -from neo4j_graphrag.experimental.pipeline.notification import EventCallbackProtocol |
| 51 | +from neo4j_graphrag.experimental.pipeline.notification import EventCallbackProtocol, Event |
51 | 52 |
|
52 | 53 |
|
53 | 54 | logger = logging.getLogger(__name__)
|
@@ -412,6 +413,64 @@ def validate_parameter_mapping_for_task(self, task: TaskPipelineNode) -> bool:
|
async def get_final_results(self, run_id: str) -> dict[str, Any]:
    """Return the final results stored for a given run.

    Args:
        run_id: Identifier of the run to look up; passed unchanged to
            ``self.final_results.get``.

    Returns:
        Whatever awaiting ``self.final_results.get(run_id)`` produces.
    """
    stored_results = await self.final_results.get(run_id)
    return stored_results  # type: ignore[no-any-return]
|
414 | 415 |
|
async def stream(self, data: dict[str, Any]) -> AsyncGenerator[Event, None]:
    """Run the pipeline and stream the events it emits.

    While the generator is active, ``self.callback`` is replaced by a wrapper
    that queues every event for the consumer and then forwards it to the
    previously installed callback, if there was one. The previous callback is
    restored when the generator finishes, fails, or is closed early.

    Args:
        data: Input data for the pipeline components; passed unchanged to
            ``self.run``.

    Yields:
        Event: Each event handed to the callback while ``self.run`` executes,
        in the order in which it was queued.

    Raises:
        Exception: Whatever ``self.run`` raised, re-raised as soon as the run
            task is observed to have failed.
    """
    event_queue: asyncio.Queue[Event] = asyncio.Queue()
    original_callback = self.callback

    async def callback_and_event_stream(event: Event) -> None:
        # Queue first so the consumer sees the event even if the original
        # callback raises.
        await event_queue.put(event)
        if original_callback:
            await original_callback(event)

    self.callback = callback_and_event_stream

    run_task = None
    get_task = None
    try:
        run_task = asyncio.create_task(self.run(data))

        while True:
            # Keep at most ONE outstanding getter. Spawning a new one per
            # iteration would leave orphaned getters that silently consume
            # events nobody yields.
            if get_task is None:
                get_task = asyncio.create_task(event_queue.get())

            # asyncio.wait() requires tasks/futures; bare coroutines are
            # rejected on Python 3.11+.
            await asyncio.wait(
                {run_task, get_task},
                return_when=asyncio.FIRST_COMPLETED,
            )

            if run_task.done():
                break

            # The run is still going, so it was the getter that completed.
            event = get_task.result()
            get_task = None
            yield event

        # Re-raises the pipeline's exception, if any, before draining.
        run_task.result()

        # The getter may have dequeued an event in the same wake-up in which
        # the run finished; that event is no longer in the queue and must be
        # delivered explicitly.
        finished_getter = get_task
        get_task = None
        if finished_getter.done():
            yield finished_getter.result()
        else:
            # Cancel BEFORE draining so the pending getter cannot steal an
            # item while the consumer is processing a yielded event.
            finished_getter.cancel()

        while not event_queue.empty():
            yield event_queue.get_nowait()
    finally:
        # On early close, cancellation or failure, do not leave background
        # tasks behind: the event loop only holds weak references to them.
        for task in (get_task, run_task):
            if task is not None and not task.done():
                task.cancel()
        self.callback = original_callback
| 473 | + |
415 | 474 | async def run(self, data: dict[str, Any]) -> PipelineResult:
|
416 | 475 | logger.debug("PIPELINE START")
|
417 | 476 | start_time = default_timer()
|
|
0 commit comments