2
2
from contextlib import suppress
3
3
from threading import BoundedSemaphore , Thread
4
4
from threading import Event as ThreadEvent
5
- from types import TracebackType
6
5
from typing import Optional
7
6
8
7
from fastapi_events .handlers .local import local_handler
30
29
from .session_processor_common import SessionProcessorStatus
31
30
32
31
33
- def get_stacktrace (exc_type : type , exc_value : BaseException , exc_traceback : TracebackType ) -> str :
34
- """Formats a stacktrace as a string"""
35
-
36
- return "" .join (traceback .format_exception (exc_type , exc_value , exc_traceback ))
37
-
38
-
39
32
class DefaultSessionRunner (SessionRunnerBase ):
40
33
"""Processes a single session's invocations"""
41
34
@@ -71,10 +64,16 @@ def run(self, queue_item: SessionQueueItem):
71
64
invocation = queue_item .session .next ()
72
65
# Anything other than a `NodeInputError` is handled as a processor error
73
66
except NodeInputError as e :
74
- # Must extract the exception traceback here to not lose its stacktrace when we change scope
75
- traceback = e .__traceback__
76
- assert traceback is not None
77
- self ._on_node_error (e .node , queue_item , type (e ), e , traceback )
67
+ error_type = e .__class__ .__name__
68
+ error_message = str (e )
69
+ error_traceback = traceback .format_exc ()
70
+ self ._on_node_error (
71
+ invocation = e .node ,
72
+ queue_item = queue_item ,
73
+ error_type = error_type ,
74
+ error_message = error_message ,
75
+ error_traceback = error_traceback ,
76
+ )
78
77
break
79
78
80
79
if invocation is None or self ._cancel_event .is_set ():
@@ -126,10 +125,16 @@ def run_node(self, invocation: BaseInvocation, queue_item: SessionQueueItem):
126
125
# loop go to its next iteration, and the cancel event will be handled correctly.
127
126
pass
128
127
except Exception as e :
129
- # Must extract the exception traceback here to not lose its stacktrace when we change scope
130
- exc_traceback = e .__traceback__
131
- assert exc_traceback is not None
132
- self ._on_node_error (invocation , queue_item , type (e ), e , exc_traceback )
128
+ error_type = e .__class__ .__name__
129
+ error_message = str (e )
130
+ error_traceback = traceback .format_exc ()
131
+ self ._on_node_error (
132
+ invocation = invocation ,
133
+ queue_item = queue_item ,
134
+ error_type = error_type ,
135
+ error_message = error_message ,
136
+ error_traceback = error_traceback ,
137
+ )
133
138
134
139
def _on_before_run_session (self , queue_item : SessionQueueItem ) -> None :
135
140
# If profiling is enabled, start the profiler
@@ -166,7 +171,7 @@ def _on_after_run_session(self, queue_item: SessionQueueItem) -> None:
166
171
self ._services .performance_statistics .reset_stats ()
167
172
168
173
for callback in self ._on_after_run_session_callbacks :
169
- callback (queue_item )
174
+ callback (queue_item = queue_item )
170
175
171
176
def _on_before_run_node (self , invocation : BaseInvocation , queue_item : SessionQueueItem ):
172
177
"""Run before a node is executed"""
@@ -181,7 +186,7 @@ def _on_before_run_node(self, invocation: BaseInvocation, queue_item: SessionQue
181
186
)
182
187
183
188
for callback in self ._on_before_run_node_callbacks :
184
- callback (invocation , queue_item )
189
+ callback (invocation = invocation , queue_item = queue_item )
185
190
186
191
def _on_after_run_node (
187
192
self , invocation : BaseInvocation , queue_item : SessionQueueItem , output : BaseInvocationOutput
@@ -199,23 +204,23 @@ def _on_after_run_node(
199
204
)
200
205
201
206
for callback in self ._on_after_run_node_callbacks :
202
- callback (invocation , queue_item , output )
207
+ callback (invocation = invocation , queue_item = queue_item , output = output )
203
208
204
209
def _on_node_error (
205
210
self ,
206
211
invocation : BaseInvocation ,
207
212
queue_item : SessionQueueItem ,
208
- exc_type : type ,
209
- exc_value : BaseException ,
210
- exc_traceback : TracebackType ,
213
+ error_type : str ,
214
+ error_message : str ,
215
+ error_traceback : str ,
211
216
):
212
- stacktrace = get_stacktrace ( exc_type , exc_value , exc_traceback )
213
-
214
- queue_item .session .set_node_error (invocation .id , stacktrace )
217
+ # Node errors do not get the full traceback. Only the queue item gets the full traceback.
218
+ node_error = f" { error_type } : { error_message } "
219
+ queue_item .session .set_node_error (invocation .id , node_error )
215
220
self ._services .logger .error (
216
- f"Error while invoking session { queue_item .session_id } , invocation { invocation .id } ({ invocation .get_type ()} ): { exc_type . __name__ } "
221
+ f"Error while invoking session { queue_item .session_id } , invocation { invocation .id } ({ invocation .get_type ()} ): { error_message } "
217
222
)
218
- self ._services .logger .error (stacktrace )
223
+ self ._services .logger .error (error_traceback )
219
224
220
225
# Send error event
221
226
self ._services .events .emit_invocation_error (
@@ -225,14 +230,21 @@ def _on_node_error(
225
230
graph_execution_state_id = queue_item .session .id ,
226
231
node = invocation .model_dump (),
227
232
source_node_id = queue_item .session .prepared_source_mapping [invocation .id ],
228
- error_type = exc_type .__name__ ,
229
- error = stacktrace ,
233
+ error_type = error_type ,
234
+ error_message = error_message ,
235
+ error_traceback = error_traceback ,
230
236
user_id = getattr (queue_item , "user_id" , None ),
231
237
project_id = getattr (queue_item , "project_id" , None ),
232
238
)
233
239
234
240
for callback in self ._on_node_error_callbacks :
235
- callback (invocation , queue_item , exc_type , exc_value , exc_traceback )
241
+ callback (
242
+ invocation = invocation ,
243
+ queue_item = queue_item ,
244
+ error_type = error_type ,
245
+ error_message = error_message ,
246
+ error_traceback = error_traceback ,
247
+ )
236
248
237
249
238
250
class DefaultSessionProcessor (SessionProcessorBase ):
@@ -374,16 +386,25 @@ def _process(
374
386
self .session_runner .run (queue_item = self ._queue_item )
375
387
376
388
except Exception as e :
377
- # Must extract the exception traceback here to not lose its stacktrace when we change scope
378
- exc_traceback = e .__traceback__
379
- assert exc_traceback is not None
380
- self ._on_non_fatal_processor_error (self ._queue_item , type (e ), e , exc_traceback )
381
- # Immediately poll for next queue item
389
+ error_type = e .__class__ .__name__
390
+ error_message = str (e )
391
+ error_traceback = traceback .format_exc ()
392
+ self ._on_non_fatal_processor_error (
393
+ queue_item = self ._queue_item ,
394
+ error_type = error_type ,
395
+ error_message = error_message ,
396
+ error_traceback = error_traceback ,
397
+ )
398
+ # Wait for next polling interval or event to try again
382
399
poll_now_event .wait (self ._polling_interval )
383
400
continue
384
- except Exception :
401
+ except Exception as e :
385
402
# Fatal error in processor, log and pass - we're done here
386
- self ._invoker .services .logger .error (f"Fatal Error in session processor:\n { traceback .format_exc ()} " )
403
+ error_type = e .__class__ .__name__
404
+ error_message = str (e )
405
+ error_traceback = traceback .format_exc ()
406
+ self ._invoker .services .logger .error (f"Fatal Error in session processor { error_type } : { error_message } " )
407
+ self ._invoker .services .logger .error (error_traceback )
387
408
pass
388
409
finally :
389
410
stop_event .clear ()
@@ -394,19 +415,29 @@ def _process(
394
415
def _on_non_fatal_processor_error (
395
416
self ,
396
417
queue_item : Optional [SessionQueueItem ],
397
- exc_type : type ,
398
- exc_value : BaseException ,
399
- exc_traceback : TracebackType ,
418
+ error_type : str ,
419
+ error_message : str ,
420
+ error_traceback : str ,
400
421
) -> None :
401
- stacktrace = get_stacktrace (exc_type , exc_value , exc_traceback )
402
422
# Non-fatal error in processor
403
- self ._invoker .services .logger .error (f"Non-fatal error in session processor: { exc_type .__name__ } " )
404
- self ._invoker .services .logger .error (stacktrace )
423
+ self ._invoker .services .logger .error (f"Non-fatal error in session processor { error_type } : { error_message } " )
424
+ self ._invoker .services .logger .error (error_traceback )
425
+
405
426
if queue_item is not None :
406
427
# Update the queue item with the completed session
407
428
self ._invoker .services .session_queue .set_queue_item_session (queue_item .item_id , queue_item .session )
408
- # And cancel the queue item with an error
409
- self ._invoker .services .session_queue .cancel_queue_item (queue_item .item_id , error = stacktrace )
429
+ # Fail the queue item
430
+ self ._invoker .services .session_queue .fail_queue_item (
431
+ item_id = queue_item .item_id ,
432
+ error_type = error_type ,
433
+ error_message = error_message ,
434
+ error_traceback = error_traceback ,
435
+ )
410
436
411
437
for callback in self ._on_non_fatal_processor_error_callbacks :
412
- callback (exc_type , exc_value , exc_traceback , queue_item )
438
+ callback (
439
+ queue_item = queue_item ,
440
+ error_type = error_type ,
441
+ error_message = error_message ,
442
+ error_traceback = error_traceback ,
443
+ )
0 commit comments