Skip to content

Commit 9afd28f

Browse files
committed
Nexus error chain workaround (#944)
* Always move first error message to top-level
* Inject ApplicationError
1 parent 63c21fe commit 9afd28f

File tree

2 files changed

+36
-31
lines changed

2 files changed

+36
-31
lines changed

temporalio/worker/_nexus.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -337,38 +337,24 @@ async def _nexus_error_to_nexus_failure_proto(
337337
338338
See https://github.com/nexus-rpc/api/blob/main/SPEC.md#failure
339339
"""
340-
message = str(error)
341340
if cause := error.__cause__:
342341
try:
343342
failure = temporalio.api.failure.v1.Failure()
344343
await self._data_converter.encode_failure(cause, failure)
345-
# nexusrpc.HandlerError and nexusrpc.OperationError have their
346-
# own error messages and stack traces, independent of any cause
347-
# exception they may have, and it would be reasonable to expect
348-
# these to be propagated to the caller.
349-
#
350-
# In the case of OperationError (UnsuccessfulOperationError
351-
# proto), the server takes the message from the top-level
352-
# UnsuccessfulOperationError and replaces the message of the
353-
# first entry in the details chain with it. Presumably the
354-
# server is anticipating that we've hoisted the message to that
355-
# position and is undoing the hoist. Therefore in that case, we
356-
# put the message from the first entry of the details chain at
357-
# the top level and accept that the message of the
358-
# OperationError itself will be lost.
359-
#
360-
# Note that other SDKs (e.g. Java) remove the message from the
361-
# first item in the details chain, since constructors are
362-
# controlled such that the nexus exception itself does not have
363-
# its own message.
364-
#
344+
# Following other SDKs, we move the message from the first item
345+
# in the details chain to the top level nexus.v1.Failure
346+
# message. In Go and Java this particularly makes sense since
347+
# their constructors are controlled such that the nexus
348+
# exception itself does not have its own message. However, in
349+
# Python, nexusrpc.HandlerError and nexusrpc.OperationError have
350+
# their own error messages and stack traces, independent of any
351+
# cause exception they may have, and these must be propagated to
352+
# the caller. See _exception_to_handler_error for how we address
353+
# this by injecting an additional error into the cause chain
354+
# before the current function is called.
365355
failure_dict = google.protobuf.json_format.MessageToDict(failure)
366-
if isinstance(error, nexusrpc.OperationError):
367-
message = failure_dict.pop("message", str(error))
368-
else:
369-
message = str(error)
370356
return temporalio.api.nexus.v1.Failure(
371-
message=message,
357+
message=failure_dict.pop("message", str(error)),
372358
metadata={"type": _TEMPORAL_FAILURE_PROTO_TYPE},
373359
details=json.dumps(
374360
failure_dict,
@@ -446,7 +432,21 @@ def _exception_to_handler_error(err: BaseException) -> nexusrpc.HandlerError:
446432
# Based on sdk-typescript's convertKnownErrors:
447433
# https://github.com/temporalio/sdk-typescript/blob/nexus/packages/worker/src/nexus.ts
448434
if isinstance(err, nexusrpc.HandlerError):
449-
return err
435+
# Insert an ApplicationError at the head of the cause chain to hold the
436+
# HandlerError's message and traceback. We do this because
437+
# _nexus_error_to_nexus_failure_proto moves the message at the head of
438+
# the cause chain to be the top-level nexus.Failure message. Therefore,
439+
# if we did not do this, then the HandlerError's own message and
440+
# traceback would be lost. (This hoisting behavior makes sense for Go
441+
# and Java since they control construction of HandlerError such that it
442+
# does not have its own message or stack trace.)
443+
handler_err = err
444+
err = ApplicationError(
445+
message=str(handler_err),
446+
non_retryable=not handler_err.retryable,
447+
)
448+
err.__traceback__ = handler_err.__traceback__
449+
err.__cause__ = handler_err.__cause__
450450
elif isinstance(err, ApplicationError):
451451
handler_err = nexusrpc.HandlerError(
452452
# TODO(nexus-preview): confirm what we want as message here

tests/nexus/test_workflow_caller_error_chains.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,14 @@ def action_in_nexus_operation():
174174
(
175175
ApplicationError,
176176
{
177-
# TODO(nexus-preview): empirically, this is "handler-error-message",
178-
# but it should be "runtime-error-message"
179-
# "message": "runtime-error-message",
177+
"message": "handler-error-message",
178+
"non_retryable": True,
179+
},
180+
),
181+
(
182+
ApplicationError,
183+
{
184+
"message": "runtime-error-message",
180185
"type": "RuntimeError",
181186
"non_retryable": False,
182187
},
@@ -234,7 +239,7 @@ def action_in_nexus_operation():
234239
(
235240
nexusrpc.HandlerError,
236241
{
237-
"message": "handler-error-message",
242+
"message": "handler-error-message-2",
238243
"type": nexusrpc.HandlerErrorType.UNAVAILABLE,
239244
"retryable": True,
240245
},

0 commit comments

Comments
 (0)