Skip to content

Commit 81c7ad2

Browse files
authored
Add application failure category (#1719)
1 parent 1bcb339 commit 81c7ad2

File tree

5 files changed

+95
-13
lines changed

5 files changed

+95
-13
lines changed

packages/common/src/converter/failure-converter.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ import {
33
ApplicationFailure,
44
CancelledFailure,
55
ChildWorkflowFailure,
6+
decodeApplicationFailureCategory,
67
decodeRetryState,
78
decodeTimeoutType,
9+
encodeApplicationFailureCategory,
810
encodeRetryState,
911
encodeTimeoutType,
1012
FAILURE_SOURCE,
@@ -127,7 +129,9 @@ export class DefaultFailureConverter implements FailureConverter {
127129
failure.applicationFailureInfo.type,
128130
Boolean(failure.applicationFailureInfo.nonRetryable),
129131
arrayFromPayloads(payloadConverter, failure.applicationFailureInfo.details?.payloads),
130-
this.optionalFailureToOptionalError(failure.cause, payloadConverter)
132+
this.optionalFailureToOptionalError(failure.cause, payloadConverter),
133+
undefined,
134+
decodeApplicationFailureCategory(failure.applicationFailureInfo.category)
131135
);
132136
}
133137
if (failure.serverFailureInfo) {
@@ -273,6 +277,7 @@ export class DefaultFailureConverter implements FailureConverter {
273277
? { payloads: toPayloads(payloadConverter, ...err.details) }
274278
: undefined,
275279
nextRetryDelay: msOptionalToTs(err.nextRetryDelay),
280+
category: encodeApplicationFailureCategory(err.category),
276281
},
277282
};
278283
}

packages/common/src/failure.ts

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,34 @@ export const [encodeRetryState, decodeRetryState] = makeProtoEnumConverters<
101101
'RETRY_STATE_'
102102
);
103103

104+
/**
105+
* A category to describe the severity and change the observability behavior of an application failure.
106+
*
107+
* Currently, observability behaviour changes are limited to:
108+
* - activities that fail due to a BENIGN application failure emit DEBUG level logs and do not record metrics
109+
*
110+
* @experimental Category is a new feature and may be subject to change.
111+
*/
112+
export const ApplicationFailureCategory = {
113+
BENIGN: 'BENIGN',
114+
} as const;
115+
116+
export type ApplicationFailureCategory = (typeof ApplicationFailureCategory)[keyof typeof ApplicationFailureCategory];
117+
118+
export const [encodeApplicationFailureCategory, decodeApplicationFailureCategory] = makeProtoEnumConverters<
119+
temporal.api.enums.v1.ApplicationErrorCategory,
120+
typeof temporal.api.enums.v1.ApplicationErrorCategory,
121+
keyof typeof temporal.api.enums.v1.ApplicationErrorCategory,
122+
typeof ApplicationFailureCategory,
123+
'APPLICATION_ERROR_CATEGORY_'
124+
>(
125+
{
126+
[ApplicationFailureCategory.BENIGN]: 1,
127+
UNSPECIFIED: 0,
128+
} as const,
129+
'APPLICATION_ERROR_CATEGORY_'
130+
);
131+
104132
export type WorkflowExecution = temporal.api.common.v1.IWorkflowExecution;
105133

106134
/**
@@ -172,7 +200,8 @@ export class ApplicationFailure extends TemporalFailure {
172200
public readonly nonRetryable?: boolean | undefined | null,
173201
public readonly details?: unknown[] | undefined | null,
174202
cause?: Error,
175-
public readonly nextRetryDelay?: Duration | undefined | null
203+
public readonly nextRetryDelay?: Duration | undefined | null,
204+
public readonly category?: ApplicationFailureCategory | undefined | null
176205
) {
177206
super(message, cause);
178207
}
@@ -195,8 +224,8 @@ export class ApplicationFailure extends TemporalFailure {
195224
* By default, will be retryable (unless its `type` is included in {@link RetryPolicy.nonRetryableErrorTypes}).
196225
*/
197226
public static create(options: ApplicationFailureOptions): ApplicationFailure {
198-
const { message, type, nonRetryable = false, details, nextRetryDelay, cause } = options;
199-
return new this(message, type, nonRetryable, details, cause, nextRetryDelay);
227+
const { message, type, nonRetryable = false, details, nextRetryDelay, cause, category } = options;
228+
return new this(message, type, nonRetryable, details, cause, nextRetryDelay, category);
200229
}
201230

202231
/**
@@ -261,6 +290,12 @@ export interface ApplicationFailureOptions {
261290
* Cause of the failure
262291
*/
263292
cause?: Error;
293+
294+
/**
295+
* Severity category of the application error.
296+
* Affects worker-side logging and metrics behavior of this failure.
297+
*/
298+
category?: ApplicationFailureCategory;
264299
}
265300

266301
/**

packages/core-bridge/src/worker.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -520,9 +520,7 @@ mod config {
520520
impl From<PollerBehavior> for CorePollerBehavior {
521521
fn from(val: PollerBehavior) -> Self {
522522
match val {
523-
PollerBehavior::SimpleMaximum { maximum } => {
524-
Self::SimpleMaximum(maximum)
525-
}
523+
PollerBehavior::SimpleMaximum { maximum } => Self::SimpleMaximum(maximum),
526524
PollerBehavior::Autoscaling {
527525
minimum,
528526
maximum,
@@ -771,10 +769,7 @@ mod custom_slot_supplier {
771769
slot_type: SK::kind().into(),
772770
task_queue: ctx.task_queue().to_string(),
773771
worker_identity: ctx.worker_identity().to_string(),
774-
worker_deployment_version: ctx
775-
.worker_deployment_version()
776-
.clone()
777-
.map(Into::into),
772+
worker_deployment_version: ctx.worker_deployment_version().clone().map(Into::into),
778773
is_sticky: ctx.is_sticky(),
779774
};
780775

packages/test/src/test-integration-workflows-with-recorded-logs.ts

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { ExecutionContext } from 'ava';
22
import * as workflow from '@temporalio/workflow';
3-
import { HandlerUnfinishedPolicy } from '@temporalio/common';
3+
import { ApplicationFailureCategory, HandlerUnfinishedPolicy } from '@temporalio/common';
44
import { LogEntry } from '@temporalio/worker';
55
import { WorkflowFailedError, WorkflowUpdateFailedError } from '@temporalio/client';
66
import { Context, helpers, makeTestFunction } from './helpers-integration';
@@ -469,3 +469,44 @@ async function assertWorkflowUpdateFailedBecauseWorkflowCompleted(t: ExecutionCo
469469
t.true((cause as workflow.ApplicationFailure).type === 'AcceptedUpdateCompletedWorkflow');
470470
t.regex((cause as workflow.ApplicationFailure).message, /Workflow completed before the Update completed/);
471471
}
472+
473+
export async function raiseErrorWorkflow(useBenign: boolean): Promise<void> {
474+
await workflow
475+
.proxyActivities({ startToCloseTimeout: '10s', retry: { maximumAttempts: 1 } })
476+
.throwApplicationFailureActivity(useBenign);
477+
}
478+
479+
test('Application failure category controls log level', async (t) => {
480+
const { createWorker, startWorkflow } = helpers(t);
481+
const worker = await createWorker({
482+
activities: {
483+
async throwApplicationFailureActivity(useBenign: boolean) {
484+
throw workflow.ApplicationFailure.create({
485+
category: useBenign ? ApplicationFailureCategory.BENIGN : undefined,
486+
});
487+
},
488+
},
489+
});
490+
491+
await worker.runUntil(async () => {
492+
// Run with BENIGN
493+
let handle = await startWorkflow(raiseErrorWorkflow, { args: [true] });
494+
try {
495+
await handle.result();
496+
} catch (_) {
497+
const logs = recordedLogs[handle.workflowId];
498+
const activityFailureLog = logs.find((log) => log.message.includes('Activity failed'));
499+
t.true(activityFailureLog !== undefined && activityFailureLog.level === 'DEBUG');
500+
}
501+
502+
// Run without BENIGN
503+
handle = await startWorkflow(raiseErrorWorkflow, { args: [false] });
504+
try {
505+
await handle.result();
506+
} catch (_) {
507+
const logs = recordedLogs[handle.workflowId];
508+
const activityFailureLog = logs.find((log) => log.message.includes('Activity failed'));
509+
t.true(activityFailureLog !== undefined && activityFailureLog.level === 'WARN');
510+
}
511+
});
512+
});

packages/worker/src/activity.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { asyncLocalStorage, CompleteAsyncError, Context, Info } from '@temporali
33
import {
44
ActivityFunction,
55
ApplicationFailure,
6+
ApplicationFailureCategory,
67
CancelledFailure,
78
ensureApplicationFailure,
89
FAILURE_SOURCE,
@@ -142,7 +143,12 @@ export class Activity {
142143
} else if (error instanceof CompleteAsyncError) {
143144
this.workerLogger.debug('Activity will complete asynchronously', { durationMs });
144145
} else {
145-
this.workerLogger.warn('Activity failed', { error, durationMs });
146+
if (error instanceof ApplicationFailure && error.category === ApplicationFailureCategory.BENIGN) {
147+
// Downgrade log level to DEBUG for benign application errors.
148+
this.workerLogger.debug('Activity failed', { error, durationMs });
149+
} else {
150+
this.workerLogger.warn('Activity failed', { error, durationMs });
151+
}
146152
}
147153
}
148154
}

0 commit comments

Comments
 (0)