Skip to content

Commit a2c2d92

Browse files
authored
Use redis worker for run heartbeats and alerts (#1669)
* Move the task run heartbeats to RedisWorker
* Move alerts to redis worker, improving redis worker
* Fix typecheck errors
* Use single threaded tests for redis worker
* Enable/disable the redis workers independently
* Remove preview release from PR checks
1 parent b946b9f commit a2c2d92

33 files changed

+796
-502
lines changed

.github/workflows/pr_checks.yml

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -28,34 +28,3 @@ jobs:
2828
with:
2929
package: cli-v3
3030
secrets: inherit
31-
32-
preview-release:
33-
name: Preview Release
34-
needs: [typecheck, units, e2e]
35-
if: github.repository == 'triggerdotdev/trigger.dev'
36-
runs-on: ubuntu-latest
37-
steps:
38-
- name: ⬇️ Checkout repo
39-
uses: actions/checkout@v4
40-
with:
41-
fetch-depth: 0
42-
43-
- name: ⎔ Setup pnpm
44-
uses: pnpm/action-setup@v4
45-
with:
46-
version: 8.15.5
47-
48-
- name: ⎔ Setup node
49-
uses: buildjet/setup-node@v4
50-
with:
51-
node-version: 20.11.1
52-
cache: "pnpm"
53-
54-
- name: 📥 Download deps
55-
run: pnpm install --frozen-lockfile
56-
57-
- name: 🏗️ Build
58-
run: pnpm run build --filter "@trigger.dev/*" --filter "trigger.dev"
59-
60-
- name: ⚡ Publish preview release
61-
run: npx pkg-pr-new publish --no-template $(ls -d ./packages/*)

apps/webapp/app/env.server.ts

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,82 @@ const EnvironmentSchema = z.object({
368368
BATCH_METADATA_OPERATIONS_FLUSH_INTERVAL_MS: z.coerce.number().int().default(1000),
369369
BATCH_METADATA_OPERATIONS_FLUSH_ENABLED: z.string().default("1"),
370370
BATCH_METADATA_OPERATIONS_FLUSH_LOGGING_ENABLED: z.string().default("1"),
371+
372+
LEGACY_RUN_ENGINE_WORKER_ENABLED: z.string().default(process.env.WORKER_ENABLED ?? "true"),
373+
LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_WORKERS: z.coerce.number().int().default(2),
374+
LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_TASKS_PER_WORKER: z.coerce.number().int().default(1),
375+
LEGACY_RUN_ENGINE_WORKER_POLL_INTERVAL: z.coerce.number().int().default(1000),
376+
LEGACY_RUN_ENGINE_WORKER_IMMEDIATE_POLL_INTERVAL: z.coerce.number().int().default(50),
377+
LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_LIMIT: z.coerce.number().int().default(100),
378+
379+
LEGACY_RUN_ENGINE_WORKER_REDIS_HOST: z
380+
.string()
381+
.optional()
382+
.transform((v) => v ?? process.env.REDIS_HOST),
383+
LEGACY_RUN_ENGINE_WORKER_REDIS_READER_HOST: z
384+
.string()
385+
.optional()
386+
.transform((v) => v ?? process.env.REDIS_READER_HOST),
387+
LEGACY_RUN_ENGINE_WORKER_REDIS_READER_PORT: z.coerce
388+
.number()
389+
.optional()
390+
.transform(
391+
(v) =>
392+
v ?? (process.env.REDIS_READER_PORT ? parseInt(process.env.REDIS_READER_PORT) : undefined)
393+
),
394+
LEGACY_RUN_ENGINE_WORKER_REDIS_PORT: z.coerce
395+
.number()
396+
.optional()
397+
.transform((v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined)),
398+
LEGACY_RUN_ENGINE_WORKER_REDIS_USERNAME: z
399+
.string()
400+
.optional()
401+
.transform((v) => v ?? process.env.REDIS_USERNAME),
402+
LEGACY_RUN_ENGINE_WORKER_REDIS_PASSWORD: z
403+
.string()
404+
.optional()
405+
.transform((v) => v ?? process.env.REDIS_PASSWORD),
406+
LEGACY_RUN_ENGINE_WORKER_REDIS_TLS_DISABLED: z
407+
.string()
408+
.default(process.env.REDIS_TLS_DISABLED ?? "false"),
409+
LEGACY_RUN_ENGINE_WORKER_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"),
410+
411+
COMMON_WORKER_ENABLED: z.string().default(process.env.WORKER_ENABLED ?? "true"),
412+
COMMON_WORKER_CONCURRENCY_WORKERS: z.coerce.number().int().default(2),
413+
COMMON_WORKER_CONCURRENCY_TASKS_PER_WORKER: z.coerce.number().int().default(10),
414+
COMMON_WORKER_POLL_INTERVAL: z.coerce.number().int().default(1000),
415+
COMMON_WORKER_IMMEDIATE_POLL_INTERVAL: z.coerce.number().int().default(50),
416+
COMMON_WORKER_CONCURRENCY_LIMIT: z.coerce.number().int().default(100),
417+
418+
COMMON_WORKER_REDIS_HOST: z
419+
.string()
420+
.optional()
421+
.transform((v) => v ?? process.env.REDIS_HOST),
422+
COMMON_WORKER_REDIS_READER_HOST: z
423+
.string()
424+
.optional()
425+
.transform((v) => v ?? process.env.REDIS_READER_HOST),
426+
COMMON_WORKER_REDIS_READER_PORT: z.coerce
427+
.number()
428+
.optional()
429+
.transform(
430+
(v) =>
431+
v ?? (process.env.REDIS_READER_PORT ? parseInt(process.env.REDIS_READER_PORT) : undefined)
432+
),
433+
COMMON_WORKER_REDIS_PORT: z.coerce
434+
.number()
435+
.optional()
436+
.transform((v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined)),
437+
COMMON_WORKER_REDIS_USERNAME: z
438+
.string()
439+
.optional()
440+
.transform((v) => v ?? process.env.REDIS_USERNAME),
441+
COMMON_WORKER_REDIS_PASSWORD: z
442+
.string()
443+
.optional()
444+
.transform((v) => v ?? process.env.REDIS_PASSWORD),
445+
COMMON_WORKER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
446+
COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"),
371447
});
372448

373449
export type Environment = z.infer<typeof EnvironmentSchema>;

apps/webapp/app/services/worker.server.ts

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@ import { $replica, prisma } from "~/db.server";
66
import { env } from "~/env.server";
77
import { MarqsConcurrencyMonitor } from "~/v3/marqs/concurrencyMonitor.server";
88
import { RequeueV2Message } from "~/v3/marqs/requeueV2Message.server";
9-
import { RequeueTaskRunService } from "~/v3/requeueTaskRun.server";
109
import { DeliverAlertService } from "~/v3/services/alerts/deliverAlert.server";
1110
import { PerformDeploymentAlertsService } from "~/v3/services/alerts/performDeploymentAlerts.server";
12-
import { PerformTaskAttemptAlertsService } from "~/v3/services/alerts/performTaskAttemptAlerts.server";
1311
import { PerformBulkActionService } from "~/v3/services/bulk/performBulkAction.server";
1412
import { CancelTaskAttemptDependenciesService } from "~/v3/services/cancelTaskAttemptDependencies.server";
1513
import { EnqueueDelayedRunService } from "~/v3/services/enqueueDelayedRun.server";
@@ -157,9 +155,6 @@ const workerCatalog = {
157155
"v3.performTaskRunAlerts": z.object({
158156
runId: z.string(),
159157
}),
160-
"v3.performTaskAttemptAlerts": z.object({
161-
attemptId: z.string(),
162-
}),
163158
"v3.deliverAlert": z.object({
164159
alertId: z.string(),
165160
}),
@@ -610,15 +605,6 @@ function getWorkerQueue() {
610605
return await service.call(payload.runId);
611606
},
612607
},
613-
"v3.performTaskAttemptAlerts": {
614-
priority: 0,
615-
maxAttempts: 3,
616-
handler: async (payload, job) => {
617-
const service = new PerformTaskAttemptAlertsService();
618-
619-
return await service.call(payload.attemptId);
620-
},
621-
},
622608
"v3.deliverAlert": {
623609
priority: 0,
624610
maxAttempts: 8,
@@ -658,11 +644,7 @@ function getWorkerQueue() {
658644
"v3.requeueTaskRun": {
659645
priority: 0,
660646
maxAttempts: 3,
661-
handler: async (payload, job) => {
662-
const service = new RequeueTaskRunService();
663-
664-
await service.call(payload.runId);
665-
},
647+
handler: async (payload, job) => {}, // This is now handled by redisWorker
666648
},
667649
"v3.retryAttempt": {
668650
priority: 0,
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import { Worker as RedisWorker } from "@internal/redis-worker";
2+
import { Logger } from "@trigger.dev/core/logger";
3+
import { z } from "zod";
4+
import { env } from "~/env.server";
5+
import { logger } from "~/services/logger.server";
6+
import { singleton } from "~/utils/singleton";
7+
import { DeliverAlertService } from "./services/alerts/deliverAlert.server";
8+
import { PerformDeploymentAlertsService } from "./services/alerts/performDeploymentAlerts.server";
9+
import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server";
10+
11+
/**
 * Builds the Redis-backed "common-worker" that runs the alert jobs
 * (`v3.performTaskRunAlerts`, `v3.performDeploymentAlerts`, `v3.deliverAlert`).
 *
 * Connection and tuning values are read from the `COMMON_WORKER_*` environment
 * settings. The worker is always constructed, but `start()` is only called when
 * `COMMON_WORKER_ENABLED` is exactly `"true"`.
 *
 * @returns The constructed worker (started or not).
 */
function initializeWorker() {
  // TLS options are included unless COMMON_WORKER_REDIS_TLS_DISABLED is exactly "true".
  const tlsOptions = env.COMMON_WORKER_REDIS_TLS_DISABLED === "true" ? {} : { tls: {} };

  const connection = {
    keyPrefix: "common:worker:",
    host: env.COMMON_WORKER_REDIS_HOST,
    port: env.COMMON_WORKER_REDIS_PORT,
    username: env.COMMON_WORKER_REDIS_USERNAME,
    password: env.COMMON_WORKER_REDIS_PASSWORD,
    enableAutoPipelining: true,
    ...tlsOptions,
  };

  logger.debug(`👨‍🏭 Initializing common worker at host ${env.COMMON_WORKER_REDIS_HOST}`);

  // All three jobs share the same visibility timeout and retry policy; only the
  // payload schema differs. A fresh entry object is created per job.
  const catalogEntry = <TSchema extends z.ZodTypeAny>(schema: TSchema) => ({
    schema,
    visibilityTimeoutMs: 60_000,
    retry: {
      maxAttempts: 3,
    },
  });

  const commonWorkerInstance = new RedisWorker({
    name: "common-worker",
    redisOptions: connection,
    catalog: {
      "v3.performTaskRunAlerts": catalogEntry(z.object({ runId: z.string() })),
      "v3.performDeploymentAlerts": catalogEntry(z.object({ deploymentId: z.string() })),
      "v3.deliverAlert": catalogEntry(z.object({ alertId: z.string() })),
    },
    concurrency: {
      workers: env.COMMON_WORKER_CONCURRENCY_WORKERS,
      tasksPerWorker: env.COMMON_WORKER_CONCURRENCY_TASKS_PER_WORKER,
      limit: env.COMMON_WORKER_CONCURRENCY_LIMIT,
    },
    pollIntervalMs: env.COMMON_WORKER_POLL_INTERVAL,
    immediatePollIntervalMs: env.COMMON_WORKER_IMMEDIATE_POLL_INTERVAL,
    logger: new Logger("CommonWorker", "debug"),
    jobs: {
      // Each handler hands the id from the payload to a freshly constructed service.
      "v3.deliverAlert": async ({ payload: { alertId } }) =>
        await new DeliverAlertService().call(alertId),
      "v3.performDeploymentAlerts": async ({ payload: { deploymentId } }) =>
        await new PerformDeploymentAlertsService().call(deploymentId),
      "v3.performTaskRunAlerts": async ({ payload: { runId } }) =>
        await new PerformTaskRunAlertsService().call(runId),
    },
  });

  // When the worker is not enabled, hand back the instance without starting it.
  if (env.COMMON_WORKER_ENABLED !== "true") {
    return commonWorkerInstance;
  }

  logger.debug(
    `👨‍🏭 Starting common worker at host ${env.COMMON_WORKER_REDIS_HOST}, pollInterval = ${env.COMMON_WORKER_POLL_INTERVAL}, immediatePollInterval = ${env.COMMON_WORKER_IMMEDIATE_POLL_INTERVAL}, workers = ${env.COMMON_WORKER_CONCURRENCY_WORKERS}, tasksPerWorker = ${env.COMMON_WORKER_CONCURRENCY_TASKS_PER_WORKER}, concurrencyLimit = ${env.COMMON_WORKER_CONCURRENCY_LIMIT}`
  );
  commonWorkerInstance.start();

  return commonWorkerInstance;
}
93+
/**
 * Shared common worker instance, obtained by passing `initializeWorker` to
 * `singleton` under the key "commonWorker".
 * NOTE(review): presumably `singleton` creates the worker once per process and
 * reuses it afterwards — confirm against `~/utils/singleton`.
 */
export const commonWorker = singleton("commonWorker", initializeWorker);
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { Worker as RedisWorker } from "@internal/redis-worker";
2+
import { Logger } from "@trigger.dev/core/logger";
3+
import { z } from "zod";
4+
import { env } from "~/env.server";
5+
import { logger } from "~/services/logger.server";
6+
import { singleton } from "~/utils/singleton";
7+
import { TaskRunHeartbeatFailedService } from "./taskRunHeartbeatFailed.server";
8+
9+
/**
 * Builds the Redis-backed "legacy-run-engine-worker", whose only job is
 * `runHeartbeat`: it passes the payload's `runId` to `TaskRunHeartbeatFailedService`.
 *
 * Connection and tuning values are read from the `LEGACY_RUN_ENGINE_WORKER_*`
 * environment settings. The worker is always constructed, but `start()` is only
 * called when `LEGACY_RUN_ENGINE_WORKER_ENABLED` is exactly `"true"`.
 *
 * @returns The constructed worker (started or not).
 */
function initializeWorker() {
  // TLS options are included unless LEGACY_RUN_ENGINE_WORKER_REDIS_TLS_DISABLED is exactly "true".
  const tlsOptions =
    env.LEGACY_RUN_ENGINE_WORKER_REDIS_TLS_DISABLED === "true" ? {} : { tls: {} };

  const connection = {
    keyPrefix: "legacy-run-engine:worker:",
    host: env.LEGACY_RUN_ENGINE_WORKER_REDIS_HOST,
    port: env.LEGACY_RUN_ENGINE_WORKER_REDIS_PORT,
    username: env.LEGACY_RUN_ENGINE_WORKER_REDIS_USERNAME,
    password: env.LEGACY_RUN_ENGINE_WORKER_REDIS_PASSWORD,
    enableAutoPipelining: true,
    ...tlsOptions,
  };

  logger.debug(
    `👨‍🏭 Initializing legacy run engine worker at host ${env.LEGACY_RUN_ENGINE_WORKER_REDIS_HOST}`
  );

  const heartbeatWorker = new RedisWorker({
    name: "legacy-run-engine-worker",
    redisOptions: connection,
    catalog: {
      runHeartbeat: {
        schema: z.object({ runId: z.string() }),
        visibilityTimeoutMs: 60_000,
        retry: { maxAttempts: 3 },
      },
    },
    concurrency: {
      workers: env.LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_WORKERS,
      tasksPerWorker: env.LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_TASKS_PER_WORKER,
      limit: env.LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_LIMIT,
    },
    pollIntervalMs: env.LEGACY_RUN_ENGINE_WORKER_POLL_INTERVAL,
    immediatePollIntervalMs: env.LEGACY_RUN_ENGINE_WORKER_IMMEDIATE_POLL_INTERVAL,
    logger: new Logger("LegacyRunEngineWorker", "debug"),
    jobs: {
      // The handler awaits the service call and resolves with no value.
      runHeartbeat: async ({ payload: { runId } }) => {
        await new TaskRunHeartbeatFailedService().call(runId);
      },
    },
  });

  // When the worker is not enabled, hand back the instance without starting it.
  if (env.LEGACY_RUN_ENGINE_WORKER_ENABLED !== "true") {
    return heartbeatWorker;
  }

  logger.debug(
    `👨‍🏭 Starting legacy run engine worker at host ${env.LEGACY_RUN_ENGINE_WORKER_REDIS_HOST}, pollInterval = ${env.LEGACY_RUN_ENGINE_WORKER_POLL_INTERVAL}, immediatePollInterval = ${env.LEGACY_RUN_ENGINE_WORKER_IMMEDIATE_POLL_INTERVAL}, workers = ${env.LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_WORKERS}, tasksPerWorker = ${env.LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_TASKS_PER_WORKER}, concurrencyLimit = ${env.LEGACY_RUN_ENGINE_WORKER_CONCURRENCY_LIMIT}`
  );
  heartbeatWorker.start();

  return heartbeatWorker;
}
65+
66+
/**
 * Shared legacy run engine worker instance, obtained by passing `initializeWorker`
 * to `singleton` under the key "legacyRunEngineWorker".
 * NOTE(review): presumably `singleton` creates the worker once per process and
 * reuses it afterwards — confirm against `~/utils/singleton`.
 */
export const legacyRunEngineWorker = singleton("legacyRunEngineWorker", initializeWorker);

apps/webapp/app/v3/marqs/index.server.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import {
3030
MessageQueueSubscriber,
3131
VisibilityTimeoutStrategy,
3232
} from "./types";
33-
import { V3VisibilityTimeout } from "./v3VisibilityTimeout.server";
33+
import { V3LegacyRunEngineWorkerVisibilityTimeout } from "./v3VisibilityTimeout.server";
3434

3535
const KEY_PREFIX = "marqs:";
3636

@@ -1611,7 +1611,7 @@ function getMarQSClient() {
16111611
name: "marqs",
16121612
tracer: trace.getTracer("marqs"),
16131613
keysProducer,
1614-
visibilityTimeoutStrategy: new V3VisibilityTimeout(),
1614+
visibilityTimeoutStrategy: new V3LegacyRunEngineWorkerVisibilityTimeout(),
16151615
queuePriorityStrategy: new FairDequeuingStrategy({
16161616
tracer: tracer,
16171617
redis,
Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,28 @@
1-
import { RequeueTaskRunService } from "../requeueTaskRun.server";
1+
import { legacyRunEngineWorker } from "../legacyRunEngineWorker.server";
2+
import { TaskRunHeartbeatFailedService } from "../taskRunHeartbeatFailed.server";
23
import { VisibilityTimeoutStrategy } from "./types";
34

4-
export class V3VisibilityTimeout implements VisibilityTimeoutStrategy {
5+
/**
 * Visibility-timeout strategy (renamed in this commit from `V3VisibilityTimeout`).
 *
 * After the change, `heartbeat` calls `TaskRunHeartbeatFailedService.enqueue` with the
 * message id and a Date `timeoutInMs` from now, and `cancelHeartbeat` calls
 * `TaskRunHeartbeatFailedService.dequeue` with the message id. The removed (`-`) lines
 * show the previous calls to `RequeueTaskRunService`.
 * NOTE(review): the name suggests this is the Graphile-worker-backed variant kept
 * alongside the Redis worker one — confirm against TaskRunHeartbeatFailedService.
 */
export class V3GraphileVisibilityTimeout implements VisibilityTimeoutStrategy {
56
async heartbeat(messageId: string, timeoutInMs: number): Promise<void> {
6-
await RequeueTaskRunService.enqueue(messageId, new Date(Date.now() + timeoutInMs));
7+
await TaskRunHeartbeatFailedService.enqueue(messageId, new Date(Date.now() + timeoutInMs));
78
}
89

910
async cancelHeartbeat(messageId: string): Promise<void> {
10-
await RequeueTaskRunService.dequeue(messageId);
11+
await TaskRunHeartbeatFailedService.dequeue(messageId);
12+
}
13+
}
14+
15+
/**
 * Visibility-timeout strategy backed by `legacyRunEngineWorker`.
 *
 * Each message gets a `runHeartbeat` job whose id is `heartbeat:<messageId>`.
 */
export class V3LegacyRunEngineWorkerVisibilityTimeout implements VisibilityTimeoutStrategy {
  /**
   * Enqueues a `runHeartbeat` job for the message that becomes available
   * `timeoutInMs` milliseconds from now.
   *
   * @param messageId - Message id; also sent as the job's `runId` payload.
   * @param timeoutInMs - Delay, in milliseconds, before the job becomes available.
   */
  async heartbeat(messageId: string, timeoutInMs: number): Promise<void> {
    const jobId = `heartbeat:${messageId}`;
    const availableAt = new Date(Date.now() + timeoutInMs);

    await legacyRunEngineWorker.enqueue({
      id: jobId,
      job: "runHeartbeat",
      payload: { runId: messageId },
      availableAt,
    });
  }

  /**
   * Acknowledges the message's `heartbeat:<messageId>` job on the worker.
   *
   * @param messageId - Message id whose heartbeat job should be acknowledged.
   */
  async cancelHeartbeat(messageId: string): Promise<void> {
    const jobId = `heartbeat:${messageId}`;

    await legacyRunEngineWorker.ack(jobId);
  }
}

0 commit comments

Comments
 (0)