From 356a90b39e4c6a7907714edfa36d5fd3f8b0e67e Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Wed, 26 Mar 2025 11:41:24 +0000 Subject: [PATCH 01/21] logs for optional services --- apps/supervisor/src/index.ts | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index b5d7ea36d1..a2168f7691 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -49,18 +49,31 @@ class ManagedSupervisor { const workloadApiPortExternal = env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL; if (env.POD_CLEANER_ENABLED) { + this.logger.log("[ManagedWorker] ๐Ÿงน Pod cleaner enabled", { + namespace: env.KUBERNETES_NAMESPACE, + batchSize: env.POD_CLEANER_BATCH_SIZE, + intervalMs: env.POD_CLEANER_INTERVAL_MS, + }); this.podCleaner = new PodCleaner({ namespace: env.KUBERNETES_NAMESPACE, batchSize: env.POD_CLEANER_BATCH_SIZE, intervalMs: env.POD_CLEANER_INTERVAL_MS, }); + } else { + this.logger.warn("[ManagedWorker] Pod cleaner disabled"); } if (env.FAILED_POD_HANDLER_ENABLED) { + this.logger.log("[ManagedWorker] ๐Ÿ” Failed pod handler enabled", { + namespace: env.KUBERNETES_NAMESPACE, + reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, + }); this.failedPodHandler = new FailedPodHandler({ namespace: env.KUBERNETES_NAMESPACE, reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, }); + } else { + this.logger.warn("[ManagedWorker] Failed pod handler disabled"); } if (this.warmStartUrl) { @@ -299,13 +312,9 @@ class ManagedSupervisor { async start() { this.logger.log("[ManagedWorker] Starting up"); - if (this.podCleaner) { - await this.podCleaner.start(); - } - - if (this.failedPodHandler) { - await this.failedPodHandler.start(); - } + // Optional services + await this.podCleaner?.start(); + await this.failedPodHandler?.start(); if (env.TRIGGER_WORKLOAD_API_ENABLED) { this.logger.log("[ManagedWorker] Workload API enabled", { @@ -319,7 +328,6 @@ class ManagedSupervisor { } await this.workerSession.start(); - await this.httpServer.start(); } @@ -327,13 +335,9 @@ class ManagedSupervisor { this.logger.log("[ManagedWorker] Shutting down"); await this.httpServer.stop(); - if (this.podCleaner) { - await this.podCleaner.stop(); - } - - if (this.failedPodHandler) { - await this.failedPodHandler.stop(); - } + // Optional services + await this.podCleaner?.stop(); + await this.failedPodHandler?.stop(); } } From a3a57d9a27a9aa27546f070acc3242300e8d1380 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Wed, 26 Mar 2025 17:53:09 +0000 Subject: [PATCH 02/21] print env vars on startup in debug mode --- apps/supervisor/src/env.ts | 3 +++ apps/supervisor/src/index.ts | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 5fe5abe21b..d7957be90b 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -63,6 +63,9 @@ const Env = z.object({ // Failed pod handler FAILED_POD_HANDLER_ENABLED: BoolEnv.default(true), FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS: z.coerce.number().int().default(1000), + + // Debug + DEBUG: BoolEnv.default(false), }); export const env = Env.parse(stdEnv); diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index a2168f7691..d69f761018 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -44,6 +44,12 @@ class ManagedSupervisor { private readonly warmStartUrl = env.TRIGGER_WARM_START_URL; constructor() { + const { TRIGGER_WORKER_TOKEN, MANAGED_WORKER_SECRET, ...envWithoutSecrets } = env; + + if (env.DEBUG) { + console.debug("[ManagedSupervisor] Starting up", { envWithoutSecrets }); + } + const workloadApiProtocol = env.TRIGGER_WORKLOAD_API_PROTOCOL; const workloadApiDomain = env.TRIGGER_WORKLOAD_API_DOMAIN; const workloadApiPortExternal = env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL; From d2dd535f720c52ae7b97e0a0216dcdce6289599d Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:32:39 +0000 Subject: [PATCH 03/21] routes need to explicitly ask to keep connection alive --- packages/core/src/v3/apps/http.ts | 8 ++++---- packages/core/src/v3/serverOnly/httpServer.ts | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/packages/core/src/v3/apps/http.ts b/packages/core/src/v3/apps/http.ts index 10c02f074f..7e41f35f4a 100644 --- a/packages/core/src/v3/apps/http.ts +++ b/packages/core/src/v3/apps/http.ts @@ -45,7 +45,7 @@ export class HttpReply { constructor(private response: Parameters[1]) {} empty(status?: number) { - if (this.alreadyReplied) { + if (this.hasReplied) { return; } @@ -53,7 +53,7 @@ export class HttpReply { } text(text: string, status?: number, contentType?: string) { - if (this.alreadyReplied) { + if (this.hasReplied) { return; } @@ -63,7 +63,7 @@ export class HttpReply { } json(value: any, pretty?: boolean, status?: number) { - if (this.alreadyReplied) { + if (this.hasReplied) { return; } @@ -74,7 +74,7 @@ export class HttpReply { ); } - private get alreadyReplied() { + get hasReplied() { return this.response.headersSent; } } diff --git a/packages/core/src/v3/serverOnly/httpServer.ts b/packages/core/src/v3/serverOnly/httpServer.ts index 722fa777b3..3aa327a8bd 100644 --- a/packages/core/src/v3/serverOnly/httpServer.ts +++ b/packages/core/src/v3/serverOnly/httpServer.ts @@ -28,6 +28,7 @@ interface RouteDefinition< paramsSchema?: TParams; querySchema?: TQuery; bodySchema?: TBody; + keepConnectionAlive?: boolean; handler: RouteHandler; } @@ -156,7 +157,8 @@ export class HttpServer { return reply.empty(405); } - const { handler, paramsSchema, querySchema, bodySchema } = routeDefinition; + const { handler, paramsSchema, querySchema, bodySchema, keepConnectionAlive } = + routeDefinition; const params = this.parseRouteParams(route, url); const parsedParams = this.optionalSchema(paramsSchema, params); @@ -202,6 +204,11 @@ export class HttpServer { logger.error("Route handler error", { error }); return reply.empty(500); } + + if (keepConnectionAlive) { + // Return early to keep the connection alive + return; + } } catch (error) { logger.error("Failed to handle request", { error }); return reply.empty(500); From 29ce91ada86ac9f21c60b8534f3aeff67087ca5b Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 27 Mar 2025 15:53:10 +0000 Subject: [PATCH 04/21] log indicators for now --- packages/core/src/v3/serverOnly/k8s.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/core/src/v3/serverOnly/k8s.ts b/packages/core/src/v3/serverOnly/k8s.ts index 333166a038..4234e15c49 100644 --- a/packages/core/src/v3/serverOnly/k8s.ts +++ b/packages/core/src/v3/serverOnly/k8s.ts @@ -12,5 +12,7 @@ export function isKubernetesEnvironment(override?: boolean): boolean { env.KUBERNETES_SERVICE_PORT, ]; + console.debug("k8sIndicators", { k8sIndicators }); + return k8sIndicators.some(Boolean); } From 47f3c33f65c936271d2741020e58544f314c7365 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 27 Mar 2025 17:33:08 +0000 Subject: [PATCH 05/21] make workload api listen host configurable --- apps/supervisor/src/env.ts | 1 + apps/supervisor/src/index.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 5fe5abe21b..ab2d03e127 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -26,6 +26,7 @@ const Env = z.object({ .transform((s) => z.enum(["http", "https"]).parse(s.toLowerCase())) .default("http"), TRIGGER_WORKLOAD_API_DOMAIN: z.string().optional(), // If unset, will use orchestrator-specific default + TRIGGER_WORKLOAD_API_HOST_INTERNAL: z.string().default("0.0.0.0"), TRIGGER_WORKLOAD_API_PORT_INTERNAL: z.coerce.number().default(8020), // This is the port the workload API listens on TRIGGER_WORKLOAD_API_PORT_EXTERNAL: z.coerce.number().default(8020), // This is the exposed port passed to the run controller diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index b5d7ea36d1..b65860dd3a 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -234,6 +234,7 @@ class ManagedSupervisor { // Responds to workload requests only this.workloadServer = new WorkloadServer({ port: env.TRIGGER_WORKLOAD_API_PORT_INTERNAL, + host: env.TRIGGER_WORKLOAD_API_HOST_INTERNAL, workerClient: this.workerSession.httpClient, checkpointClient: this.checkpointClient, }); From 30077c62d7cedd1a18078af13584a63becded5a5 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 27 Mar 2025 17:34:34 +0000 Subject: [PATCH 06/21] expose supervisor metrics and make more configurable --- apps/supervisor/src/env.ts | 3 +++ apps/supervisor/src/index.ts | 27 +++++++++++++-------- apps/supervisor/src/workloadServer/index.ts | 15 +++++++++++- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index ab2d03e127..8006012429 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -54,7 +54,10 @@ const Env = z.object({ EPHEMERAL_STORAGE_SIZE_REQUEST: z.string().default("2Gi"), // Metrics + METRICS_ENABLED: BoolEnv.default(true), METRICS_COLLECT_DEFAULTS: BoolEnv.default(true), + METRICS_HOST: z.string().default("127.0.0.1"), + METRICS_PORT: z.coerce.number().int().default(9090), // Pod cleaner POD_CLEANER_ENABLED: BoolEnv.default(true), diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index b65860dd3a..29a9183787 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -30,7 +30,7 @@ if (env.METRICS_COLLECT_DEFAULTS) { class ManagedSupervisor { private readonly workerSession: SupervisorSession; - private readonly httpServer: HttpServer; + private readonly metricsServer?: HttpServer; private readonly workloadServer: WorkloadServer; private readonly workloadManager: WorkloadManager; private readonly logger = new SimpleStructuredLogger("managed-worker"); @@ -50,6 +50,7 @@ class ManagedSupervisor { if (env.POD_CLEANER_ENABLED) { this.podCleaner = new PodCleaner({ + register, namespace: env.KUBERNETES_NAMESPACE, batchSize: env.POD_CLEANER_BATCH_SIZE, intervalMs: env.POD_CLEANER_INTERVAL_MS, @@ -58,6 +59,7 @@ class ManagedSupervisor { if (env.FAILED_POD_HANDLER_ENABLED) { this.failedPodHandler = new FailedPodHandler({ + register, namespace: env.KUBERNETES_NAMESPACE, reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, }); @@ -224,12 +226,16 @@ class ManagedSupervisor { } }); - // Used for health checks and metrics - this.httpServer = new HttpServer({ port: 8080, host: "0.0.0.0" }).route("/health", "GET", { - handler: async ({ reply }) => { - reply.text("OK"); - }, - }); + if (env.METRICS_ENABLED) { + this.metricsServer = new HttpServer({ + port: env.METRICS_PORT, + host: env.METRICS_HOST, + metrics: { + register, + expose: true, + }, + }); + } // Responds to workload requests only this.workloadServer = new WorkloadServer({ @@ -320,13 +326,12 @@ class ManagedSupervisor { } await this.workerSession.start(); - - await this.httpServer.start(); + await this.metricsServer?.start(); } async stop() { this.logger.log("[ManagedWorker] Shutting down"); - await this.httpServer.stop(); + await this.workerSession.stop(); if (this.podCleaner) { await this.podCleaner.stop(); @@ -335,6 +340,8 @@ class ManagedSupervisor { if (this.failedPodHandler) { await this.failedPodHandler.stop(); } + + await this.metricsServer?.stop(); } } diff --git a/apps/supervisor/src/workloadServer/index.ts b/apps/supervisor/src/workloadServer/index.ts index 1421d8f98d..ed90c450c3 100644 --- a/apps/supervisor/src/workloadServer/index.ts +++ b/apps/supervisor/src/workloadServer/index.ts @@ -22,6 +22,7 @@ import { } from "@trigger.dev/core/v3/workers"; import { HttpServer, type CheckpointClient } from "@trigger.dev/core/v3/serverOnly"; import { type IncomingMessage } from "node:http"; +import { register } from "../metrics.js"; // Use the official export when upgrading to socket.io@4.8.0 interface DefaultEventsMap { @@ -121,7 +122,19 @@ export class WorkloadServer extends EventEmitter { } private createHttpServer({ host, port }: { host: string; port: number }) { - return new HttpServer({ port, host }) + return new HttpServer({ + port, + host, + metrics: { + register, + expose: false, + }, + }) + .route("/health", "GET", { + handler: async ({ reply }) => { + reply.text("OK"); + }, + }) .route( "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/attempts/start", "POST", From a7efef1688ca52048bbeae0657da7038b5e6f1e1 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 27 Mar 2025 22:32:30 +0000 Subject: [PATCH 07/21] configurable pull secrets, no defaults --- apps/supervisor/src/env.ts | 1 + apps/supervisor/src/index.ts | 1 + apps/supervisor/src/workloadManager/kubernetes.ts | 13 +++++-------- apps/supervisor/src/workloadManager/types.ts | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index e0c80b005b..1b2eb52ddc 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -42,6 +42,7 @@ const Env = z.object({ DOCKER_NETWORK: z.string().default("host"), OTEL_EXPORTER_OTLP_ENDPOINT: z.string().url(), ENFORCE_MACHINE_PRESETS: z.coerce.boolean().default(false), + KUBERNETES_IMAGE_PULL_SECRETS: z.string().optional(), // csv // Used by the resource monitor OVERRIDE_CPU_TOTAL: z.coerce.number().optional(), diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index c1d41b8ab2..dde016e3bf 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -97,6 +97,7 @@ class ManagedSupervisor { workloadApiDomain, workloadApiPort: workloadApiPortExternal, warmStartUrl: this.warmStartUrl, + imagePullSecrets: env.KUBERNETES_IMAGE_PULL_SECRETS?.split(","), }); } else { this.resourceMonitor = new DockerResourceMonitor(new Docker()); diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 90977ed21b..50e9b81f47 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -182,18 +182,15 @@ export class KubernetesWorkloadManager implements WorkloadManager { } } + private getImagePullSecrets(): k8s.V1LocalObjectReference[] | undefined { + return this.opts.imagePullSecrets?.map((name) => ({ name })); + } + get #defaultPodSpec(): Omit { return { restartPolicy: "Never", automountServiceAccountToken: false, - imagePullSecrets: [ - { - name: "registry-trigger", - }, - { - name: "registry-trigger-failover", - }, - ], + imagePullSecrets: this.getImagePullSecrets(), nodeSelector: { nodetype: "worker-re2", }, diff --git a/apps/supervisor/src/workloadManager/types.ts b/apps/supervisor/src/workloadManager/types.ts index ea2046b631..ed06abc8c8 100644 --- a/apps/supervisor/src/workloadManager/types.ts +++ b/apps/supervisor/src/workloadManager/types.ts @@ -5,6 +5,7 @@ export interface WorkloadManagerOptions { workloadApiDomain?: string; // If unset, will use orchestrator-specific default workloadApiPort: number; warmStartUrl?: string; + imagePullSecrets?: string[]; } export interface WorkloadManager { From 208711f9bef93a099ed1e4eb22acdb7d53af1e28 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 08:26:49 +0100 Subject: [PATCH 08/21] remove restore route --- ...d.snapshots.$snapshotFriendlyId.restore.ts | 51 ------------------- ...d.snapshots.$snapshotFriendlyId.suspend.ts | 1 - 2 files changed, 52 deletions(-) delete mode 100644 apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.restore.ts diff --git a/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.restore.ts b/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.restore.ts deleted file mode 100644 index 8f892288e6..0000000000 --- a/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.restore.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { json, TypedResponse } from "@remix-run/server-runtime"; -import { - WorkerApiSuspendRunRequestBody, - WorkerApiSuspendRunResponseBody, -} from "@trigger.dev/core/v3/workers"; -import { z } from "zod"; -import { logger } from "~/services/logger.server"; -import { createActionWorkerApiRoute } from "~/services/routeBuilders/apiBuilder.server"; - -export const action = createActionWorkerApiRoute( - { - params: z.object({ - runFriendlyId: z.string(), - snapshotFriendlyId: z.string(), - }), - body: WorkerApiSuspendRunRequestBody, - }, - async ({ - authenticatedWorker, - params, - body, - }): Promise> => { - const { runFriendlyId, snapshotFriendlyId } = params; - - logger.debug("Restoring run", { runFriendlyId, snapshotFriendlyId, body }); - - if (!body.success) { - // TODO: we could create a debug span here - logger.error("Failed to restore run", { - runFriendlyId, - snapshotFriendlyId, - error: body.error, - }); - - return json({ ok: true }); - } - - try { - await authenticatedWorker.createCheckpoint({ - runFriendlyId, - snapshotFriendlyId, - checkpoint: body.checkpoint, - }); - - return json({ ok: true }); - } catch (error) { - logger.error("Failed to restore run", { runFriendlyId, snapshotFriendlyId, error }); - throw error; - } - } -); diff --git a/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.suspend.ts b/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.suspend.ts index 323c98405f..2d745ed94a 100644 --- a/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.suspend.ts +++ b/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.suspend.ts @@ -25,7 +25,6 @@ export const action = createActionWorkerApiRoute( logger.debug("Suspending run", { runFriendlyId, snapshotFriendlyId, body }); if (!body.success) { - // TODO: we could create a debug span here logger.error("Failed to suspend run", { runFriendlyId, snapshotFriendlyId, From 3ca58dc6a2996053a33d40f6efad5447841fe78d Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:43:48 +0100 Subject: [PATCH 09/21] run controller to handle queued executing --- packages/cli-v3/src/entryPoints/managed-run-controller.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli-v3/src/entryPoints/managed-run-controller.ts b/packages/cli-v3/src/entryPoints/managed-run-controller.ts index 08cf64ab03..495e85b009 100644 --- a/packages/cli-v3/src/entryPoints/managed-run-controller.ts +++ b/packages/cli-v3/src/entryPoints/managed-run-controller.ts @@ -485,6 +485,7 @@ class ManagedRunController { console.log("Run is finished, nothing to do"); return; } + case "QUEUED_EXECUTING": case "EXECUTING_WITH_WAITPOINTS": { console.log("Run is executing with waitpoints", { snapshot }); @@ -629,7 +630,6 @@ class ManagedRunController { return; } case "RUN_CREATED": - case "QUEUED_EXECUTING": case "QUEUED": { console.log("Status change not handled", { status: snapshot.executionStatus }); return; From 756973f2d5c9476b7fcf3c0cbd5a0168aeddd4b0 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:58:19 +0100 Subject: [PATCH 10/21] fix v3 deploys in v4 project --- .../v3/services/initializeDeployment.server.ts | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/v3/services/initializeDeployment.server.ts b/apps/webapp/app/v3/services/initializeDeployment.server.ts index c5a375ba90..e99473ca9c 100644 --- a/apps/webapp/app/v3/services/initializeDeployment.server.ts +++ b/apps/webapp/app/v3/services/initializeDeployment.server.ts @@ -18,8 +18,20 @@ export class InitializeDeploymentService extends BaseService { payload: InitializeDeploymentRequestBody ) { return this.traceWithEnv("call", environment, async (span) => { - if (payload.type !== "V1" && environment.project.engine !== "V2") { - throw new ServiceValidationError("Only V1 deployments are supported for this project"); + if (payload.type === "UNMANAGED") { + throw new ServiceValidationError("UNMANAGED deployments are not supported"); + } + + // Upgrade the project to engine "V2" if it's not already. This should cover cases where people deploy to V2 without running dev first. + if (payload.type === "MANAGED" && environment.project.engine === "V1") { + await this._prisma.project.update({ + where: { + id: environment.project.id, + }, + data: { + engine: "V2", + }, + }); } const latestDeployment = await this._prisma.workerDeployment.findFirst({ From ea07b627541f240db3492fa0b061fbb146e88b1d Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:38:55 +0100 Subject: [PATCH 11/21] update admin worker route --- apps/supervisor/README.md | 32 +++++++++++++++---- .../webapp/app/routes/admin.api.v1.workers.ts | 10 +++--- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/apps/supervisor/README.md b/apps/supervisor/README.md index 9f2f5b9e23..e3bad3dcb6 100644 --- a/apps/supervisor/README.md +++ b/apps/supervisor/README.md @@ -8,20 +8,15 @@ api_url=http://localhost:3030 wg_name=my-worker -# edit these +# edit this admin_pat=tr_pat_... -project_id=clsw6q8wz... curl -sS \ -X POST \ "$api_url/admin/api/v1/workers" \ -H "Authorization: Bearer $admin_pat" \ -H "Content-Type: application/json" \ - -d "{ - \"name\": \"$wg_name\", - \"makeDefault\": true, - \"projectId\": \"$project_id\" - }" + -d "{\"name\": \"$wg_name\"}" ``` 2. Create `.env` and set the worker token @@ -47,3 +42,26 @@ pnpm exec trigger deploy --self-hosted # The additional network flag is required on linux pnpm exec trigger deploy --self-hosted --network host ``` + +## Additional worker groups + +When adding more worker groups you might also want to make them the default for a specific project. This will allow you to test it without having to change the global default: + +```sh +api_url=http://localhost:3030 +wg_name=my-worker + +# edit these +admin_pat=tr_pat_... +project_id=clsw6q8wz... + +curl -sS \ + -X POST \ + "$api_url/admin/api/v1/workers" \ + -H "Authorization: Bearer $admin_pat" \ + -H "Content-Type: application/json" \ + -d "{ + \"name\": \"$wg_name\", + \"makeDefaultForProjectId\": \"$project_id\" + }" +``` diff --git a/apps/webapp/app/routes/admin.api.v1.workers.ts b/apps/webapp/app/routes/admin.api.v1.workers.ts index 185c9cc4d0..9299c0e2c0 100644 --- a/apps/webapp/app/routes/admin.api.v1.workers.ts +++ b/apps/webapp/app/routes/admin.api.v1.workers.ts @@ -7,8 +7,7 @@ import { WorkerGroupService } from "~/v3/services/worker/workerGroupService.serv const RequestBodySchema = z.object({ name: z.string().optional(), description: z.string().optional(), - projectId: z.string().optional(), - makeDefault: z.boolean().optional(), + makeDefaultForProjectId: z.string().optional(), }); export async function action({ request }: ActionFunctionArgs) { @@ -35,7 +34,7 @@ export async function action({ request }: ActionFunctionArgs) { try { const rawBody = await request.json(); - const { name, description, projectId, makeDefault } = RequestBodySchema.parse(rawBody ?? {}); + const { name, description, makeDefaultForProjectId } = RequestBodySchema.parse(rawBody ?? {}); const service = new WorkerGroupService(); const { workerGroup, token } = await service.createWorkerGroup({ @@ -43,14 +42,13 @@ export async function action({ request }: ActionFunctionArgs) { description, }); - if (makeDefault && projectId) { + if (makeDefaultForProjectId) { await prisma.project.update({ where: { - id: projectId, + id: makeDefaultForProjectId, }, data: { defaultWorkerGroupId: workerGroup.id, - engine: "V2", }, }); } From 356d8109d04f8aa3e91e12ca95da834fc7a82d9f Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:40:17 +0100 Subject: [PATCH 12/21] only start pod cleaner et al in k8s mode --- apps/supervisor/src/index.ts | 60 ++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index dde016e3bf..27b9998427 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -54,36 +54,6 @@ class ManagedSupervisor { const workloadApiDomain = env.TRIGGER_WORKLOAD_API_DOMAIN; const workloadApiPortExternal = env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL; - if (env.POD_CLEANER_ENABLED) { - this.logger.log("[ManagedWorker] ๐Ÿงน Pod cleaner enabled", { - namespace: env.KUBERNETES_NAMESPACE, - batchSize: env.POD_CLEANER_BATCH_SIZE, - intervalMs: env.POD_CLEANER_INTERVAL_MS, - }); - this.podCleaner = new PodCleaner({ - register, - namespace: env.KUBERNETES_NAMESPACE, - batchSize: env.POD_CLEANER_BATCH_SIZE, - intervalMs: env.POD_CLEANER_INTERVAL_MS, - }); - } else { - this.logger.warn("[ManagedWorker] Pod cleaner disabled"); - } - - if (env.FAILED_POD_HANDLER_ENABLED) { - this.logger.log("[ManagedWorker] ๐Ÿ” Failed pod handler enabled", { - namespace: env.KUBERNETES_NAMESPACE, - reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, - }); - this.failedPodHandler = new FailedPodHandler({ - register, - namespace: env.KUBERNETES_NAMESPACE, - reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, - }); - } else { - this.logger.warn("[ManagedWorker] Failed pod handler disabled"); - } - if (this.warmStartUrl) { this.logger.log("[ManagedWorker] ๐Ÿ”ฅ Warm starts enabled", { warmStartUrl: this.warmStartUrl, @@ -91,6 +61,36 @@ class ManagedSupervisor { } if (this.isKubernetes) { + if (env.POD_CLEANER_ENABLED) { + this.logger.log("[ManagedWorker] ๐Ÿงน Pod cleaner enabled", { + namespace: env.KUBERNETES_NAMESPACE, + batchSize: env.POD_CLEANER_BATCH_SIZE, + intervalMs: env.POD_CLEANER_INTERVAL_MS, + }); + this.podCleaner = new PodCleaner({ + register, + namespace: env.KUBERNETES_NAMESPACE, + batchSize: env.POD_CLEANER_BATCH_SIZE, + intervalMs: env.POD_CLEANER_INTERVAL_MS, + }); + } else { + this.logger.warn("[ManagedWorker] Pod cleaner disabled"); + } + + if (env.FAILED_POD_HANDLER_ENABLED) { + this.logger.log("[ManagedWorker] ๐Ÿ” Failed pod handler enabled", { + namespace: env.KUBERNETES_NAMESPACE, + reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, + }); + this.failedPodHandler = new FailedPodHandler({ + register, + namespace: env.KUBERNETES_NAMESPACE, + reconnectIntervalMs: env.FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS, + }); + } else { + this.logger.warn("[ManagedWorker] Failed pod handler disabled"); + } + this.resourceMonitor = new KubernetesResourceMonitor(createK8sApi(), ""); this.workloadManager = new KubernetesWorkloadManager({ workloadApiProtocol, From 9d6b2ce9e77fc355c2f5d0a9c49516524f9e1cd4 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:41:08 +0100 Subject: [PATCH 13/21] set new worker group as default if none yet --- .../app/v3/services/worker/workerGroupService.server.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/v3/services/worker/workerGroupService.server.ts b/apps/webapp/app/v3/services/worker/workerGroupService.server.ts index 24d457a882..c654dd3bf5 100644 --- a/apps/webapp/app/v3/services/worker/workerGroupService.server.ts +++ b/apps/webapp/app/v3/services/worker/workerGroupService.server.ts @@ -47,7 +47,13 @@ export class WorkerGroupService extends WithRunEngine { }, }); - if (managedCount === 1) { + const getFlag = makeFlags(this._prisma); + const defaultWorkerInstanceGroupId = await getFlag({ + key: "defaultWorkerInstanceGroupId", + }); + + // If there's no global default yet we should set it to the new worker group + if (!defaultWorkerInstanceGroupId) { const setFlag = makeSetFlags(this._prisma); await setFlag({ key: "defaultWorkerInstanceGroupId", From bf36a539952d34b5ed477d0a25361dcceb794b05 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:00:10 +0100 Subject: [PATCH 14/21] make image ref optional --- .../migration.sql | 2 ++ internal-packages/database/prisma/schema.prisma | 2 +- packages/core/src/v3/schemas/runEngine.ts | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 internal-packages/database/prisma/migrations/20250331105838_make_checkpoint_image_ref_optional/migration.sql diff --git a/internal-packages/database/prisma/migrations/20250331105838_make_checkpoint_image_ref_optional/migration.sql b/internal-packages/database/prisma/migrations/20250331105838_make_checkpoint_image_ref_optional/migration.sql new file mode 100644 index 0000000000..b5989cc616 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20250331105838_make_checkpoint_image_ref_optional/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "TaskRunCheckpoint" ALTER COLUMN "imageRef" DROP NOT NULL; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 216ed36851..b73a9f8e1f 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -2064,7 +2064,7 @@ model TaskRunCheckpoint { type TaskRunCheckpointType location String - imageRef String + imageRef String? reason String? metadata String? diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index c45a651c1d..569c0d645d 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -161,7 +161,6 @@ export type CheckpointType = z.infer; export const CheckpointInput = z.object({ type: CheckpointType, location: z.string(), - imageRef: z.string(), reason: z.string().nullish(), }); @@ -171,6 +170,7 @@ export const TaskRunCheckpoint = CheckpointInput.merge( z.object({ id: z.string(), friendlyId: z.string(), + imageRef: z.string(), }) ); From b74d15dc75a0ee9e28b2b41770c13a06d7c7752d Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:29:06 +0100 Subject: [PATCH 15/21] checkpoint image ref is optional for output as well --- packages/core/src/v3/schemas/runEngine.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index 569c0d645d..012fe48e48 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -161,6 +161,7 @@ export type CheckpointType = z.infer; export const CheckpointInput = z.object({ type: CheckpointType, location: z.string(), + imageRef: z.string().nullish(), reason: z.string().nullish(), }); @@ -170,7 +171,6 @@ export const TaskRunCheckpoint = CheckpointInput.merge( z.object({ id: z.string(), friendlyId: z.string(), - imageRef: z.string(), }) ); From 32187879e0a044b6ae4b7fc80908c2d130238531 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:46:24 +0100 Subject: [PATCH 16/21] export feature flag const --- apps/webapp/app/v3/featureFlags.server.ts | 6 +++++- .../app/v3/services/worker/workerGroupService.server.ts | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/apps/webapp/app/v3/featureFlags.server.ts b/apps/webapp/app/v3/featureFlags.server.ts index 1cc57ed48c..6f7c3edce5 100644 --- a/apps/webapp/app/v3/featureFlags.server.ts +++ b/apps/webapp/app/v3/featureFlags.server.ts @@ -1,8 +1,12 @@ import { z } from "zod"; import { prisma, PrismaClientOrTransaction } from "~/db.server"; +export const FEATURE_FLAG = { + defaultWorkerInstanceGroupId: "defaultWorkerInstanceGroupId", +} as const; + const FeatureFlagCatalog = { - defaultWorkerInstanceGroupId: z.string(), + [FEATURE_FLAG.defaultWorkerInstanceGroupId]: z.string(), }; type FeatureFlagKey = keyof typeof FeatureFlagCatalog; diff --git a/apps/webapp/app/v3/services/worker/workerGroupService.server.ts b/apps/webapp/app/v3/services/worker/workerGroupService.server.ts index c654dd3bf5..e33c3056fe 100644 --- a/apps/webapp/app/v3/services/worker/workerGroupService.server.ts +++ b/apps/webapp/app/v3/services/worker/workerGroupService.server.ts @@ -2,7 +2,7 @@ import { WorkerInstanceGroup, WorkerInstanceGroupType } from "@trigger.dev/datab import { WithRunEngine } from "../baseService.server"; import { WorkerGroupTokenService } from "./workerGroupTokenService.server"; import { logger } from "~/services/logger.server"; -import { makeFlags, makeSetFlags } from "~/v3/featureFlags.server"; +import { FEATURE_FLAG, makeFlags, makeSetFlags } from "~/v3/featureFlags.server"; export class WorkerGroupService extends WithRunEngine { private readonly defaultNamePrefix = "worker_group"; @@ -49,14 +49,14 @@ export class WorkerGroupService extends WithRunEngine { const getFlag = makeFlags(this._prisma); const defaultWorkerInstanceGroupId = await getFlag({ - key: "defaultWorkerInstanceGroupId", + key: FEATURE_FLAG.defaultWorkerInstanceGroupId, }); // If there's no global default yet we should set it to the new worker group if (!defaultWorkerInstanceGroupId) { const setFlag = makeSetFlags(this._prisma); await setFlag({ - key: "defaultWorkerInstanceGroupId", + key: FEATURE_FLAG.defaultWorkerInstanceGroupId, value: workerGroup.id, }); } @@ -169,7 +169,7 @@ export class WorkerGroupService extends WithRunEngine { const flags = makeFlags(this._prisma); const defaultWorkerInstanceGroupId = await flags({ - key: "defaultWorkerInstanceGroupId", + key: FEATURE_FLAG.defaultWorkerInstanceGroupId, }); if (!defaultWorkerInstanceGroupId) { From b8f9dc17d5a5c36c5df9b6992fdd989a479f5137 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:47:13 +0100 Subject: [PATCH 17/21] one last image ref type fix --- packages/core/src/v3/schemas/runEngine.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index 012fe48e48..efcfe8086e 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -217,7 +217,7 @@ export const DequeueMessageCheckpoint = z.object({ id: z.string(), type: CheckpointType, location: z.string(), - imageRef: z.string(), + imageRef: z.string().nullish(), reason: z.string().nullish(), }); export type DequeueMessageCheckpoint = z.infer; From 030907819eabe5ee528fd1af0dd7d9cdc352e796 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Tue, 1 Apr 2025 08:57:22 +0100 Subject: [PATCH 18/21] make runner intervals configurable --- apps/supervisor/src/env.ts | 4 +++ apps/supervisor/src/index.ts | 31 ++++++++----------- apps/supervisor/src/workloadManager/docker.ts | 12 +++++++ .../src/workloadManager/kubernetes.ts | 16 ++++++++++ apps/supervisor/src/workloadManager/types.ts | 2 ++ 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 1b2eb52ddc..02a909c63f 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -30,6 +30,10 @@ const Env = z.object({ TRIGGER_WORKLOAD_API_PORT_INTERNAL: z.coerce.number().default(8020), // This is the port the workload API listens on TRIGGER_WORKLOAD_API_PORT_EXTERNAL: z.coerce.number().default(8020), // This is the exposed port passed to the run controller + // Runner settings + RUNNER_HEARTBEAT_INTERVAL_SECONDS: z.coerce.number().optional(), + RUNNER_SNAPSHOT_POLL_INTERVAL_SECONDS: z.coerce.number().optional(), + // Dequeue settings (provider mode) TRIGGER_DEQUEUE_ENABLED: BoolEnv.default("true"), TRIGGER_DEQUEUE_INTERVAL_MS: z.coerce.number().int().default(1000), diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index 27b9998427..edbb844533 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -2,7 +2,7 @@ import { SupervisorSession } from "@trigger.dev/core/v3/workers"; import { SimpleStructuredLogger } from "@trigger.dev/core/v3/utils/structuredLogger"; import { env } from "./env.js"; import { WorkloadServer } from "./workloadServer/index.js"; -import { type WorkloadManager } from "./workloadManager/types.js"; +import type { WorkloadManagerOptions, WorkloadManager } from "./workloadManager/types.js"; import Docker from "dockerode"; import { z } from "zod"; import { type DequeuedMessage } from "@trigger.dev/core/v3"; @@ -50,16 +50,22 @@ class ManagedSupervisor { console.debug("[ManagedSupervisor] Starting up", { envWithoutSecrets }); } - const workloadApiProtocol = env.TRIGGER_WORKLOAD_API_PROTOCOL; - const workloadApiDomain = env.TRIGGER_WORKLOAD_API_DOMAIN; - const workloadApiPortExternal = env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL; - if (this.warmStartUrl) { this.logger.log("[ManagedWorker] ๐Ÿ”ฅ Warm starts enabled", { warmStartUrl: this.warmStartUrl, }); } + const workloadManagerOptions = { + workloadApiProtocol: env.TRIGGER_WORKLOAD_API_PROTOCOL, + workloadApiDomain: env.TRIGGER_WORKLOAD_API_DOMAIN, + workloadApiPort: env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL, + warmStartUrl: this.warmStartUrl, + imagePullSecrets: env.KUBERNETES_IMAGE_PULL_SECRETS?.split(","), + heartbeatIntervalSeconds: env.RUNNER_HEARTBEAT_INTERVAL_SECONDS, + snapshotPollIntervalSeconds: env.RUNNER_SNAPSHOT_POLL_INTERVAL_SECONDS, + } satisfies WorkloadManagerOptions; + if (this.isKubernetes) { if (env.POD_CLEANER_ENABLED) { this.logger.log("[ManagedWorker] ๐Ÿงน Pod cleaner enabled", { @@ -92,21 +98,10 @@ class ManagedSupervisor { } this.resourceMonitor = new KubernetesResourceMonitor(createK8sApi(), ""); - this.workloadManager = new KubernetesWorkloadManager({ - workloadApiProtocol, - workloadApiDomain, - workloadApiPort: workloadApiPortExternal, - warmStartUrl: this.warmStartUrl, - imagePullSecrets: env.KUBERNETES_IMAGE_PULL_SECRETS?.split(","), - }); + this.workloadManager = new KubernetesWorkloadManager(workloadManagerOptions); } else { this.resourceMonitor = new DockerResourceMonitor(new Docker()); - this.workloadManager = new DockerWorkloadManager({ - workloadApiProtocol, - workloadApiDomain, - workloadApiPort: workloadApiPortExternal, - warmStartUrl: this.warmStartUrl, - }); + this.workloadManager = new DockerWorkloadManager(workloadManagerOptions); } this.workerSession = new SupervisorSession({ diff --git a/apps/supervisor/src/workloadManager/docker.ts b/apps/supervisor/src/workloadManager/docker.ts index 704e564726..7e0f835e60 100644 --- a/apps/supervisor/src/workloadManager/docker.ts +++ b/apps/supervisor/src/workloadManager/docker.ts @@ -45,6 +45,18 @@ export class DockerWorkloadManager implements WorkloadManager { runArgs.push(`--env=TRIGGER_WARM_START_URL=${this.opts.warmStartUrl}`); } + if (this.opts.heartbeatIntervalSeconds) { + runArgs.push( + `--env=TRIGGER_HEARTBEAT_INTERVAL_SECONDS=${this.opts.heartbeatIntervalSeconds}` + ); + } + + if (this.opts.snapshotPollIntervalSeconds) { + runArgs.push( + `--env=TRIGGER_SNAPSHOT_POLL_INTERVAL_SECONDS=${this.opts.snapshotPollIntervalSeconds}` + ); + } + if (env.ENFORCE_MACHINE_PRESETS) { runArgs.push(`--cpus=${opts.machine.cpu}`, `--memory=${opts.machine.memory}G`); runArgs.push(`--env=TRIGGER_MACHINE_CPU=${opts.machine.cpu}`); diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 50e9b81f47..453d9eca1c 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -134,6 +134,22 @@ export class KubernetesWorkloadManager implements WorkloadManager { ...(this.opts.warmStartUrl ? [{ name: "TRIGGER_WARM_START_URL", value: this.opts.warmStartUrl }] : []), + ...(this.opts.heartbeatIntervalSeconds + ? [ + { + name: "TRIGGER_HEARTBEAT_INTERVAL_SECONDS", + value: `${this.opts.heartbeatIntervalSeconds}`, + }, + ] + : []), + ...(this.opts.snapshotPollIntervalSeconds + ? [ + { + name: "TRIGGER_SNAPSHOT_POLL_INTERVAL_SECONDS", + value: `${this.opts.snapshotPollIntervalSeconds}`, + }, + ] + : []), ], }, ], diff --git a/apps/supervisor/src/workloadManager/types.ts b/apps/supervisor/src/workloadManager/types.ts index ed06abc8c8..e398161d79 100644 --- a/apps/supervisor/src/workloadManager/types.ts +++ b/apps/supervisor/src/workloadManager/types.ts @@ -6,6 +6,8 @@ export interface WorkloadManagerOptions { workloadApiPort: number; warmStartUrl?: string; imagePullSecrets?: string[]; + heartbeatIntervalSeconds?: number; + snapshotPollIntervalSeconds?: number; } export interface WorkloadManager { From 75c796ab413bd08bee4fbf6bdaa568a5075b6d7a Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Tue, 1 Apr 2025 09:20:59 +0100 Subject: [PATCH 19/21] ability to set arbitrary env vars on new runners --- apps/supervisor/src/env.ts | 12 +-- apps/supervisor/src/envUtil.test.ts | 80 +++++++++++++++++++ apps/supervisor/src/envUtil.ts | 39 +++++++++ apps/supervisor/src/index.ts | 1 + apps/supervisor/src/workloadManager/docker.ts | 6 ++ .../src/workloadManager/kubernetes.ts | 6 ++ apps/supervisor/src/workloadManager/types.ts | 1 + 7 files changed, 137 insertions(+), 8 deletions(-) create mode 100644 apps/supervisor/src/envUtil.test.ts create mode 100644 apps/supervisor/src/envUtil.ts diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 02a909c63f..69e2b2ddfb 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -1,14 +1,7 @@ import { randomUUID } from "crypto"; import { env as stdEnv } from "std-env"; import { z } from "zod"; - -const BoolEnv = z.preprocess((val) => { - if (typeof val !== "string") { - return val; - } - - return ["true", "1"].includes(val.toLowerCase().trim()); -}, z.boolean()); +import { AdditionalEnvVars, BoolEnv } from "./envUtil.js"; const Env = z.object({ // This will come from `spec.nodeName` in k8s @@ -75,6 +68,9 @@ const Env = z.object({ // Debug DEBUG: BoolEnv.default(false), + + // Additional environment variables (CSV format) + RUNNER_ADDITIONAL_ENV_VARS: AdditionalEnvVars, }); export const env = Env.parse(stdEnv); diff --git a/apps/supervisor/src/envUtil.test.ts b/apps/supervisor/src/envUtil.test.ts new file mode 100644 index 0000000000..c3d35758f1 --- /dev/null +++ b/apps/supervisor/src/envUtil.test.ts @@ -0,0 +1,80 @@ +import { describe, it, expect } from "vitest"; +import { BoolEnv, AdditionalEnvVars } from "./envUtil.js"; + +describe("BoolEnv", () => { + it("should parse string 'true' as true", () => { + expect(BoolEnv.parse("true")).toBe(true); + expect(BoolEnv.parse("TRUE")).toBe(true); + expect(BoolEnv.parse("True")).toBe(true); + }); + + it("should parse string '1' as true", () => { + expect(BoolEnv.parse("1")).toBe(true); + }); + + it("should parse string 'false' as false", () => { + expect(BoolEnv.parse("false")).toBe(false); + expect(BoolEnv.parse("FALSE")).toBe(false); + expect(BoolEnv.parse("False")).toBe(false); + }); + + it("should handle whitespace", () => { + expect(BoolEnv.parse(" true ")).toBe(true); + expect(BoolEnv.parse(" 1 ")).toBe(true); + }); + + it("should pass through boolean values", () => { + expect(BoolEnv.parse(true)).toBe(true); + expect(BoolEnv.parse(false)).toBe(false); + }); + + it("should return false for invalid inputs", () => { + expect(BoolEnv.parse("invalid")).toBe(false); + expect(BoolEnv.parse("")).toBe(false); + }); +}); + +describe("AdditionalEnvVars", () => { + it("should parse single key-value pair", () => { + expect(AdditionalEnvVars.parse("FOO=bar")).toEqual({ FOO: "bar" }); + }); + + it("should parse multiple key-value pairs", () => { + expect(AdditionalEnvVars.parse("FOO=bar,BAZ=qux")).toEqual({ + FOO: "bar", + BAZ: "qux", + }); + }); + + it("should handle whitespace", () => { + expect(AdditionalEnvVars.parse(" FOO = bar , BAZ = qux ")).toEqual({ + FOO: "bar", + BAZ: "qux", + }); + }); + + it("should return undefined for empty string", () => { + expect(AdditionalEnvVars.parse("")).toBeUndefined(); + }); + + it("should return undefined for invalid format", () => { + expect(AdditionalEnvVars.parse("invalid")).toBeUndefined(); + }); + + it("should skip invalid pairs but include valid ones", () => { + expect(AdditionalEnvVars.parse("FOO=bar,INVALID,BAZ=qux")).toEqual({ + FOO: "bar", + BAZ: "qux", + }); + }); + + it("should pass through undefined", () => { + expect(AdditionalEnvVars.parse(undefined)).toBeUndefined(); + }); + + it("should handle empty values", () => { + expect(AdditionalEnvVars.parse("FOO=,BAR=value")).toEqual({ + BAR: "value", + }); + }); +}); diff --git a/apps/supervisor/src/envUtil.ts b/apps/supervisor/src/envUtil.ts new file mode 100644 index 0000000000..41dd5ca22a --- /dev/null +++ b/apps/supervisor/src/envUtil.ts @@ -0,0 +1,39 @@ +import { z } from "zod"; + +export const BoolEnv = z.preprocess((val) => { + if (typeof val !== "string") { + return val; + } + + return ["true", "1"].includes(val.toLowerCase().trim()); +}, z.boolean()); + +export const AdditionalEnvVars = z.preprocess((val) => { + if (typeof val !== "string") { + return val; + } + + if (!val) { + return undefined; + } + + try { + const result = val.split(",").reduce( + (acc, pair) => { + const [key, value] = pair.split("="); + if (!key || !value) { + return acc; + } + acc[key.trim()] = value.trim(); + return acc; + }, + {} as Record + ); + + // Return undefined if no valid key-value pairs were found + return Object.keys(result).length === 0 ? undefined : result; + } catch (error) { + console.warn("Failed to parse additional env vars", { error, val }); + return undefined; + } +}, z.record(z.string(), z.string()).optional()); diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index edbb844533..3d9947e00a 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -64,6 +64,7 @@ class ManagedSupervisor { imagePullSecrets: env.KUBERNETES_IMAGE_PULL_SECRETS?.split(","), heartbeatIntervalSeconds: env.RUNNER_HEARTBEAT_INTERVAL_SECONDS, snapshotPollIntervalSeconds: env.RUNNER_SNAPSHOT_POLL_INTERVAL_SECONDS, + additionalEnvVars: env.RUNNER_ADDITIONAL_ENV_VARS, } satisfies WorkloadManagerOptions; if (this.isKubernetes) { diff --git a/apps/supervisor/src/workloadManager/docker.ts b/apps/supervisor/src/workloadManager/docker.ts index 7e0f835e60..9641e4e25a 100644 --- a/apps/supervisor/src/workloadManager/docker.ts +++ b/apps/supervisor/src/workloadManager/docker.ts @@ -57,6 +57,12 @@ export class DockerWorkloadManager implements WorkloadManager { ); } + if (this.opts.additionalEnvVars) { + Object.entries(this.opts.additionalEnvVars).forEach(([key, value]) => { + runArgs.push(`--env=${key}=${value}`); + }); + } + if (env.ENFORCE_MACHINE_PRESETS) { runArgs.push(`--cpus=${opts.machine.cpu}`, `--memory=${opts.machine.memory}G`); runArgs.push(`--env=TRIGGER_MACHINE_CPU=${opts.machine.cpu}`); diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 453d9eca1c..26e86a1809 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -150,6 +150,12 @@ export class KubernetesWorkloadManager implements WorkloadManager { }, ] : []), + ...(this.opts.additionalEnvVars + ? Object.entries(this.opts.additionalEnvVars).map(([key, value]) => ({ + name: key, + value: value, + })) + : []), ], }, ], diff --git a/apps/supervisor/src/workloadManager/types.ts b/apps/supervisor/src/workloadManager/types.ts index e398161d79..6eddef22c2 100644 --- a/apps/supervisor/src/workloadManager/types.ts +++ b/apps/supervisor/src/workloadManager/types.ts @@ -8,6 +8,7 @@ export interface WorkloadManagerOptions { imagePullSecrets?: string[]; heartbeatIntervalSeconds?: number; snapshotPollIntervalSeconds?: number; + additionalEnvVars?: Record; } export interface WorkloadManager { From 9bcc67df280889caa5a671196e45f284c1726ef2 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Tue, 1 Apr 2025 09:26:51 +0100 Subject: [PATCH 20/21] set default runtime back to node 21 --- packages/cli-v3/src/config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli-v3/src/config.ts b/packages/cli-v3/src/config.ts index 222d05312d..8cca2d1605 100644 --- a/packages/cli-v3/src/config.ts +++ b/packages/cli-v3/src/config.ts @@ -173,7 +173,7 @@ async function resolveConfig( ["run_engine_v2" as const].concat(config.compatibilityFlags ?? []) ); - const defaultRuntime: BuildRuntime = features.run_engine_v2 ? "node-22" : DEFAULT_RUNTIME; + const defaultRuntime: BuildRuntime = features.run_engine_v2 ? "node" : DEFAULT_RUNTIME; const mergedConfig = defu( { From 405f5e948e12afa42afb9eb7d60a64f74079c794 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Tue, 1 Apr 2025 09:28:58 +0100 Subject: [PATCH 21/21] move all runner env vars to the same section --- apps/supervisor/src/env.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 69e2b2ddfb..5009206a29 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -26,6 +26,7 @@ const Env = z.object({ // Runner settings RUNNER_HEARTBEAT_INTERVAL_SECONDS: z.coerce.number().optional(), RUNNER_SNAPSHOT_POLL_INTERVAL_SECONDS: z.coerce.number().optional(), + RUNNER_ADDITIONAL_ENV_VARS: AdditionalEnvVars, // optional (csv) // Dequeue settings (provider mode) TRIGGER_DEQUEUE_ENABLED: BoolEnv.default("true"), @@ -68,9 +69,6 @@ const Env = z.object({ // Debug DEBUG: BoolEnv.default(false), - - // Additional environment variables (CSV format) - RUNNER_ADDITIONAL_ENV_VARS: AdditionalEnvVars, }); export const env = Env.parse(stdEnv);