diff --git a/apps/supervisor/src/util.ts b/apps/supervisor/src/util.ts index 6e666cb1b1..ba1bc1b2fd 100644 --- a/apps/supervisor/src/util.ts +++ b/apps/supervisor/src/util.ts @@ -5,6 +5,12 @@ export function getDockerHostDomain() { return isMacOs || isWindows ? "host.docker.internal" : "localhost"; } -export function getRunnerId(runId: string) { - return `runner-${runId.replace("run_", "")}`; +export function getRunnerId(runId: string, attemptNumber?: number) { + const parts = ["runner", runId.replace("run_", "")]; + + if (attemptNumber && attemptNumber > 1) { + parts.push(`attempt-${attemptNumber}`); + } + + return parts.join("-"); } diff --git a/apps/supervisor/src/workloadManager/docker.ts b/apps/supervisor/src/workloadManager/docker.ts index 09695bc897..9e4ba29594 100644 --- a/apps/supervisor/src/workloadManager/docker.ts +++ b/apps/supervisor/src/workloadManager/docker.ts @@ -22,7 +22,7 @@ export class DockerWorkloadManager implements WorkloadManager { async create(opts: WorkloadManagerCreateOptions) { this.logger.log("[DockerWorkloadProvider] Creating container", { opts }); - const runnerId = getRunnerId(opts.runFriendlyId); + const runnerId = getRunnerId(opts.runFriendlyId, opts.nextAttemptNumber); const runArgs = [ "run", diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 8b3c48ffed..54dd95a795 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -31,7 +31,7 @@ export class KubernetesWorkloadManager implements WorkloadManager { async create(opts: WorkloadManagerCreateOptions) { this.logger.log("[KubernetesWorkloadManager] Creating container", { opts }); - const runnerId = getRunnerId(opts.runFriendlyId); + const runnerId = getRunnerId(opts.runFriendlyId, opts.nextAttemptNumber); try { await this.k8s.core.createNamespacedPod({ diff --git a/apps/webapp/app/v3/runEngineHandlers.server.ts b/apps/webapp/app/v3/runEngineHandlers.server.ts index 0663e76537..6f236cf3ed 100644 --- a/apps/webapp/app/v3/runEngineHandlers.server.ts +++ b/apps/webapp/app/v3/runEngineHandlers.server.ts @@ -326,12 +326,19 @@ export function registerRunEngineEventBusHandlers() { engine.eventBus.on("runRetryScheduled", async ({ time, run, environment, retryAt }) => { try { - await eventRepository.recordEvent(`Retry #${run.attemptNumber} delay`, { + let retryMessage = `Retry #${run.attemptNumber} delay`; + + if (run.nextMachineAfterOOM) { + retryMessage += ` after OOM`; + } + + await eventRepository.recordEvent(retryMessage, { taskSlug: run.taskIdentifier, environment, attributes: { properties: { retryAt: retryAt.toISOString(), + nextMachine: run.nextMachineAfterOOM, }, runId: run.friendlyId, style: { diff --git a/internal-packages/run-engine/src/engine/eventBus.ts b/internal-packages/run-engine/src/engine/eventBus.ts index c64d0b2c11..5662cae00c 100644 --- a/internal-packages/run-engine/src/engine/eventBus.ts +++ b/internal-packages/run-engine/src/engine/eventBus.ts @@ -85,6 +85,7 @@ export type EventBusEvents = { traceContext: Record; taskIdentifier: string; baseCostInCents: number; + nextMachineAfterOOM?: string; }; organization: { id: string; diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts index 9827d7ec1d..4aaa395821 100644 --- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts @@ -692,6 +692,7 @@ export class RunAttemptSystem { traceContext: run.traceContext as Record, baseCostInCents: run.baseCostInCents, spanId: run.spanId, + nextMachineAfterOOM: retryResult.machine, }, organization: { id: run.runtimeEnvironment.organizationId, diff --git a/packages/cli-v3/src/deploy/buildImage.ts b/packages/cli-v3/src/deploy/buildImage.ts index e5e03c1fc5..42ff998af3 100644 --- a/packages/cli-v3/src/deploy/buildImage.ts +++ b/packages/cli-v3/src/deploy/buildImage.ts @@ -688,8 +688,7 @@ ENV TRIGGER_PROJECT_ID=\${TRIGGER_PROJECT_ID} \ TRIGGER_CONTENT_HASH=\${TRIGGER_CONTENT_HASH} \ TRIGGER_PROJECT_REF=\${TRIGGER_PROJECT_REF} \ NODE_EXTRA_CA_CERTS=\${NODE_EXTRA_CA_CERTS} \ - NODE_ENV=production \ - NODE_OPTIONS="--max_old_space_size=8192" + NODE_ENV=production # Copy the files from the install stage COPY --from=build --chown=node:node /app ./ diff --git a/packages/core/src/v3/build/flags.test.ts b/packages/core/src/v3/build/flags.test.ts index aaa3149bea..694934b7b0 100644 --- a/packages/core/src/v3/build/flags.test.ts +++ b/packages/core/src/v3/build/flags.test.ts @@ -18,6 +18,11 @@ describe("dedupFlags", () => { expect(dedupFlags("--log=info --log=warn --log=error")).toBe("--log=error"); }); + it("should treat underscores as hyphens", () => { + expect(dedupFlags("--debug_level=info")).toBe("--debug-level=info"); + expect(dedupFlags("--debug_level=info --debug-level=warn")).toBe("--debug-level=warn"); + }); + it("should handle mix of flags with and without values", () => { expect(dedupFlags("--debug=false -v --debug=true")).toBe("-v --debug=true"); expect(dedupFlags("-v --quiet -v")).toBe("--quiet -v"); diff --git a/packages/core/src/v3/build/flags.ts b/packages/core/src/v3/build/flags.ts index 88448fa730..8f6c00349d 100644 --- a/packages/core/src/v3/build/flags.ts +++ b/packages/core/src/v3/build/flags.ts @@ -27,11 +27,11 @@ export function dedupFlags(flags: string): string { .map((flag): [string, string | boolean] => { const equalIndex = flag.indexOf("="); if (equalIndex !== -1) { - const key = flag.slice(0, equalIndex); + const key = flag.slice(0, equalIndex).replace(/_/g, "-"); const value = flag.slice(equalIndex + 1); return [key, value]; } else { - return [flag, true]; + return [flag.replace(/_/g, "-"), true]; } });