From ae64ea969c0888b906104e41ee63cae88830eec0 Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Mon, 19 May 2025 09:08:21 -0400 Subject: [PATCH 1/5] use Scheduled state for vmStartTime --- .../api/request/BatchRequestExecutor.scala | 11 ++- .../batch/api/BatchRequestExecutorSpec.scala | 82 ++++++++++++++----- 2 files changed, 70 insertions(+), 23 deletions(-) diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala index 2e498e219d..4108500c1d 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala @@ -176,8 +176,15 @@ object BatchRequestExecutor { } private def getEventList(events: List[StatusEvent]): List[ExecutionEvent] = { - val startedRegex = ".*SCHEDULED to RUNNING.*".r - val endedRegex = ".*RUNNING to.*".r // can be SUCCEEDED or FAILED + // on Batch, when job transitions to SCHEDULED state it indicates that the VM is being. Users are billed for this + // startup time. Hence, the 'vmStartTime' corresponds to when the job enters the SCHEDULED state. + val startedRegex = ".*QUEUED to SCHEDULED.*".r + + // job terminal events can occur in 2 ways: + // - job transitions from a RUNNING state to either SUCCEEDED or FAILED state + // - job never enters the RUNNING state and instead transitions from SCHEDULED -> SCHEDULED_PENDING_FAILED -> FAILED + val endedRegex = ".*RUNNING to.*|.*SCHEDULED_PENDING_FAILED to FAILED.*".r + events.flatMap { e => val time = java.time.Instant .ofEpochSecond(e.getEventTime.getSeconds, e.getEventTime.getNanos.toLong) diff --git a/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala b/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala index 02a13c10ca..9dfdc9f7d5 100644 --- a/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala +++ b/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala @@ -29,25 +29,32 @@ class BatchRequestExecutorSpec with MockSugar with PrivateMethodTester { - val startStatusEvent = StatusEvent + val schedulingStatusEvent = StatusEvent .newBuilder() .setType("STATUS_CHANGED") .setEventTime(Timestamp.newBuilder().setSeconds(1).build()) - .setDescription("Job state is set from SCHEDULED to RUNNING for job...") + .setDescription("Job state is set from QUEUED to SCHEDULED for job...") .build() - val endStatusEvent = StatusEvent + val runningStatusEvent = StatusEvent .newBuilder() .setType("STATUS_CHANGED") .setEventTime(Timestamp.newBuilder().setSeconds(2).build()) - .setDescription("Job state is set from RUNNING to SOME_OTHER_STATUS for job...") + .setDescription("Job state is set from SCHEDULED to RUNNING for job...") .build() - val schedulingStatusEvent = StatusEvent + val terminalStatusEvent = StatusEvent .newBuilder() .setType("STATUS_CHANGED") .setEventTime(Timestamp.newBuilder().setSeconds(3).build()) - .setDescription("Job state is set from QUEUED to SCHEDULED for job...") + .setDescription("Job state is set from RUNNING to SOME_OTHER_STATUS for job...") + .build() + + val scheduleFailedStatusEvent = StatusEvent + .newBuilder() + .setType("STATUS_CHANGED") + .setEventTime(Timestamp.newBuilder().setSeconds(5).build()) + .setDescription("Job state is set from SCHEDULED_PENDING_FAILED to FAILED for job...") .build() val preemptionError = "Job state is set from SCHEDULED to FAILED for job projects/....Job failed due to task " + @@ -94,7 +101,7 @@ class BatchRequestExecutorSpec behavior of "BatchRequestExecutor" - it should "create a schedule status event correctly" in { + it should "create a schedule status and vmStartTime event correctly" in { val mockClient = setupBatchClient(jobState = JobStatus.State.QUEUED, events = List(schedulingStatusEvent)) val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) @@ -106,9 +113,9 @@ class BatchRequestExecutorSpec // Verify the event result.status match { case RunStatus.Initializing(events, _) => - events.length shouldBe 1 - events.map(_.name).head shouldBe "Job state is set from QUEUED to SCHEDULED for job..." - events.map(_.offsetDateTime.toString).head shouldBe "1970-01-01T00:00:03Z" + events.length shouldBe 2 + events.map(_.name) should contain allOf ("Job state is set from QUEUED to SCHEDULED for job...", "vmStartTime") + events.map(_.offsetDateTime.toString) shouldBe List("1970-01-01T00:00:01Z", "1970-01-01T00:00:01Z") case _ => fail("Expected RunStatus.Initializing with events") } } @@ -137,7 +144,7 @@ class BatchRequestExecutorSpec it should "create instantiatedVmInfo correctly" in { val mockClient = - setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(startStatusEvent, endStatusEvent)) + setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(runningStatusEvent, terminalStatusEvent)) // Create the BatchRequestExecutor val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) @@ -159,7 +166,7 @@ class BatchRequestExecutorSpec val mockClient = setupBatchClient(location = "zones/us-central1-a", jobState = JobStatus.State.RUNNING, - events = List(startStatusEvent, endStatusEvent) + events = List(runningStatusEvent, terminalStatusEvent) ) // Create the BatchRequestExecutor @@ -182,7 +189,7 @@ class BatchRequestExecutorSpec it should "create instantiatedVmInfo correctly with missing location info" in { val mockClient = - setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(startStatusEvent, endStatusEvent)) + setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(runningStatusEvent, terminalStatusEvent)) // Create the BatchRequestExecutor val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) @@ -203,7 +210,7 @@ class BatchRequestExecutorSpec it should "send vmStartTime and vmEndTime metadata info when a workflow succeeds" in { - val mockClient = setupBatchClient(events = List(startStatusEvent, endStatusEvent)) + val mockClient = setupBatchClient(events = List(schedulingStatusEvent, runningStatusEvent, terminalStatusEvent)) // Create the BatchRequestExecutor val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) @@ -216,16 +223,20 @@ class BatchRequestExecutorSpec result.status match { case RunStatus.Success(events, _) => val eventNames = events.map(_.name) - val eventTimes = events.map(_.offsetDateTime.toString) eventNames should contain allOf ("vmStartTime", "vmEndTime") - eventTimes should contain allOf ("1970-01-01T00:00:01Z", "1970-01-01T00:00:02Z") + + val vmStartTime = events.find(e => e.name == "vmStartTime").get + val vmEndTime = events.find(e => e.name == "vmEndTime").get + + vmStartTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:01Z" + vmEndTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:03Z" case _ => fail("Expected RunStatus.Success with events") } } it should "send vmStartTime and vmEndTime metadata info along with other events when a workflow fails" in { val mockClient = - setupBatchClient(jobState = JobStatus.State.FAILED, events = List(startStatusEvent, endStatusEvent)) + setupBatchClient(jobState = JobStatus.State.FAILED, events = List(schedulingStatusEvent, runningStatusEvent, terminalStatusEvent)) // Create the BatchRequestExecutor val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) @@ -238,13 +249,42 @@ class BatchRequestExecutorSpec result.status match { case RunStatus.Failed(_, events, _) => val eventNames = events.map(_.name) - val eventTimes = events.map(_.offsetDateTime.toString) - println(eventNames) eventNames should contain allOf ("vmStartTime", "vmEndTime") + + val vmStartTime = events.find(e => e.name == "vmStartTime").get + val vmEndTime = events.find(e => e.name == "vmEndTime").get + eventNames should contain allOf ("Job state is set from RUNNING to SOME_OTHER_STATUS for job...", "Job state is set from SCHEDULED to RUNNING for job...") - eventTimes should contain allOf ("1970-01-01T00:00:01Z", "1970-01-01T00:00:02Z") - case _ => fail("Expected RunStatus.Success with events") + vmStartTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:01Z" + vmEndTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:03Z" + case _ => fail("Expected RunStatus.Failed with events") } } + it should "send vmStartTime and vmEndTime metadata info along with other events when a job fails to run" in { + val mockClient = + setupBatchClient(jobState = JobStatus.State.FAILED, events = List(schedulingStatusEvent, scheduleFailedStatusEvent)) + + // Create the BatchRequestExecutor + val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) + + // testing a private method see https://www.scalatest.org/user_guide/using_PrivateMethodTester + val internalGetHandler = PrivateMethod[BatchApiResponse.StatusQueried](Symbol("internalGetHandler")) + val result = batchRequestExecutor invokePrivate internalGetHandler(mockClient, GetJobRequest.newBuilder().build()) + + // Verify the events + result.status match { + case RunStatus.Failed(_, events, _) => + val eventNames = events.map(_.name) + eventNames should contain allOf ("vmStartTime", "vmEndTime") + + val vmStartTime = events.find(e => e.name == "vmStartTime").get + val vmEndTime = events.find(e => e.name == "vmEndTime").get + + eventNames should contain allOf ("Job state is set from SCHEDULED_PENDING_FAILED to FAILED for job...", "Job state is set from QUEUED to SCHEDULED for job...") + vmStartTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:01Z" + vmEndTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:05Z" + case _ => fail("Expected RunStatus.Failed with events") + } + } } From 9564b07d579a1f4a762c00d3fb7353c66b62bf0e Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Mon, 19 May 2025 09:18:27 -0400 Subject: [PATCH 2/5] comments --- CHANGELOG.md | 1 + .../backend/google/batch/api/request/BatchRequestExecutor.scala | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 621858de03..104c14e4b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### GCP Batch * Cromwell now supports automatic use of the [GAR Dockerhub mirror](https://cloud.google.com/artifact-registry/docs/pull-cached-dockerhub-images), see [ReadTheDocs](https://cromwell.readthedocs.io/en/develop/backends/GCPBatch/) for details. + * VM initialization time in now included in estimated cost calculation for jobs. ## 89 Release Notes diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala index 4108500c1d..ddaffbde6e 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala @@ -176,7 +176,7 @@ object BatchRequestExecutor { } private def getEventList(events: List[StatusEvent]): List[ExecutionEvent] = { - // on Batch, when job transitions to SCHEDULED state it indicates that the VM is being. Users are billed for this + // on Batch, when job transitions to SCHEDULED state it indicates that the VM is being initialized. Users are billed for this // startup time. Hence, the 'vmStartTime' corresponds to when the job enters the SCHEDULED state. val startedRegex = ".*QUEUED to SCHEDULED.*".r From 5c2389b959bec6f896e279dc6743ff1f236e7ad3 Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Mon, 19 May 2025 09:34:26 -0400 Subject: [PATCH 3/5] scalafmt --- .../google/batch/api/BatchRequestExecutorSpec.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala b/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala index 9dfdc9f7d5..66c1f39429 100644 --- a/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala +++ b/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala @@ -236,7 +236,9 @@ class BatchRequestExecutorSpec it should "send vmStartTime and vmEndTime metadata info along with other events when a workflow fails" in { val mockClient = - setupBatchClient(jobState = JobStatus.State.FAILED, events = List(schedulingStatusEvent, runningStatusEvent, terminalStatusEvent)) + setupBatchClient(jobState = JobStatus.State.FAILED, + events = List(schedulingStatusEvent, runningStatusEvent, terminalStatusEvent) + ) // Create the BatchRequestExecutor val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) @@ -263,7 +265,9 @@ class BatchRequestExecutorSpec it should "send vmStartTime and vmEndTime metadata info along with other events when a job fails to run" in { val mockClient = - setupBatchClient(jobState = JobStatus.State.FAILED, events = List(schedulingStatusEvent, scheduleFailedStatusEvent)) + setupBatchClient(jobState = JobStatus.State.FAILED, + events = List(schedulingStatusEvent, scheduleFailedStatusEvent) + ) // Create the BatchRequestExecutor val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build()) From 83cf25de0a2f77d1f5d984f661f598c9f0c50cec Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Mon, 19 May 2025 16:30:36 -0400 Subject: [PATCH 4/5] update cost expectations --- .../standardTestCases/recursive_imports_cost_batch.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centaur/src/main/resources/standardTestCases/recursive_imports_cost_batch.test b/centaur/src/main/resources/standardTestCases/recursive_imports_cost_batch.test index dd856036f4..6118f0be41 100644 --- a/centaur/src/main/resources/standardTestCases/recursive_imports_cost_batch.test +++ b/centaur/src/main/resources/standardTestCases/recursive_imports_cost_batch.test @@ -20,4 +20,4 @@ metadata { status: Succeeded } -cost: [0.0011, 0.0023] +cost: [0.0024, 0.0071] From d3d91e07f0e421633bc054e92ae770ed84c384a0 Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Mon, 19 May 2025 17:09:28 -0400 Subject: [PATCH 5/5] PR feedback - update regex --- .../backend/google/batch/api/request/BatchRequestExecutor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala index ddaffbde6e..3451c11c6a 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala @@ -178,7 +178,7 @@ object BatchRequestExecutor { private def getEventList(events: List[StatusEvent]): List[ExecutionEvent] = { // on Batch, when job transitions to SCHEDULED state it indicates that the VM is being initialized. Users are billed for this // startup time. Hence, the 'vmStartTime' corresponds to when the job enters the SCHEDULED state. - val startedRegex = ".*QUEUED to SCHEDULED.*".r + val startedRegex = ".*to SCHEDULED.*".r // job terminal events can occur in 2 ways: // - job transitions from a RUNNING state to either SUCCEEDED or FAILED state