Skip to content

Commit 8f96cfa

Browse files
authored
[AN-557] Include VM initialization time in cost calculation for Batch jobs (#7744)
1 parent f14ddaf commit 8f96cfa

File tree

4 files changed

+76
-24
lines changed

4 files changed

+76
-24
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### GCP Batch
66
* Cromwell now supports automatic use of the [GAR Dockerhub mirror](https://cloud.google.com/artifact-registry/docs/pull-cached-dockerhub-images), see [ReadTheDocs](https://cromwell.readthedocs.io/en/develop/backends/GCPBatch/) for details.
7+
* VM initialization time in now included in estimated cost calculation for jobs.
78

89
## 89 Release Notes
910

centaur/src/main/resources/standardTestCases/recursive_imports_cost_batch.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ metadata {
2020
status: Succeeded
2121
}
2222

23-
cost: [0.0011, 0.0023]
23+
cost: [0.0024, 0.0071]

supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/request/BatchRequestExecutor.scala

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,15 @@ object BatchRequestExecutor {
176176
}
177177

178178
private def getEventList(events: List[StatusEvent]): List[ExecutionEvent] = {
179-
val startedRegex = ".*SCHEDULED to RUNNING.*".r
180-
val endedRegex = ".*RUNNING to.*".r // can be SUCCEEDED or FAILED
179+
// on Batch, when job transitions to SCHEDULED state it indicates that the VM is being initialized. Users are billed for this
180+
// startup time. Hence, the 'vmStartTime' corresponds to when the job enters the SCHEDULED state.
181+
val startedRegex = ".*to SCHEDULED.*".r
182+
183+
// job terminal events can occur in 2 ways:
184+
// - job transitions from a RUNNING state to either SUCCEEDED or FAILED state
185+
// - job never enters the RUNNING state and instead transitions from SCHEDULED -> SCHEDULED_PENDING_FAILED -> FAILED
186+
val endedRegex = ".*RUNNING to.*|.*SCHEDULED_PENDING_FAILED to FAILED.*".r
187+
181188
events.flatMap { e =>
182189
val time = java.time.Instant
183190
.ofEpochSecond(e.getEventTime.getSeconds, e.getEventTime.getNanos.toLong)

supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/api/BatchRequestExecutorSpec.scala

Lines changed: 65 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,32 @@ class BatchRequestExecutorSpec
2929
with MockSugar
3030
with PrivateMethodTester {
3131

32-
val startStatusEvent = StatusEvent
32+
val schedulingStatusEvent = StatusEvent
3333
.newBuilder()
3434
.setType("STATUS_CHANGED")
3535
.setEventTime(Timestamp.newBuilder().setSeconds(1).build())
36-
.setDescription("Job state is set from SCHEDULED to RUNNING for job...")
36+
.setDescription("Job state is set from QUEUED to SCHEDULED for job...")
3737
.build()
3838

39-
val endStatusEvent = StatusEvent
39+
val runningStatusEvent = StatusEvent
4040
.newBuilder()
4141
.setType("STATUS_CHANGED")
4242
.setEventTime(Timestamp.newBuilder().setSeconds(2).build())
43-
.setDescription("Job state is set from RUNNING to SOME_OTHER_STATUS for job...")
43+
.setDescription("Job state is set from SCHEDULED to RUNNING for job...")
4444
.build()
4545

46-
val schedulingStatusEvent = StatusEvent
46+
val terminalStatusEvent = StatusEvent
4747
.newBuilder()
4848
.setType("STATUS_CHANGED")
4949
.setEventTime(Timestamp.newBuilder().setSeconds(3).build())
50-
.setDescription("Job state is set from QUEUED to SCHEDULED for job...")
50+
.setDescription("Job state is set from RUNNING to SOME_OTHER_STATUS for job...")
51+
.build()
52+
53+
val scheduleFailedStatusEvent = StatusEvent
54+
.newBuilder()
55+
.setType("STATUS_CHANGED")
56+
.setEventTime(Timestamp.newBuilder().setSeconds(5).build())
57+
.setDescription("Job state is set from SCHEDULED_PENDING_FAILED to FAILED for job...")
5158
.build()
5259

5360
val preemptionError = "Job state is set from SCHEDULED to FAILED for job projects/....Job failed due to task " +
@@ -94,7 +101,7 @@ class BatchRequestExecutorSpec
94101

95102
behavior of "BatchRequestExecutor"
96103

97-
it should "create a schedule status event correctly" in {
104+
it should "create a schedule status and vmStartTime event correctly" in {
98105

99106
val mockClient = setupBatchClient(jobState = JobStatus.State.QUEUED, events = List(schedulingStatusEvent))
100107
val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build())
@@ -106,9 +113,9 @@ class BatchRequestExecutorSpec
106113
// Verify the event
107114
result.status match {
108115
case RunStatus.Initializing(events, _) =>
109-
events.length shouldBe 1
110-
events.map(_.name).head shouldBe "Job state is set from QUEUED to SCHEDULED for job..."
111-
events.map(_.offsetDateTime.toString).head shouldBe "1970-01-01T00:00:03Z"
116+
events.length shouldBe 2
117+
events.map(_.name) should contain allOf ("Job state is set from QUEUED to SCHEDULED for job...", "vmStartTime")
118+
events.map(_.offsetDateTime.toString) shouldBe List("1970-01-01T00:00:01Z", "1970-01-01T00:00:01Z")
112119
case _ => fail("Expected RunStatus.Initializing with events")
113120
}
114121
}
@@ -137,7 +144,7 @@ class BatchRequestExecutorSpec
137144
it should "create instantiatedVmInfo correctly" in {
138145

139146
val mockClient =
140-
setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(startStatusEvent, endStatusEvent))
147+
setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(runningStatusEvent, terminalStatusEvent))
141148
// Create the BatchRequestExecutor
142149
val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build())
143150

@@ -159,7 +166,7 @@ class BatchRequestExecutorSpec
159166

160167
val mockClient = setupBatchClient(location = "zones/us-central1-a",
161168
jobState = JobStatus.State.RUNNING,
162-
events = List(startStatusEvent, endStatusEvent)
169+
events = List(runningStatusEvent, terminalStatusEvent)
163170
)
164171

165172
// Create the BatchRequestExecutor
@@ -182,7 +189,7 @@ class BatchRequestExecutorSpec
182189
it should "create instantiatedVmInfo correctly with missing location info" in {
183190

184191
val mockClient =
185-
setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(startStatusEvent, endStatusEvent))
192+
setupBatchClient(jobState = JobStatus.State.RUNNING, events = List(runningStatusEvent, terminalStatusEvent))
186193

187194
// Create the BatchRequestExecutor
188195
val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build())
@@ -203,7 +210,7 @@ class BatchRequestExecutorSpec
203210

204211
it should "send vmStartTime and vmEndTime metadata info when a workflow succeeds" in {
205212

206-
val mockClient = setupBatchClient(events = List(startStatusEvent, endStatusEvent))
213+
val mockClient = setupBatchClient(events = List(schedulingStatusEvent, runningStatusEvent, terminalStatusEvent))
207214

208215
// Create the BatchRequestExecutor
209216
val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build())
@@ -216,16 +223,22 @@ class BatchRequestExecutorSpec
216223
result.status match {
217224
case RunStatus.Success(events, _) =>
218225
val eventNames = events.map(_.name)
219-
val eventTimes = events.map(_.offsetDateTime.toString)
220226
eventNames should contain allOf ("vmStartTime", "vmEndTime")
221-
eventTimes should contain allOf ("1970-01-01T00:00:01Z", "1970-01-01T00:00:02Z")
227+
228+
val vmStartTime = events.find(e => e.name == "vmStartTime").get
229+
val vmEndTime = events.find(e => e.name == "vmEndTime").get
230+
231+
vmStartTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:01Z"
232+
vmEndTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:03Z"
222233
case _ => fail("Expected RunStatus.Success with events")
223234
}
224235
}
225236

226237
it should "send vmStartTime and vmEndTime metadata info along with other events when a workflow fails" in {
227238
val mockClient =
228-
setupBatchClient(jobState = JobStatus.State.FAILED, events = List(startStatusEvent, endStatusEvent))
239+
setupBatchClient(jobState = JobStatus.State.FAILED,
240+
events = List(schedulingStatusEvent, runningStatusEvent, terminalStatusEvent)
241+
)
229242

230243
// Create the BatchRequestExecutor
231244
val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build())
@@ -238,13 +251,44 @@ class BatchRequestExecutorSpec
238251
result.status match {
239252
case RunStatus.Failed(_, events, _) =>
240253
val eventNames = events.map(_.name)
241-
val eventTimes = events.map(_.offsetDateTime.toString)
242-
println(eventNames)
243254
eventNames should contain allOf ("vmStartTime", "vmEndTime")
255+
256+
val vmStartTime = events.find(e => e.name == "vmStartTime").get
257+
val vmEndTime = events.find(e => e.name == "vmEndTime").get
258+
244259
eventNames should contain allOf ("Job state is set from RUNNING to SOME_OTHER_STATUS for job...", "Job state is set from SCHEDULED to RUNNING for job...")
245-
eventTimes should contain allOf ("1970-01-01T00:00:01Z", "1970-01-01T00:00:02Z")
246-
case _ => fail("Expected RunStatus.Success with events")
260+
vmStartTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:01Z"
261+
vmEndTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:03Z"
262+
case _ => fail("Expected RunStatus.Failed with events")
247263
}
248264
}
249265

266+
it should "send vmStartTime and vmEndTime metadata info along with other events when a job fails to run" in {
267+
val mockClient =
268+
setupBatchClient(jobState = JobStatus.State.FAILED,
269+
events = List(schedulingStatusEvent, scheduleFailedStatusEvent)
270+
)
271+
272+
// Create the BatchRequestExecutor
273+
val batchRequestExecutor = new BatchRequestExecutor.CloudImpl(BatchServiceSettings.newBuilder().build())
274+
275+
// testing a private method see https://www.scalatest.org/user_guide/using_PrivateMethodTester
276+
val internalGetHandler = PrivateMethod[BatchApiResponse.StatusQueried](Symbol("internalGetHandler"))
277+
val result = batchRequestExecutor invokePrivate internalGetHandler(mockClient, GetJobRequest.newBuilder().build())
278+
279+
// Verify the events
280+
result.status match {
281+
case RunStatus.Failed(_, events, _) =>
282+
val eventNames = events.map(_.name)
283+
eventNames should contain allOf ("vmStartTime", "vmEndTime")
284+
285+
val vmStartTime = events.find(e => e.name == "vmStartTime").get
286+
val vmEndTime = events.find(e => e.name == "vmEndTime").get
287+
288+
eventNames should contain allOf ("Job state is set from SCHEDULED_PENDING_FAILED to FAILED for job...", "Job state is set from QUEUED to SCHEDULED for job...")
289+
vmStartTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:01Z"
290+
vmEndTime.offsetDateTime.toString shouldBe "1970-01-01T00:00:05Z"
291+
case _ => fail("Expected RunStatus.Failed with events")
292+
}
293+
}
250294
}

0 commit comments

Comments
 (0)