Skip to content

Commit 126c374

Browse files
AN-381 Enable GCR mirroring of Dockerhub (#7740)
Co-authored-by: Adam Nichols <aednichols@gmail.com>
1 parent 5392f1b commit 126c374

File tree

29 files changed

+472
-26
lines changed

29 files changed

+472
-26
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Cromwell Change Log
22

3+
## 90 Release Notes
4+
5+
### GCP Batch
6+
* Cromwell now supports automatic use of the [GAR Dockerhub mirror](https://cloud.google.com/artifact-registry/docs/pull-cached-dockerhub-images), see [ReadTheDocs](https://cromwell.readthedocs.io/en/develop/backends/GCPBatch/) for details.
7+
38
## 89 Release Notes
49

510
### Improvements

backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import cromwell.core.CallOutputs
88
import cromwell.core.JobToken.JobTokenType
99
import cromwell.core.path.Path
1010
import cromwell.core.path.PathFactory.PathBuilders
11+
import cromwell.docker.DockerMirroring
1112
import net.ceedubs.ficus.Ficus._
1213
import wom.expression.{IoFunctionSet, NoIoFunctionSet}
1314
import wom.graph.CommandCallNode
@@ -164,6 +165,11 @@ trait BackendLifecycleActorFactory extends PlatformSpecific {
164165
initializationDataOption: Option[BackendInitializationData]
165166
): List[Any] = List.empty
166167

168+
/**
169+
* Returns a DockerMirror built based on backend configuration
170+
*/
171+
val dockerMirroring: Option[DockerMirroring] = None
172+
167173
/**
168174
* Allows Cromwell to self-identify which cloud it's running on for runtime attribute purposes
169175
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: hello_dockermirror_gcpbatch
2+
testFormat: workflowsuccess
3+
# needs an alt if we're going to keep Docker image cache tests which I'm not sure we are
4+
backends: [GCPBATCHDockerMirror]
5+
6+
files {
7+
workflow: hello_dockermirror_gcpbatch/hello.wdl
8+
inputs: hello_dockermirror_gcpbatch/hello.inputs
9+
}
10+
11+
metadata {
12+
workflowName: wf_hello
13+
status: Succeeded
14+
"calls.wf_hello.hello.runtimeAttributes.docker": "mirror.gcr.io/ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950"
15+
"calls.wf_hello.hello.dockerImageUsed": "mirror.gcr.io/ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950"
16+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"wf_hello.hello.addressee": "m'Lord"
3+
}
4+
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
task hello {
2+
String addressee
3+
command {
4+
echo "Hello ${addressee}!"
5+
}
6+
output {
7+
String salutation = read_string(stdout())
8+
}
9+
runtime {
10+
backend: "GCPBATCHDockerMirror"
11+
docker: "ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950"
12+
}
13+
}
14+
15+
workflow wf_hello {
16+
call hello
17+
output {
18+
hello.salutation
19+
}
20+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: hello_dockermirror_papiv2
2+
testFormat: workflowsuccess
3+
# needs an alt if we're going to keep Docker image cache tests which I'm not sure we are
4+
backends: [Papiv2DockerMirror]
5+
6+
files {
7+
workflow: hello_dockermirror_papiv2/hello.wdl
8+
inputs: hello_dockermirror_papiv2/hello.inputs
9+
}
10+
11+
metadata {
12+
workflowName: wf_hello
13+
status: Succeeded
14+
"calls.wf_hello.hello.runtimeAttributes.docker": "mirror.gcr.io/ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950"
15+
"calls.wf_hello.hello.dockerImageUsed": "mirror.gcr.io/ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950"
16+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"wf_hello.hello.addressee": "m'Lord"
3+
}
4+
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
task hello {
2+
String addressee
3+
command {
4+
echo "Hello ${addressee}!"
5+
}
6+
output {
7+
String salutation = read_string(stdout())
8+
}
9+
runtime {
10+
backend: "Papiv2DockerMirror"
11+
docker: "ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950"
12+
}
13+
}
14+
15+
workflow wf_hello {
16+
call hello
17+
output {
18+
hello.salutation
19+
}
20+
}

dockerHashing/src/main/scala/cromwell/docker/DockerImageIdentifier.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ sealed trait DockerImageIdentifier {
1111
def reference: String
1212

1313
def swapReference(newReference: String): DockerImageIdentifier
14+
def swapHost(newHost: String): DockerImageIdentifier
1415

1516
// The name of the image with a repository prefix iff a repository was explicitly specified.
1617
lazy val name = repository map { r => s"$r/$image" } getOrElse image
@@ -35,6 +36,7 @@ case class DockerImageIdentifierWithoutHash(host: Option[String],
3536
) extends DockerImageIdentifier {
3637
def withHash(hash: DockerHashResult) = DockerImageIdentifierWithHash(host, repository, image, reference, hash)
3738
override def swapReference(newReference: String) = this.copy(reference = newReference)
39+
override def swapHost(newHost: String) = this.copy(host = Option(newHost))
3840
}
3941

4042
case class DockerImageIdentifierWithHash(host: Option[String],
@@ -45,6 +47,7 @@ case class DockerImageIdentifierWithHash(host: Option[String],
4547
) extends DockerImageIdentifier {
4648
override lazy val fullName: String = s"$hostAsString$name@${hash.algorithmAndHash}"
4749
override def swapReference(newReference: String) = this.copy(reference = newReference)
50+
override def swapHost(newHost: String) = this.copy(host = Option(newHost))
4851
}
4952

5053
object DockerImageIdentifier {
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package cromwell.docker
2+
3+
import com.typesafe.config.Config
4+
import com.typesafe.scalalogging.LazyLogging
5+
import cromwell.docker.registryv2.flows.dockerhub.DockerHub
6+
import net.ceedubs.ficus.Ficus._
7+
import net.ceedubs.ficus.readers.ValueReader
8+
9+
case class DockerMirroring(mirrors: List[DockerMirror]) {
10+
def mirrorImage(image: DockerImageIdentifier): Option[DockerImageIdentifier] =
11+
mirrors.flatMap(_.mirrorImage.lift(image)).headOption
12+
}
13+
14+
object DockerMirroring {
15+
def fromConfig(config: Config): Option[DockerMirroring] = {
16+
val mirrors = config
17+
.getAs[Config]("docker-mirror")
18+
.map { mirrorConfig =>
19+
val dockerhubMirror = mirrorConfig.getAs[DockerHubMirror]("dockerhub")
20+
// Add support for additional repositories here
21+
22+
dockerhubMirror.toList
23+
}
24+
.getOrElse(List[DockerMirror]())
25+
Option.when(mirrors.nonEmpty)(DockerMirroring(mirrors))
26+
}
27+
}
28+
29+
sealed trait DockerMirror {
30+
val mirrorImage: PartialFunction[DockerImageIdentifier, DockerImageIdentifier]
31+
}
32+
33+
case class DockerHubMirror(address: String) extends DockerMirror {
34+
val mirrorImage: PartialFunction[DockerImageIdentifier, DockerImageIdentifier] = {
35+
case i if DockerHub.isValidDockerHubHost(i.host) => i.swapHost(address)
36+
}
37+
}
38+
39+
object DockerHubMirror extends LazyLogging {
40+
implicit val dockerHubMirrorOptionValueReader: ValueReader[Option[DockerHubMirror]] =
41+
(config: Config, path: String) =>
42+
config.getAs[Config](path) flatMap { dockerMirrorConfig =>
43+
val enabled = dockerMirrorConfig.as[Boolean]("enabled")
44+
val address = dockerMirrorConfig.as[Option[String]]("address").getOrElse("")
45+
if (address.isEmpty)
46+
logger.warn(
47+
"Potential misconfiguration: docker-mirror.dockerhub.enabled=true with no address provided. " +
48+
"Mirroring will be disabled."
49+
)
50+
Option.when(enabled && address.nonEmpty)(DockerHubMirror(address))
51+
}
52+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package cromwell.docker
2+
3+
import com.typesafe.config.ConfigFactory
4+
import cromwell.core.TestKitSuite
5+
import org.scalatest.flatspec.AnyFlatSpecLike
6+
import org.scalatest.matchers.should.Matchers
7+
import org.scalatest.prop.TableDrivenPropertyChecks._
8+
9+
class DockerMirroringSpec extends TestKitSuite with AnyFlatSpecLike with Matchers {
10+
11+
behavior of "DockerMirroring config parsing"
12+
13+
it should "parse empty config correctly" in {
14+
val config = ConfigFactory.parseString("")
15+
DockerMirroring.fromConfig(config) shouldBe None
16+
}
17+
18+
it should "parse dockerhub config correctly" in {
19+
val config = ConfigFactory.parseString("""
20+
|docker-mirror {
21+
| dockerhub {
22+
| enabled: true
23+
| address: "foo.bar"
24+
| }
25+
|}
26+
|""".stripMargin)
27+
DockerMirroring.fromConfig(config) shouldBe Some(DockerMirroring(mirrors = List(DockerHubMirror("foo.bar"))))
28+
}
29+
30+
it should "parse no-address dockerhub config correctly" in {
31+
val config = ConfigFactory.parseString("""
32+
|docker-mirror {
33+
| dockerhub {
34+
| enabled: true
35+
| }
36+
|}
37+
|""".stripMargin)
38+
DockerMirroring.fromConfig(config) shouldBe None
39+
}
40+
41+
it should "parse disabled config correctly" in {
42+
val config = ConfigFactory.parseString("""
43+
|docker-mirror {
44+
| dockerhub {
45+
| enabled: false
46+
| address: "foo.bar"
47+
| }
48+
|}
49+
|""".stripMargin)
50+
DockerMirroring.fromConfig(config) shouldBe None
51+
}
52+
53+
it should "parse non-dockerhub config (unsupported) correctly" in {
54+
val config = ConfigFactory.parseString("""
55+
|docker-mirror {
56+
| quay {
57+
| enabled: true
58+
| address: "foo.bar"
59+
| }
60+
|}
61+
|""".stripMargin)
62+
DockerMirroring.fromConfig(config) shouldBe None
63+
}
64+
65+
behavior of "DockerMirroring"
66+
67+
val mirroring = DockerMirroring(mirrors = List(DockerHubMirror("my.mirror.io")))
68+
69+
val dockerMirrorInputs = Table(
70+
("testName", "origImg", "mirrorResult"),
71+
("mirror a Dockerhub image", "ubuntu:latest", Some("my.mirror.io/ubuntu:latest")),
72+
("mirror a docker.io Dockerhub image",
73+
"docker.io/broadinstitute/cromwell",
74+
Some("my.mirror.io/broadinstitute/cromwell:latest")
75+
),
76+
("not mirror a GCR image", "gcr.io/broad-dsde-cromwell-dev/cromwell-drs-localizer", None)
77+
)
78+
79+
forAll(dockerMirrorInputs) { (testName, origImg, mirrorResult) =>
80+
it should testName in {
81+
val imgOpt = DockerImageIdentifier.fromString(origImg)
82+
val img = imgOpt.get
83+
val m = mirroring.mirrorImage(img)
84+
val name = m.map(_.fullName)
85+
name shouldBe mirrorResult
86+
}
87+
}
88+
}

docs/backends/GCPBatch.md

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
**Google Cloud Batch Backend (alpha)**
1+
**Google Cloud Batch Backend**
22

3-
[//]:
43
Google Cloud Batch is a fully managed service that lets you schedule, queue, and execute batch processing workloads on Google Cloud resources. Batch provisions resources and manages capacity on your behalf, allowing your batch workloads to run at scale.
54

65
This section offers detailed configuration instructions for using Cromwell with the Google Cloud Batch in all supported
@@ -9,8 +8,6 @@ authentication modes. Before reading further in this section please see the
98
and detailed instructions for the application default authentication scheme in particular.
109
The instructions below assume you have created a Google Cloud Storage bucket and a Google project enabled for the appropriate APIs.
1110

12-
*NOTE*: Google Cloud Batch is still in alpha version, this means that there could be breaking changes, be sure to review the [GCP Batch CHANGELOG](https://github.com/broadinstitute/cromwell/blob/develop/CHANGELOG.md#gcp-batch) carefully before upgrading.
13-
1411
**Configuring Authentication**
1512

1613
The `google` stanza in the Cromwell configuration file defines how to authenticate to Google. There are four different
@@ -164,6 +161,31 @@ backend {
164161
Note that as per the Google Secret Manager docs, the compute service account for the project in which the GCP Batch
165162
jobs will run will need to be assigned the `Secret Manager Secret Accessor` IAM role.
166163

164+
***Dockerhub Mirroring***
165+
166+
Cromwell supports automatic use of [GAR's Dockerhub mirror](https://cloud.google.com/artifact-registry/docs/pull-cached-dockerhub-images)
167+
in the Batch backend. When enabled, Dockerhub images will be pulled through this mirror rather than directly from Dockerhub.
168+
169+
To use, include the below `docker-mirror` config in your backend configuration:
170+
```
171+
backend {
172+
default = GCPBATCH
173+
providers {
174+
GCPBATCH {
175+
actor-factory = "cromwell.backend.google.batch.GcpBatchBackendLifecycleActorFactory"
176+
config {
177+
docker-mirror {
178+
dockerhub {
179+
enabled: true
180+
address: "mirror.gcr.io"
181+
}
182+
}
183+
}
184+
}
185+
}
186+
}
187+
```
188+
167189
**Monitoring**
168190

169191
In order to monitor metrics (CPU, Memory, Disk usage...) about the VM during Call Runtime, a workflow option can be used to specify the path to a script that will run in the background and write its output to a log file.

engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/preparation/JobPreparationActor.scala

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ class JobPreparationActor(workflowDescriptor: EngineWorkflowDescriptor,
7979
private[preparation] lazy val runtimeAttributeDefinitions = factory.runtimeAttributeDefinitions(initializationData)
8080
private[preparation] lazy val hasDockerDefinition =
8181
runtimeAttributeDefinitions.exists(_.name == DockerValidation.instance.key)
82+
private[preparation] lazy val dockerMirroring = factory.dockerMirroring
83+
private[preparation] lazy val platform = factory.platform
8284

8385
startWith(Idle, JobPreparationActorNoData)
8486

@@ -146,7 +148,7 @@ class JobPreparationActor(workflowDescriptor: EngineWorkflowDescriptor,
146148
private[preparation] def evaluateInputsAndAttributes(
147149
valueStore: ValueStore
148150
): ErrorOr[(WomEvaluatedCallInputs, Map[LocallyQualifiedName, WomValue])] = {
149-
import common.validation.ErrorOr.{ShortCircuitingFlatMap, NestedErrorOr}
151+
import common.validation.ErrorOr.{NestedErrorOr, ShortCircuitingFlatMap}
150152
for {
151153
evaluatedInputs <- ErrorOr(resolveAndEvaluateInputs(jobKey, expressionLanguageFunctions, valueStore)).flatten
152154
runtimeAttributes <- prepareRuntimeAttributes(evaluatedInputs)
@@ -331,12 +333,31 @@ class JobPreparationActor(workflowDescriptor: EngineWorkflowDescriptor,
331333
import RuntimeAttributeDefinition.{addDefaultsToAttributes, evaluateRuntimeAttributes}
332334
val curriedAddDefaultsToAttributes =
333335
addDefaultsToAttributes(runtimeAttributeDefinitions, workflowDescriptor.backendDescriptor.workflowOptions) _
336+
337+
def applyDockerMirroring(attributes: Map[LocallyQualifiedName, WomValue]): Map[LocallyQualifiedName, WomValue] = {
338+
val mirroredImageAttribute = for {
339+
mirror <- dockerMirroring
340+
origDockerStr <- attributes.get(RuntimeAttributesKeys.DockerKey).map(_.valueString)
341+
origDockerImg <- DockerImageIdentifier.fromString(origDockerStr) match {
342+
case Success(i) => Some(i)
343+
case Failure(e) =>
344+
workflowLogger.warn(s"Failed to attempt mirroring image ${origDockerStr} due to ${e.toString}")
345+
None
346+
}
347+
mirroredImage <- mirror.mirrorImage(origDockerImg)
348+
newDockerImageAttribute = (RuntimeAttributesKeys.DockerKey, WomString(mirroredImage.fullName))
349+
} yield newDockerImageAttribute
350+
351+
attributes ++ mirroredImageAttribute.toList
352+
}
353+
334354
val unevaluatedRuntimeAttributes = jobKey.call.callable.runtimeAttributes
335355
evaluateRuntimeAttributes(unevaluatedRuntimeAttributes,
336356
expressionLanguageFunctions,
337357
inputEvaluation,
338-
factory.platform
339-
) map curriedAddDefaultsToAttributes
358+
platform
359+
) map curriedAddDefaultsToAttributes map applyDockerMirroring
360+
340361
}
341362
}
342363

0 commit comments

Comments
 (0)