Skip to content

Commit db3dbb4

Browse files
committed
Introduce var machine_zones to specify multiple zones where VM instances will be created at random
1 parent 6bab8d4 commit db3dbb4

File tree

10 files changed

+45
-15
lines changed

10 files changed

+45
-15
lines changed

cloudRun.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ resource "google_cloud_run_v2_service" "autoscaler" {
3939
value = local.projectId
4040
}
4141
env {
42-
name = "ZONE"
43-
value = local.zone
42+
name = "ZONES"
43+
value = join(",", local.zones)
4444
}
4545
env {
4646
name = "TASK_QUEUE"

iam.tf

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,15 @@ resource "google_project_iam_custom_role" "read_secret_version" {
6262

6363
// ---- autoscaler-sa roles member ----
6464
resource "google_project_iam_member" "manage_vm_instances_member" {
65+
66+
count = length(local.zones)
67+
6568
project = local.projectId
6669
member = "serviceAccount:${google_service_account.autoscaler_sa.email}"
6770
role = google_project_iam_custom_role.manage_vm_instances.id
6871
condition {
69-
title = "VM instance administration with a fix prefix ${var.github_runner_prefix}"
70-
expression = "resource.name.startsWith('projects/${local.projectId}/zones/${local.zone}/instances/${var.github_runner_prefix}-')"
72+
title = "VM instance administration with a fix prefix ${var.github_runner_prefix} in zone ${local.zones[count.index]}"
73+
expression = "resource.name.startsWith('projects/${local.projectId}/zones/${local.zones[count.index]}/instances/${var.github_runner_prefix}-')"
7174
}
7275
}
7376

@@ -103,12 +106,15 @@ resource "google_project_iam_member" "create_vm_from_instance_template_member" {
103106
}
104107

105108
resource "google_project_iam_member" "create_disk_member" {
109+
110+
count = length(local.zones)
111+
106112
project = local.projectId
107113
member = "serviceAccount:${google_service_account.autoscaler_sa.email}"
108114
role = google_project_iam_custom_role.create_disk.id
109115
condition {
110-
title = "Create disk with a fix prefix ${var.github_runner_prefix}"
111-
expression = "resource.name.startsWith('projects/${local.projectId}/zones/${local.zone}/disks/${var.github_runner_prefix}-')"
116+
title = "Create disk with a fix prefix ${var.github_runner_prefix} in zone ${local.zones[count.index]}"
117+
expression = "resource.name.startsWith('projects/${local.projectId}/zones/${local.zones[count.index]}/disks/${var.github_runner_prefix}-')"
112118
}
113119
}
114120

main.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ locals {
1818
projectId = data.google_client_config.current.project
1919
projectNumber = data.google_project.current.number
2020
region = data.google_client_config.current.region
21-
zone = data.google_client_config.current.zone
21+
zones = distinct(concat(var.machine_zones, [data.google_client_config.current.zone]))
2222
runnerLabel = join(",", var.github_runner_labels)
2323
runnerLabelInstanceTemplate = length(var.github_runner_labels) == 0 ? "" : format("--no-default-labels --labels '%s'", local.runnerLabel)
2424
hasEnterprise = length(var.github_enterprise) > 0

outputs.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,8 @@ output "github_pat_secret_name" {
1313
value = google_secret_manager_secret.github_pat_token.secret_id
1414
description = "The name of the secret in gcp Secret Manager where the GitHub Fine-grained personal access token (classic) has to be saved"
1515
}
16+
17+
output "machine_zones" {
18+
value = local.zones
19+
description = "The zones where VM instances will be created in"
20+
}

runner-autoscaler/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ The scaler is configured via the following environment variables:
3232
| ROUTE_DELETE_VM | "/delete_vm" | The Cloud Run callback path invoked by Cloud Task when a VM instance should be **deleted**. The payload contains the name of the "to be deleted" VM instance. |
3333
| ROUTE_CREATE_VM | "/create_vm" | The Cloud Run callback path invoked by Cloud Task when a VM instance should be **created**. The payload contains the name of the "to be created" VM instance. |
3434
| PROJECT_ID | "" | The Google Cloud Project Id. |
35-
| ZONE | "" | The Google Cloud zone where the VM instance will be created. |
35+
| ZONES | "" *(comma separated list)* | One or more Google Cloud zones in which the VM instances will be created. The zone for each instance is chosen pseudo-randomly, derived deterministically from the instance name. |
3636
| TASK_QUEUE | "" | The relative resource name of the Cloud Task queue. |
3737
| TASK_DISPATCH_TIMEOUT | "180" | The timeout in seconds for the Cloud Task callback (should be longer than it takes to create/delete a VM instance) |
3838
| CREATE_VM_DELAY | "10" | The delay in seconds to wait before the VM is created. Useful for skipping the VM creation if the workflow job is canceled by the user shortly afterwards. |

runner-autoscaler/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func main() {
6868
RouteDeleteVm: getEnvDefault("ROUTE_DELETE_VM", "/delete_vm"),
6969
RouteCreateVm: getEnvDefault("ROUTE_CREATE_VM", "/create_vm"),
7070
ProjectId: mustGetEnv("PROJECT_ID"),
71-
Zone: mustGetEnv("ZONE"),
71+
Zones: strings.Split(mustGetEnv("ZONES"), ","),
7272
TaskQueue: mustGetEnv("TASK_QUEUE"),
7373
TaskTimeout: getEnvDefaultInt64("TASK_DISPATCH_TIMEOUT", 180),
7474
InstanceTemplate: mustGetEnv("INSTANCE_TEMPLATE"),

runner-autoscaler/pkg/srv.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"crypto/hmac"
77
"crypto/sha256"
8+
"encoding/binary"
89
"encoding/hex"
910
"encoding/json"
1011
"fmt"
@@ -230,6 +231,15 @@ func CalcSigHex(secret []byte, data []byte) string {
230231
return hex.EncodeToString(sig.Sum(nil))
231232
}
232233

234+
// depending on an arbitrary input string a zone is selected
235+
// the same input string leads to the same zone
236+
func (s *Autoscaler) PickRandomZone(seed string) string {
237+
238+
hash := sha256.Sum256([]byte(seed))
239+
index := binary.BigEndian.Uint64(hash[:8]) % uint64(len(s.conf.Zones))
240+
return s.conf.Zones[index]
241+
}
242+
233243
// returns http body, "src" query, error
234244
func (s *Autoscaler) verifySignature(ctx *gin.Context) ([]byte, Source, error) {
235245

@@ -262,6 +272,7 @@ func (s *Autoscaler) verifySignature(ctx *gin.Context) ([]byte, Source, error) {
262272
}
263273
}
264274

275+
/*
265276
func (s *Autoscaler) GetInstanceState(ctx context.Context, instanceName string) (State, error) {
266277
267278
client := newComputeClient(ctx)
@@ -334,6 +345,7 @@ func (s *Autoscaler) StopInstance(ctx context.Context, instanceName string) erro
334345
}
335346
return nil
336347
}
348+
*/
337349

338350
// blocking until the instance is deleted or the deletion fails
339351
func (s *Autoscaler) DeleteInstance(ctx context.Context, instanceName string) error {
@@ -348,7 +360,7 @@ func (s *Autoscaler) DeleteInstance(ctx context.Context, instanceName string) er
348360
defer client.Close()
349361
if res, err := client.Delete(ctx, &computepb.DeleteInstanceRequest{
350362
Project: s.conf.ProjectId,
351-
Zone: s.conf.Zone,
363+
Zone: s.PickRandomZone(instanceName),
352364
Instance: instanceName,
353365
}); err != nil {
354366
if apiErr, ok := err.(*apierror.APIError); ok && apiErr.HTTPCode() == 404 {
@@ -382,14 +394,15 @@ func (s *Autoscaler) CreateInstanceFromTemplate(ctx context.Context, instanceNam
382394
computeClient := newComputeClient(ctx)
383395
defer computeClient.Close()
384396

397+
zone := s.PickRandomZone(instanceName)
385398
var machine *string = nil
386399
if machineType != nil {
387-
machine = proto.String(fmt.Sprintf("zones/%s/machineTypes/%s", s.conf.Zone, *machineType))
400+
machine = proto.String(fmt.Sprintf("zones/%s/machineTypes/%s", zone, *machineType))
388401
}
389402

390403
if res, err := computeClient.Insert(ctx, &computepb.InsertInstanceRequest{
391404
Project: s.conf.ProjectId,
392-
Zone: s.conf.Zone,
405+
Zone: zone,
393406
InstanceResource: &computepb.Instance{
394407
Name: proto.String(instanceName),
395408
MachineType: machine,
@@ -706,7 +719,7 @@ type AutoscalerConfig struct {
706719
RouteCreateVm string
707720
RouteDeleteVm string
708721
ProjectId string
709-
Zone string
722+
Zones []string
710723
TaskQueue string
711724
TaskTimeout int64
712725
InstanceTemplate string

runner-autoscaler/test/main_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func init() {
3737
RouteCreateVm: "/create",
3838
RouteDeleteVm: "/delete",
3939
ProjectId: PROJECT_ID,
40-
Zone: ZONE,
40+
Zones: []string{ZONE},
4141
TaskQueue: "projects/" + PROJECT_ID + "/locations/" + REGION + "/queues/autoscaler-callback-queue",
4242
InstanceTemplate: "projects/" + PROJECT_ID + "/global/instanceTemplates/ephemeral-github-runner",
4343
SecretVersion: "projects/" + PROJECT_ID + "/secrets/github-pat-token/versions/latest",

variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ variable "machine_timeout" {
5050
default = 14400 // 4 h
5151
}
5252

53+
variable "machine_zones" {
54+
type = list(string)
55+
description = "One or multiple Google Cloud zones where the VM instances will be created in. The zone is selected at random for each instance."
56+
default = []
57+
}
58+
5359
variable "autoscaler_timeout" {
5460
type = number
5561
description = "The timeout of the autoscaler in seconds. Should be greater than the time required to create/delete a VM instance."

version.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# used to tag the docker image of autoscaler. Should be incremented every time the autoscaler code is changed.
22
locals {
3-
autoscaler_version = "1.3"
3+
autoscaler_version = "1.4-snapshot"
44
}

0 commit comments

Comments
 (0)