Skip to content

Commit e95f358

Browse files
authored
[llvm-zorg] Set up libc++ premerge testing runner sets. (#474)
Create new runner sets for libc++ premerge testing in the LLVM Github premerge testing infrastructure.
1 parent ece8298 commit e95f358

File tree

6 files changed

+227
-2
lines changed

6 files changed

+227
-2
lines changed

premerge/gke_cluster/main.tf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,38 @@ resource "google_container_node_pool" "llvm_premerge_linux" {
6565
}
6666
}
6767

68+
resource "google_container_node_pool" "llvm_premerge_libcxx" {
69+
name = "llvm-premerge-libcxx"
70+
location = var.region
71+
cluster = google_container_cluster.llvm_premerge.name
72+
initial_node_count = 0
73+
74+
autoscaling {
75+
total_min_node_count = 0
76+
total_max_node_count = 32
77+
}
78+
79+
node_config {
80+
machine_type = var.libcxx_machine_type
81+
taint {
82+
key = "premerge-platform-libcxx"
83+
value = "linux-libcxx"
84+
effect = "NO_SCHEDULE"
85+
}
86+
labels = {
87+
"premerge-platform-libcxx" : "linux-libcxx"
88+
}
89+
disk_size_gb = 200
90+
# Terraform wants to recreate the node pool every time when running
91+
# terraform apply unless we explicitly set this.
92+
# TODO(boomanaiden154): Look into why terraform is doing this so we do
93+
# not need this hack.
94+
resource_labels = {
95+
"goog-gke-node-pool-provisioning-model" = "on-demand"
96+
}
97+
}
98+
}
99+
68100
resource "google_container_node_pool" "llvm_premerge_windows" {
69101
name = "llvm-premerge-windows"
70102
location = var.region

premerge/gke_cluster/variables.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ variable "windows_machine_type" {
1818
type = string
1919
}
2020

21+
variable "libcxx_machine_type" {
22+
description = "The type of machine to use for libcxx instances (linux)"
23+
type = string
24+
}
25+
2126
variable "service_node_pool_locations" {
2227
description = "The location to run the service node pool in"
2328
type = list(any)

premerge/libcxx_runners_values.yaml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
githubConfigUrl: "https://github.com/llvm"
2+
githubConfigSecret: "github-token"
3+
4+
minRunners: 0
5+
maxRunners: 32
6+
7+
runnerGroup: ${ runner_group_name }
8+
9+
template:
10+
metadata:
11+
annotations:
12+
cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
13+
spec:
14+
tolerations:
15+
- key: "premerge-platform-libcxx"
16+
operator: "Equal"
17+
value: "linux-libcxx"
18+
effect: "NoSchedule"
19+
nodeSelector:
20+
premerge-platform-libcxx: linux-libcxx
21+
containers:
22+
- name: runner
23+
image: ${ runner_image }
24+
command: ["/home/gha/actions-runner/run.sh"]
25+
resources:
26+
# If we don't set the CPU request high enough here, 2 runners might
27+
# be scheduled on the same node, meaning 2 jobs, and they will starve
28+
# each other.
29+
#
30+
# This number should be:
31+
# - greater than number_of_cores / 2:
32+
# A value lower than that could allow the scheduler to put 2
33+
# runners on the same node. Meaning 2 jobs sharing the resources of
34+
# a single node.
35+
# - lower than number_of_cores:
36+
# Each node has some basic services running (metrics, for example). Those
37+
# already require some amount of CPU (~0.5). This means we don't
38+
# exactly have N cores to allocate, but N - epsilon.
39+
#
40+
# We also need to request sufficient memory to not get OOM killed.
41+
requests:
42+
cpu: 28
43+
memory: "100Gi"
44+
limits:
45+
cpu: 32
46+
memory: "128Gi"
47+

premerge/main.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ module "premerge_cluster_us_central" {
4747
source = "./gke_cluster"
4848
cluster_name = "llvm-premerge-cluster-us-central"
4949
region = "us-central1-a"
50+
libcxx_machine_type = "n2d-standard-32"
5051
linux_machine_type = "n2-standard-64"
5152
windows_machine_type = "n2-standard-32"
5253
}
@@ -59,6 +60,7 @@ module "premerge_cluster_us_west" {
5960
source = "./gke_cluster"
6061
cluster_name = "llvm-premerge-cluster-us-west"
6162
region = "us-west1"
63+
libcxx_machine_type = "n2d-standard-32"
6264
linux_machine_type = "n2d-standard-64"
6365
windows_machine_type = "n2d-standard-32"
6466
service_node_pool_locations = ["us-west1-a"]

premerge/premerge_resources/main.tf

Lines changed: 125 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@ resource "kubernetes_namespace" "llvm_premerge_linux_runners" {
2323
}
2424
}
2525

26+
resource "kubernetes_namespace" "llvm_premerge_libcxx_runners" {
27+
metadata {
28+
name = "llvm-premerge-libcxx-runners"
29+
}
30+
}
31+
32+
resource "kubernetes_namespace" "llvm_premerge_libcxx_release_runners" {
33+
metadata {
34+
name = "llvm-premerge-libcxx-release-runners"
35+
}
36+
}
37+
38+
resource "kubernetes_namespace" "llvm_premerge_libcxx_next_runners" {
39+
metadata {
40+
name = "llvm-premerge-libcxx-next-runners"
41+
}
42+
}
43+
44+
resource "kubernetes_namespace" "llvm_premerge_windows_runners" {
45+
metadata {
46+
name = "llvm-premerge-windows-runners"
47+
}
48+
}
49+
2650
resource "kubernetes_secret" "linux_github_pat" {
2751
metadata {
2852
name = "github-token"
@@ -40,10 +64,55 @@ resource "kubernetes_secret" "linux_github_pat" {
4064
depends_on = [kubernetes_namespace.llvm_premerge_linux_runners]
4165
}
4266

43-
resource "kubernetes_namespace" "llvm_premerge_windows_runners" {
67+
resource "kubernetes_secret" "libcxx_github_pat" {
4468
metadata {
45-
name = "llvm-premerge-windows-runners"
69+
name = "github-token"
70+
namespace = "llvm-premerge-libcxx-runners"
71+
}
72+
73+
data = {
74+
"github_app_id" = var.github_app_id
75+
"github_app_installation_id" = var.github_app_installation_id
76+
"github_app_private_key" = var.github_app_private_key
77+
}
78+
79+
type = "Opaque"
80+
81+
depends_on = [kubernetes_namespace.llvm_premerge_libcxx_runners]
82+
}
83+
84+
resource "kubernetes_secret" "libcxx_release_github_pat" {
85+
metadata {
86+
name = "github-token"
87+
namespace = "llvm-premerge-libcxx-release-runners"
88+
}
89+
90+
data = {
91+
"github_app_id" = var.github_app_id
92+
"github_app_installation_id" = var.github_app_installation_id
93+
"github_app_private_key" = var.github_app_private_key
94+
}
95+
96+
type = "Opaque"
97+
98+
depends_on = [kubernetes_namespace.llvm_premerge_libcxx_release_runners]
99+
}
100+
101+
resource "kubernetes_secret" "libcxx_next_github_pat" {
102+
metadata {
103+
name = "github-token"
104+
namespace = "llvm-premerge-libcxx-next-runners"
105+
}
106+
107+
data = {
108+
"github_app_id" = var.github_app_id
109+
"github_app_installation_id" = var.github_app_installation_id
110+
"github_app_private_key" = var.github_app_private_key
46111
}
112+
113+
type = "Opaque"
114+
115+
depends_on = [kubernetes_namespace.llvm_premerge_libcxx_next_runners]
47116
}
48117

49118
resource "kubernetes_secret" "windows_github_pat" {
@@ -111,6 +180,60 @@ resource "helm_release" "github_actions_runner_set_windows" {
111180
]
112181
}
113182

183+
resource "helm_release" "github_actions_runner_set_libcxx" {
184+
name = "llvm-premerge-libcxx-runners"
185+
namespace = "llvm-premerge-libcxx-runners"
186+
repository = "oci://ghcr.io/actions/actions-runner-controller-charts"
187+
version = "0.11.0"
188+
chart = "gha-runner-scale-set"
189+
190+
values = [
191+
"${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_runner_image })}"
192+
]
193+
194+
depends_on = [
195+
kubernetes_namespace.llvm_premerge_libcxx_runners,
196+
helm_release.github_actions_runner_controller,
197+
kubernetes_secret.libcxx_github_pat,
198+
]
199+
}
200+
201+
resource "helm_release" "github_actions_runner_set_libcxx_release" {
202+
name = "llvm-premerge-libcxx-release-runners"
203+
namespace = "llvm-premerge-libcxx-release-runners"
204+
repository = "oci://ghcr.io/actions/actions-runner-controller-charts"
205+
version = "0.11.0"
206+
chart = "gha-runner-scale-set"
207+
208+
values = [
209+
"${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_release_runner_image })}"
210+
]
211+
212+
depends_on = [
213+
kubernetes_namespace.llvm_premerge_libcxx_release_runners,
214+
helm_release.github_actions_runner_controller,
215+
kubernetes_secret.libcxx_release_github_pat,
216+
]
217+
}
218+
219+
resource "helm_release" "github_actions_runner_set_libcxx_next" {
220+
name = "llvm-premerge-libcxx-next-runners"
221+
namespace = "llvm-premerge-libcxx-next-runners"
222+
repository = "oci://ghcr.io/actions/actions-runner-controller-charts"
223+
version = "0.11.0"
224+
chart = "gha-runner-scale-set"
225+
226+
values = [
227+
"${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_next_runner_image })}"
228+
]
229+
230+
depends_on = [
231+
kubernetes_namespace.llvm_premerge_libcxx_next_runners,
232+
helm_release.github_actions_runner_controller,
233+
kubernetes_secret.libcxx_next_github_pat,
234+
]
235+
}
236+
114237
resource "kubernetes_namespace" "grafana" {
115238
metadata {
116239
name = "grafana"

premerge/premerge_resources/variables.tf

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,19 @@ variable "externalservices_tempo_basicauth_username" {
5454
variable "runner_group_name" {
5555
type = string
5656
}
57+
58+
variable "libcxx_runner_image" {
59+
type = string
60+
default = "ghcr.io/llvm/libcxx-linux-builder:b060022103f51d8ca1dad84122ef73927c86512"
61+
}
62+
63+
variable "libcxx_release_runner_image" {
64+
type = string
65+
default = "ghcr.io/llvm/libcxx-linux-builder:d8a0709b1090350a7fe3604d8ab78c7d62f10698"
66+
}
67+
68+
# Same value as libcxx_runner_image at this time.
69+
variable "libcxx_next_runner_image" {
70+
type = string
71+
default = "ghcr.io/llvm/libcxx-linux-builder:b060022103f51d8ca1dad84122ef73927c86512"
72+
}

0 commit comments

Comments
 (0)