From 48c91bb2f1291811828432d99d165b53ce128c5b Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Fri, 14 Jun 2024 10:36:50 +0200 Subject: [PATCH 1/7] Add server metadata Signed-off-by: Roman Hros --- terraform/files/bin/deploy_cluster_api.sh | 3 +++ terraform/files/template/cluster-template.yaml | 2 ++ terraform/files/template/clusterctl.yaml.tmpl | 4 ++++ terraform/mgmtcluster.tf | 4 +++- terraform/variables.tf | 12 ++++++++++++ 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/terraform/files/bin/deploy_cluster_api.sh b/terraform/files/bin/deploy_cluster_api.sh index 8a573228..76bac4bc 100755 --- a/terraform/files/bin/deploy_cluster_api.sh +++ b/terraform/files/bin/deploy_cluster_api.sh @@ -31,6 +31,9 @@ clusterctl version --output yaml #MTU=`yq eval '.MTU_VALUE' ~/cluster-defaults/clusterctl.yaml` # Fix up nameserver list (trailing comma -- cosmetic) sed '/OPENSTACK_DNS_NAMESERVERS:/s@, \]"@ ]"@' -i ~/cluster-defaults/clusterctl.yaml +# Fix metadata dicts (trailing comma -- cosmetic) +sed '/OPENSTACK_CONTROL_PLANE_MACHINE_METADATA:/s@, }"@ }"@' -i ~/cluster-defaults/clusterctl.yaml +sed '/OPENSTACK_NODE_MACHINE_METADATA:/s@, }"@ }"@' -i ~/cluster-defaults/clusterctl.yaml # cp clusterctl.yaml to the right place if test "$(dotversion "$(clusterctl version -o short)")" -ge 10500; then diff --git a/terraform/files/template/cluster-template.yaml b/terraform/files/template/cluster-template.yaml index b0373d6e..37787162 100644 --- a/terraform/files/template/cluster-template.yaml +++ b/terraform/files/template/cluster-template.yaml @@ -320,6 +320,7 @@ spec: template: spec: flavor: ${OPENSTACK_CONTROL_PLANE_MACHINE_FLAVOR} + serverMetadata: ${OPENSTACK_CONTROL_PLANE_MACHINE_METADATA} serverGroupID: ${OPENSTACK_SRVGRP_CONTROLLER} image: ${OPENSTACK_IMAGE_NAME} sshKeyName: ${OPENSTACK_SSH_KEY_NAME} @@ -345,6 +346,7 @@ spec: name: ${CLUSTER_NAME}-cloud-config kind: Secret flavor: ${OPENSTACK_NODE_MACHINE_FLAVOR} + serverMetadata: ${OPENSTACK_NODE_MACHINE_METADATA} 
serverGroupID: ${OPENSTACK_SRVGRP_WORKER} image: ${OPENSTACK_IMAGE_NAME} sshKeyName: ${OPENSTACK_SSH_KEY_NAME} diff --git a/terraform/files/template/clusterctl.yaml.tmpl b/terraform/files/template/clusterctl.yaml.tmpl index b38bfd7e..d668133f 100644 --- a/terraform/files/template/clusterctl.yaml.tmpl +++ b/terraform/files/template/clusterctl.yaml.tmpl @@ -37,6 +37,10 @@ DEPLOY_FLUX: ${deploy_flux} # deploy metrics service DEPLOY_METRICS: ${deploy_metrics} +# OpenStack instance additional metadata +OPENSTACK_CONTROL_PLANE_MACHINE_METADATA: "{ %{ for metadata_key, metadata_value in controller_metadata ~} ${metadata_key}: '${metadata_value}', %{ endfor ~} }" +OPENSTACK_NODE_MACHINE_METADATA: "{ %{ for metadata_key, metadata_value in worker_metadata ~} ${metadata_key}: '${metadata_value}', %{ endfor ~} }" + # OpenStack flavors and machine count OPENSTACK_CONTROL_PLANE_MACHINE_FLAVOR: ${controller_flavor} CONTROL_PLANE_MACHINE_COUNT: ${controller_count} diff --git a/terraform/mgmtcluster.tf b/terraform/mgmtcluster.tf index 07ccef00..401e9916 100644 --- a/terraform/mgmtcluster.tf +++ b/terraform/mgmtcluster.tf @@ -318,6 +318,7 @@ resource "terraform_data" "mgmtcluster_bootstrap_files" { cloud_provider = var.cloud_provider, controller_count = var.controller_count, controller_flavor = var.controller_flavor, + controller_metadata = var.controller_metadata, deploy_cert_manager = var.deploy_cert_manager, deploy_cindercsi = var.deploy_cindercsi, deploy_flux = var.deploy_flux, @@ -340,7 +341,8 @@ resource "terraform_data" "mgmtcluster_bootstrap_files" { calico_version = var.calico_version, use_ovn_lb_provider = var.use_ovn_lb_provider, worker_count = var.worker_count, - worker_flavor = var.worker_flavor + worker_flavor = var.worker_flavor, + worker_metadata = var.worker_metadata }) destination = "/home/${var.ssh_username}/cluster-defaults/clusterctl.yaml" } diff --git a/terraform/variables.tf b/terraform/variables.tf index c250d61a..882c1b02 100644 --- a/terraform/variables.tf 
+++ b/terraform/variables.tf @@ -33,6 +33,18 @@ variable "worker_flavor" { default = "SCS-2V-4-20s" } +variable "controller_metadata" { + description = "additional metadata for instances running the k8s management nodes" + type = map(string) + default = {} +} + +variable "worker_metadata" { + description = "additional metadata for instances running the k8s worker nodes" + type = map(string) + default = {} +} + variable "availability_zone" { description = "availability zone for openstack resources" type = string From 798de7871f11f6695b020dd52776d8c6ab90068e Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Fri, 14 Jun 2024 13:00:03 +0200 Subject: [PATCH 2/7] Add docs about server metadata Signed-off-by: Roman Hros --- doc/configuration.md | 2 ++ terraform/environments/environment-default.tfvars | 2 ++ 2 files changed, 4 insertions(+) diff --git a/doc/configuration.md b/doc/configuration.md index 47ccc0b8..3e0a7b71 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -74,6 +74,8 @@ Parameters controlling the cluster creation: | `etcd_unsafe_fs` | `ETCD_UNSAFE_FS` | SCS | `false` | Use `barrier=0` for filesystem on control nodes to avoid storage latency. Use for multi-controller clusters on slow/networked storage, otherwise not recommended. | | `testcluster_name` | (cmd line) | SCS | `testcluster` | Allows setting the default cluster name, created at bootstrap (if `controller_count` is larger than 0) | | `restrict_kubeapi` | `RESTRICT_KUBEAPI` | SCS | `[ ]` | Allows restricting access to kubernetes API by list of CIDRs. Empty list (default) means public, `[ "none" ]` means internal access only. 
| +| `controller_metadata` | `OPENSTACK_CONTROL_PLANE_MACHINE_METADATA` | SCS | `{ }` | Additional OpenStack server metadata (key/value pairs) set on the instances running the k8s control plane nodes | +| `worker_metadata` | `OPENSTACK_NODE_MACHINE_METADATA` | SCS | `{ }` | Additional OpenStack server metadata (key/value pairs) set on the instances running the k8s worker nodes | | `` | `OPENSTACK_CLUSTER_GEN` | SCS | `geno01` | Generation counter for the OpenStackClusterTemplate resource. Increase, when changing restrict_kubeapi or other OC settings | | `capo_instance_create_timeout` | `CLUSTER_API_OPENSTACK_INSTANCE_CREATE_TIMEOUT` | capo | `5` | Time to wait for an OpenStack machine to be created (in minutes) | | `containerd_registry_files` | | SCS | `{"hosts":["./files/containerd/docker.io"], "certs":[]}` | Containerd registry hosts config files, see related [docs](./usage/containter-registry-configuration.md) for details. | diff --git a/terraform/environments/environment-default.tfvars b/terraform/environments/environment-default.tfvars index f538ae3e..515ae81f 100644 --- a/terraform/environments/environment-default.tfvars +++ b/terraform/environments/environment-default.tfvars @@ -39,6 +39,8 @@ deploy_cindercsi = "" # defaults to "true", dito etcd_unsafe_fs = "" # defaults to "false", dangerous testcluster_name = "NAME" # defaults to "testcluster" restrict_kubeapi = [ "IP/20", "IP/22" ] # defaults to empty (fully open), use [ "none" ] for exclusive internal access +controller_metadata = { metadata_key = "metadata_value" } # defaults to empty dict (no additional metadata) +worker_metadata = { metadata_key = "metadata_value" } # defaults to empty dict (no additional metadata) containerd_registry_files = {"hosts":[""], "certs":[""]} # defaults to '{"hosts":["./files/containerd/docker.io"], "certs":[]}' deploy_harbor = "" # defaults to "false", "true" deploys Harbor and forces deployment of flux and potentially other services (`cert_manager`, `nginx_ingress` and `cindercsi`), see `doc/usage/harbor.md` harbor_config = {"domain_name":"", 
"issuer_email":"", "persistence":"", "database_size":"size", "redis_size":"size", "trivy_size":"size"} # for defaults see ../variables.tf From cf0f358df201534456429f6a7cbd8b523cd5feac Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Fri, 14 Jun 2024 13:01:09 +0200 Subject: [PATCH 3/7] Add default flavor with local ssd for gx-scs control-plane nodes Signed-off-by: Roman Hros --- playbooks/templates/environment.tfvars.j2 | 6 +++++- terraform/environments/environment-gx-scs-staging.tfvars | 5 ++++- terraform/environments/environment-gx-scs.tfvars | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/playbooks/templates/environment.tfvars.j2 b/playbooks/templates/environment.tfvars.j2 index 2199a82c..f82bf623 100644 --- a/playbooks/templates/environment.tfvars.j2 +++ b/playbooks/templates/environment.tfvars.j2 @@ -6,9 +6,13 @@ availability_zone = "nova" external = "ext01" dns_nameservers = ["62.138.222.111", "62.138.222.222"] kind_flavor = "SCS-2V:4" -controller_flavor = "SCS-2V:4:20" +controller_flavor = "SCS-2V-4-20s" worker_flavor = "SCS-2V:4:20" +controller_metadata = { + ps_restart_after_maint = "true" +} + controller_count = 3 worker_count = 3 diff --git a/terraform/environments/environment-gx-scs-staging.tfvars b/terraform/environments/environment-gx-scs-staging.tfvars index 6c252fc4..79b74b65 100644 --- a/terraform/environments/environment-gx-scs-staging.tfvars +++ b/terraform/environments/environment-gx-scs-staging.tfvars @@ -5,7 +5,10 @@ cloud_provider = "gx-scs-staging" availability_zone = "nova" external = "ext01" kind_flavor = "SCS-2V:4" -controller_flavor = "SCS-8V:16:100" +controller_flavor = "SCS-4V-16-100s" worker_flavor = "SCS-8V:16:100" #image = "Ubuntu 22.04" #ssh_username = "ubuntu" +controller_metadata = { + ps_restart_after_maint = "true" +} diff --git a/terraform/environments/environment-gx-scs.tfvars b/terraform/environments/environment-gx-scs.tfvars index 2777c5f9..9af51783 100644 --- 
a/terraform/environments/environment-gx-scs.tfvars +++ b/terraform/environments/environment-gx-scs.tfvars @@ -4,10 +4,13 @@ cloud_provider = "gx-scs" availability_zone = "nova" external = "ext01" kind_flavor = "SCS-2V:4" -controller_flavor = "SCS-2V:4:20" +controller_flavor = "SCS-2V-4-20s" worker_flavor = "SCS-2V:4:20" #image = "Ubuntu 22.04" #ssh_username = "ubuntu" #kube_image_raw = "true" dns_nameservers = ["62.138.222.111", "62.138.222.222"] #controller_count = 0 +controller_metadata = { + ps_restart_after_maint = "true" +} From 3d67303bba047a2f540d505dfbe1f9805334f22d Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Fri, 14 Jun 2024 13:50:05 +0200 Subject: [PATCH 4/7] Bump default k8s version to v1.28.11 Signed-off-by: Roman Hros --- terraform/files/bin/openstack-kube-versions.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/files/bin/openstack-kube-versions.inc b/terraform/files/bin/openstack-kube-versions.inc index 98925806..5bbad236 100644 --- a/terraform/files/bin/openstack-kube-versions.inc +++ b/terraform/files/bin/openstack-kube-versions.inc @@ -3,7 +3,7 @@ # (c) Kurt Garloff , 3/2022 # SPDX-License-Identifier: Apache-2.0 # Images from https://swift.services.a.regiocloud.tech/swift/v1/AUTH_b182637428444b9aa302bb8d5a5a418c/openstack-k8s-capi-images -k8s_versions=("v1.21.14" "v1.22.17" "v1.23.16" "v1.24.15" "v1.25.15" "v1.26.14" "v1.27.12" "v1.28.10" "v1.29.3") +k8s_versions=("v1.21.14" "v1.22.17" "v1.23.16" "v1.24.15" "v1.25.15" "v1.26.14" "v1.27.12" "v1.28.11" "v1.29.3") # OCCM, CCM-RBAC, Cinder CSI, Cinder-Snapshot (TODO: Manila CSI) occm_versions=("v1.21.1" "v1.22.2" "v1.23.4" "v1.24.6" "v1.25.6" "v1.26.4" "v1.27.3" "v1.28.2" "v1.29.0") #ccmr_versions=("" "v1.22.2" "v1.23.4" "v1.24.6" "v1.25.6" "v1.26.4" "v1.27.3" "v1.28.2" "v1.29.0") From 38c4457cc8f8e9a7d61746a3d5a754da63d28a54 Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Thu, 20 Jun 2024 14:01:01 +0200 Subject: [PATCH 5/7] Run CI without anti_affinity There are not
enough hosts for local ssd flavors Signed-off-by: Roman Hros --- playbooks/templates/environment.tfvars.j2 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/playbooks/templates/environment.tfvars.j2 b/playbooks/templates/environment.tfvars.j2 index f82bf623..438cd6c5 100644 --- a/playbooks/templates/environment.tfvars.j2 +++ b/playbooks/templates/environment.tfvars.j2 @@ -13,6 +13,9 @@ controller_metadata = { ps_restart_after_maint = "true" } +# FIXME: Remove when CI runs on gx-scs2 environment(3+ physical machines for local ssd flavors) +anti_affinity = false + controller_count = 3 worker_count = 3 From 053c40a908ea15e59a9f42ce90e507e580ad610d Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Fri, 21 Jun 2024 09:58:47 +0200 Subject: [PATCH 6/7] Run only KaaS v2 tests Signed-off-by: Roman Hros --- playbooks/tasks/scs_compliance.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/tasks/scs_compliance.yaml b/playbooks/tasks/scs_compliance.yaml index 55f7b6fa..4f78e42c 100644 --- a/playbooks/tasks/scs_compliance.yaml +++ b/playbooks/tasks/scs_compliance.yaml @@ -31,7 +31,7 @@ ansible.builtin.shell: cmd: ". 
{{ python_venv_dir }}/bin/activate && - python3 {{ check_dir }}/Tests/scs-compliance-check.py {{ check_dir }}/Tests/scs-compatible-kaas.yaml -v -s KaaS_V1 -a kubeconfig={{ kubeconfig_path }}" + python3 {{ check_dir }}/Tests/scs-compliance-check.py {{ check_dir }}/Tests/scs-compatible-kaas.yaml -v -s KaaS_V1 -V v2 -a kubeconfig={{ kubeconfig_path }}" changed_when: false register: scs_compliance_results always: From b46a69a01a66873a5c769b7d26b59144ce79a080 Mon Sep 17 00:00:00 2001 From: Roman Hros Date: Fri, 21 Jun 2024 12:40:42 +0200 Subject: [PATCH 7/7] Allow the use of soft-anti-affinity for the control plane Signed-off-by: Roman Hros --- doc/configuration.md | 1 + playbooks/templates/environment.tfvars.j2 | 2 +- terraform/environments/environment-default.tfvars | 1 + terraform/files/bin/create_cluster.sh | 5 ++++- terraform/files/template/clusterctl.yaml.tmpl | 1 + terraform/mgmtcluster.tf | 1 + terraform/variables.tf | 6 ++++++ 7 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/configuration.md b/doc/configuration.md index 3e0a7b71..25f775db 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -67,6 +67,7 @@ Parameters controlling the cluster creation: | `` | `CONTROL_PLANE_ROOT_DISKSIZE` | SCS | `20` | *If* diskless flavors are used for control plane nodes, this is the allocated root volume disk size (in GB) | | `` | `WORKER_ROOT_DISKSIZE` | SCS | `20` | *If* diskless flavors are used for worker nodes, this is the allocated root volume disk size (in GB) | | `anti_affinity` | `OPENSTACK_ANTI_AFFINITY` | SCS | `true` | Use anti-affinity server groups to prevent k8s nodes on same host (soft for workers, hard for controllers) | +| `soft_anti_affinity_controller` | `OPENSTACK_SOFT_ANTI_AFFINITY_CONTROLLER` | SCS | `false` | Allow the use of soft-anti-affinity for the controllers (if `anti_affinity` is `true`) | | `` | `OPENSTACK_SRVGRP_CONTROLLER` | SCS | `nonono` | Autogenerated if `anti_affinity` is `true`, eliminated otherwise | | `` | 
`OPENSTACK_SRVGRP_WORKER` | SCS | `nonono` | Autogenerated if `anti_affinity` is `true`, eliminated otherwise | | `deploy_occm` | `DEPLOY_OCCM` | SCS | `true` | Deploy the given version of OCCM into the cluster. `true` (default) chooses the latest version matching the k8s version. You can specify `master` to chose the upstream master branch. Don't disable this. | diff --git a/playbooks/templates/environment.tfvars.j2 b/playbooks/templates/environment.tfvars.j2 index 438cd6c5..08b9ff1c 100644 --- a/playbooks/templates/environment.tfvars.j2 +++ b/playbooks/templates/environment.tfvars.j2 @@ -14,7 +14,7 @@ controller_metadata = { } # FIXME: Remove when CI runs on gx-scs2 environment(3+ physical machines for local ssd flavors) -anti_affinity = false +soft_anti_affinity_controller = true controller_count = 3 worker_count = 3 diff --git a/terraform/environments/environment-default.tfvars b/terraform/environments/environment-default.tfvars index 515ae81f..ff7168bc 100644 --- a/terraform/environments/environment-default.tfvars +++ b/terraform/environments/environment-default.tfvars @@ -27,6 +27,7 @@ node_cidr = "" # defaults to "10.8.0.0/20" service_cidr = "" # defaults to "10.96.0.0/12" pod_cidr = "" # defaults to "192.168.0.0/16" anti_affinity = "" # defaults to "true" +soft_anti_affinity_controller = "" # defaults to "false" use_cilium = "version/true/false" # defaults to "true", can also be set to "vx.y.z", also see cilium_binaries use_ovn_lb_provider = "auto/true/false" # use OVN LB if available (auto) or force (true) or never (false) deploy_nginx_ingress = "version/true/false" # defaults to "true", you can also set vX.Y.Z if you want diff --git a/terraform/files/bin/create_cluster.sh b/terraform/files/bin/create_cluster.sh index b832b8fd..cce22200 100755 --- a/terraform/files/bin/create_cluster.sh +++ b/terraform/files/bin/create_cluster.sh @@ -91,7 +91,10 @@ if test "$CONTROL_PLANE_MACHINE_COUNT" -gt 0 && grep '^ *OPENSTACK_ANTI_AFFINITY SRVGRP_CONTROLLER=$(echo 
"$SRVGRP" | grep "${PREFIX}-${CLUSTER_NAME}-controller" | sed 's/^\([0-9a-f\-]*\) .*$/\1/') SRVGRP_WORKER=$(echo "$SRVGRP" | grep "${PREFIX}-${CLUSTER_NAME}-worker" | sed 's/^\([0-9a-f\-]*\) .*$/\1/') if test -z "$SRVGRP_CONTROLLER"; then - SRVGRP_CONTROLLER=$(openstack --os-compute-api-version 2.15 server group create --policy anti-affinity -f value -c id ${PREFIX}-${CLUSTER_NAME}-controller) + ANTI_AFFINITY_POLICY_CONTROLLER=anti-affinity + SOFT_ANTI_AFFINITY_CONTROLLER=$(yq eval '.OPENSTACK_SOFT_ANTI_AFFINITY_CONTROLLER' $CCCFG) + if test "$SOFT_ANTI_AFFINITY_CONTROLLER" = "true"; then ANTI_AFFINITY_POLICY_CONTROLLER=soft-anti-affinity; fi + SRVGRP_CONTROLLER=$(openstack --os-compute-api-version 2.15 server group create --policy ${ANTI_AFFINITY_POLICY_CONTROLLER} -f value -c id ${PREFIX}-${CLUSTER_NAME}-controller) SRVGRP_WORKER=$(openstack --os-compute-api-version 2.15 server group create --policy soft-anti-affinity -f value -c id ${PREFIX}-${CLUSTER_NAME}-worker) fi echo "Adding server groups $SRVGRP_CONTROLLER and $SRVGRP_WORKER to $CCCFG" diff --git a/terraform/files/template/clusterctl.yaml.tmpl b/terraform/files/template/clusterctl.yaml.tmpl index d668133f..887f3285 100644 --- a/terraform/files/template/clusterctl.yaml.tmpl +++ b/terraform/files/template/clusterctl.yaml.tmpl @@ -84,6 +84,7 @@ OPENSTACK_SSH_KEY_NAME: ${prefix}-keypair # Use anti-affinity server groups OPENSTACK_ANTI_AFFINITY: ${anti_affinity} +OPENSTACK_SOFT_ANTI_AFFINITY_CONTROLLER: ${soft_anti_affinity_controller} OPENSTACK_SRVGRP_CONTROLLER: nonono OPENSTACK_SRVGRP_WORKER: nonono diff --git a/terraform/mgmtcluster.tf b/terraform/mgmtcluster.tf index 401e9916..1cef822d 100644 --- a/terraform/mgmtcluster.tf +++ b/terraform/mgmtcluster.tf @@ -313,6 +313,7 @@ resource "terraform_data" "mgmtcluster_bootstrap_files" { provisioner "file" { content = templatefile("files/template/clusterctl.yaml.tmpl", { anti_affinity = var.anti_affinity, + soft_anti_affinity_controller = 
var.soft_anti_affinity_controller, availability_zone = var.availability_zone, capo_instance_create_timeout = var.capo_instance_create_timeout, cloud_provider = var.cloud_provider, diff --git a/terraform/variables.tf b/terraform/variables.tf index 882c1b02..95a9436d 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -203,6 +203,12 @@ variable "anti_affinity" { default = true } +variable "soft_anti_affinity_controller" { + description = "allow the use of soft-anti-affinity for the control plane" + type = bool + default = false +} + variable "dns_nameservers" { description = "array of nameservers to be set for subnets, prefer local DNS servers if available" type = list(string)