From 2d728f507eae5093893df44fe54a2fc33ce7bf1f Mon Sep 17 00:00:00 2001
From: wtripp180901
Date: Mon, 6 Oct 2025 11:31:58 +0100
Subject: [PATCH 1/2] export state directory to ondemand nodes for caas

---
 ansible/roles/zenith_proxy/tasks/main.yml       |  2 ++
 .../.caas/inventory/group_vars/all/nfs.yml      | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/zenith_proxy/tasks/main.yml b/ansible/roles/zenith_proxy/tasks/main.yml
index 7a4c03413..360e77d54 100644
--- a/ansible/roles/zenith_proxy/tasks/main.yml
+++ b/ansible/roles/zenith_proxy/tasks/main.yml
@@ -61,6 +61,8 @@
     group: "{{ zenith_proxy_podman_user }}"
     mode: "0755"
   become: true
+  delegate_to: "{{ groups['control'] | first }}"
+  run_once: true
 
 - name: Initialise Zenith client
   # Use a foreground command rather than the podman_container module as I could not
diff --git a/environments/.caas/inventory/group_vars/all/nfs.yml b/environments/.caas/inventory/group_vars/all/nfs.yml
index 0eca0c836..63c54d4fa 100644
--- a/environments/.caas/inventory/group_vars/all/nfs.yml
+++ b/environments/.caas/inventory/group_vars/all/nfs.yml
@@ -5,8 +5,18 @@ caas_nfs_home:
   - comment: Export /exports/home from Slurm control node as /home
     nfs_enable:
       server: "{{ inventory_hostname in groups['control'] }}"
-      clients: "{{ inventory_hostname in groups['cluster'] }}"
+      clients: "{{ inventory_hostname in groups['cluster'] and inventory_hostname not in groups['control'] }}"
     nfs_export: "/exports/home" # assumes default site TF is being used
     nfs_client_mnt_point: "/home"
+    nfs_export_options: "rw,secure,root_squash"
 
-nfs_configurations: "{{ caas_nfs_home if not cluster_home_manila_share | bool else [] }}"
+caas_ood_zenith_state_dir:
+  - comment: Export /var/lib/state from Slurm control node
+    nfs_enable:
+      server: "{{ inventory_hostname in groups['control'] }}"
+      clients: "{{ inventory_hostname in groups['openondemand'] }}"
+    nfs_export: "/var/lib/state"
+    nfs_client_mnt_point: "/var/lib/state"
+    nfs_export_options: "rw,secure,root_squash"
+
+nfs_configurations: "{{ caas_ood_zenith_state_dir + ( caas_nfs_home if not cluster_home_manila_share | bool else [] ) }}"

From 5b775d7030c1c4b26fbed57cc91e26b88be6f4b4 Mon Sep 17 00:00:00 2001
From: wtripp180901
Date: Mon, 6 Oct 2025 16:48:12 +0100
Subject: [PATCH 2/2] Move OOD zenith state to volume + add migration path for existing clusters

---
 ansible/roles/cluster_infra/defaults/main.yml |  2 ++
 .../cluster_infra/templates/resources.tf.j2   | 19 ++++++++++++
 ansible/roles/zenith_proxy/defaults/main.yml  |  2 ++
 ansible/roles/zenith_proxy/tasks/main.yml     |  6 ++--
 .../roles/zenith_proxy/tasks/migrate-ood.yml  | 29 +++++++++++++++++++
 .../zenith_proxy/templates/client.service.j2  |  2 +-
 environments/.caas/hooks/post.yml             | 10 +++++++
 .../inventory/group_vars/all/cluster.yml      |  1 +
 8 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 ansible/roles/zenith_proxy/tasks/migrate-ood.yml

diff --git a/ansible/roles/cluster_infra/defaults/main.yml b/ansible/roles/cluster_infra/defaults/main.yml
index 3b1f6c798..1543c5c69 100644
--- a/ansible/roles/cluster_infra/defaults/main.yml
+++ b/ansible/roles/cluster_infra/defaults/main.yml
@@ -1,3 +1,5 @@
 ---
 ansible_init_collections: []
 ansible_init_playbooks: []
+
+cluster_infra_ood_zenith_volume_size: 1
diff --git a/ansible/roles/cluster_infra/templates/resources.tf.j2 b/ansible/roles/cluster_infra/templates/resources.tf.j2
index f46192c1f..dece24f6d 100644
--- a/ansible/roles/cluster_infra/templates/resources.tf.j2
+++ b/ansible/roles/cluster_infra/templates/resources.tf.j2
@@ -120,6 +120,12 @@ resource "openstack_blockstorage_volume_v3" "home" {
 }
 {% endif %}
 
+resource "openstack_blockstorage_volume_v3" "ood_zenith" {
+  name = "{{ cluster_name }}-ood-zenith"
+  description = "ood_zenith state for Open OnDemand Zenith client"
+  size = "{{ cluster_infra_ood_zenith_volume_size }}"
+}
+
 ######
 ###### Cluster network
 ######
@@ -419,6 +425,13 @@ resource "openstack_compute_instance_v2" "login" {
     delete_on_termination = true
   }
 
+  block_device {
+    destination_type = "volume"
+    source_type = "volume"
+    boot_index = -1
+    uuid = openstack_blockstorage_volume_v3.ood_zenith.id
+  }
+
   # Use cloud-init to inject the SSH keys
   user_data = <<-EOF
     #cloud-config
@@ -429,6 +442,12 @@ resource "openstack_compute_instance_v2" "login" {
 {%- if cluster_ssh_private_key_file is not defined %}
       - "${openstack_compute_keypair_v2.cluster_keypair.public_key}"
 {%- endif %}
+    bootcmd:
+      %{for volume in [openstack_blockstorage_volume_v3.ood_zenith] }
+      - BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${replace(substr(volume.id, 0, 20), "-", "*")}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L ${lower(split(" ", volume.description)[0])} $BLKDEV
+      %{endfor}
+    mounts:
+      - [LABEL=ood_zenith, {{ caas_ood_zenith_dir }}, auto]
   EOF
 
   metadata = {
diff --git a/ansible/roles/zenith_proxy/defaults/main.yml b/ansible/roles/zenith_proxy/defaults/main.yml
index 748ad71c6..f91a7514e 100644
--- a/ansible/roles/zenith_proxy/defaults/main.yml
+++ b/ansible/roles/zenith_proxy/defaults/main.yml
@@ -23,6 +23,8 @@ zenith_proxy_client_image: "{{ zenith_proxy_client_image_repository }}:{{ zenith
 zenith_proxy_mitm_image_repository: ghcr.io/azimuth-cloud/zenith-proxy
 zenith_proxy_mitm_image: "{{ zenith_proxy_mitm_image_repository }}:{{ zenith_proxy_image_tag }}"
 
+zenith_proxy_state_dir: "{{ undef(hint = 'zenith_proxy_state_dir is required') }}"
+
 zenith_proxy_upstream_scheme: http
 zenith_proxy_upstream_host: "{{ undef(hint = 'zenith_proxy_upstream_host is required') }}"
 zenith_proxy_upstream_port: "{{ undef(hint = 'zenith_proxy_upstream_port is required') }}"
diff --git a/ansible/roles/zenith_proxy/tasks/main.yml b/ansible/roles/zenith_proxy/tasks/main.yml
index 360e77d54..ebaedf411 100644
--- a/ansible/roles/zenith_proxy/tasks/main.yml
+++ b/ansible/roles/zenith_proxy/tasks/main.yml
@@ -55,14 +55,12 @@
 
 - name: Create directory to persist SSH key
   ansible.builtin.file:
-    path: "{{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh"
+    path: "{{ zenith_proxy_state_dir }}"
     state: directory
     owner: "{{ zenith_proxy_podman_user }}"
     group: "{{ zenith_proxy_podman_user }}"
     mode: "0755"
   become: true
-  delegate_to: "{{ groups['control'] | first }}"
-  run_once: true
 
 - name: Initialise Zenith client
   # Use a foreground command rather than the podman_container module as I could not
@@ -72,7 +70,7 @@
       --name {{ zenith_proxy_service_name }}-init
       --replace
      --volume /etc/zenith/{{ zenith_proxy_service_name }}:/etc/zenith:ro
-      --volume {{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh:/home/zenith/.ssh
+      --volume {{ zenith_proxy_state_dir }}:/home/zenith/.ssh
       {{ zenith_proxy_client_image }}
       zenith-client init
   become: true
diff --git a/ansible/roles/zenith_proxy/tasks/migrate-ood.yml b/ansible/roles/zenith_proxy/tasks/migrate-ood.yml
new file mode 100644
index 000000000..1d4534e93
--- /dev/null
+++ b/ansible/roles/zenith_proxy/tasks/migrate-ood.yml
@@ -0,0 +1,29 @@
+---
+- name: Check task being called for correct service
+  ansible.builtin.assert:
+    that: zenith_proxy_service_name == 'zenith-ood'
+
+- name: Check for legacy Zenith OOD state
+  ansible.builtin.stat:
+    path: "{{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh"
+  register: _ood_zenith_stat
+
+- name: Migrate releases <=2.6.1 to new Zenith state directory
+  become: true
+  block:
+    - name: Ensure new SSH directory
+      ansible.builtin.file:
+        path: "{{ zenith_proxy_state_dir }}"
+        state: directory
+        owner: "{{ zenith_proxy_podman_user }}"
+        group: "{{ zenith_proxy_podman_user }}"
+        mode: "0755"
+      become: true
+    - ansible.builtin.copy:
+        src: "{{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh/{{ item }}"
+        dest: "{{ caas_ood_zenith_dir }}/{{ zenith_proxy_service_name }}-ssh/{{ item }}"
+        remote_src: true
+      loop:
+        - id_zenith
+        - id_zenith.pub
+  when: _ood_zenith_stat.stat.exists
\ No newline at end of file
diff --git a/ansible/roles/zenith_proxy/templates/client.service.j2 b/ansible/roles/zenith_proxy/templates/client.service.j2
index 809b19b87..e131408b6 100644
--- a/ansible/roles/zenith_proxy/templates/client.service.j2
+++ b/ansible/roles/zenith_proxy/templates/client.service.j2
@@ -25,7 +25,7 @@ ExecStart=/usr/bin/podman run \
     --name {{ zenith_proxy_client_container_name }} \
     --security-opt label=disable \
     --volume /etc/zenith/{{ zenith_proxy_service_name }}:/etc/zenith:ro \
-    --volume {{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh:/home/zenith/.ssh \
+    --volume {{ zenith_proxy_state_dir }}:/home/zenith/.ssh \
     {{ zenith_proxy_client_image }}
 ExecStop=/usr/bin/podman stop --ignore -t 10 {{ zenith_proxy_client_container_name }}
 ExecStopPost=/usr/bin/podman rm --ignore -f {{ zenith_proxy_client_container_name }}
diff --git a/environments/.caas/hooks/post.yml b/environments/.caas/hooks/post.yml
index cf606c746..e6f301743 100644
--- a/environments/.caas/hooks/post.yml
+++ b/environments/.caas/hooks/post.yml
@@ -16,10 +16,19 @@
         zenith_proxy_mitm_auth_inject: basic
         zenith_proxy_mitm_auth_basic_username: "{{ grafana_security.admin_user }}"
         zenith_proxy_mitm_auth_basic_password: "{{ grafana_security.admin_password }}"
+        zenith_proxy_state_dir: "{{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh"
       when: zenith_subdomain_monitoring is defined
 
 - hosts: openondemand
   tasks:
+    - name: Migrate legacy Zenith OOD state
+      ansible.builtin.include_role:
+        name: zenith_proxy
+        tasks_from: migrate-ood.yml
+      vars:
+        zenith_proxy_service_name: zenith-ood
+        zenith_proxy_state_dir: "{{ caas_ood_zenith_dir }}/{{ zenith_proxy_service_name }}-ssh"
+
     - name: Deploy the Zenith client for OOD
       ansible.builtin.include_role:
         name: zenith_proxy
@@ -34,6 +43,7 @@
         zenith_proxy_mitm_auth_inject: basic
         zenith_proxy_mitm_auth_basic_username: azimuth
         zenith_proxy_mitm_auth_basic_password: "{{ vault_azimuth_user_password }}"
+        zenith_proxy_state_dir: "{{ caas_ood_zenith_dir }}/{{ zenith_proxy_service_name }}-ssh"
       when: zenith_subdomain_ood is defined
 
 # Run hpctests if set in UI
diff --git a/environments/.caas/inventory/group_vars/all/cluster.yml b/environments/.caas/inventory/group_vars/all/cluster.yml
index ea38e9f36..0e6cdca6c 100644
--- a/environments/.caas/inventory/group_vars/all/cluster.yml
+++ b/environments/.caas/inventory/group_vars/all/cluster.yml
@@ -22,6 +22,7 @@ openondemand_servername_default: "{{ hostvars[groups['openstack'][0]].cluster_ga
 openondemand_servername: "{{ zenith_fqdn_ood | default(openondemand_servername_default) }}"
 
 appliances_state_dir: /var/lib/state
+caas_ood_zenith_dir: /var/lib/ood_zenith
 
 # Defaults for caas-provided extravars:
 cluster_project_manila_share: false