Skip to content

Code updates to resolve the LSF DA cluster deployment issue #237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
8 changes: 4 additions & 4 deletions locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ locals {

storage_subnet = [for subnet in local.storage_subnets : subnet.name]
protocol_subnet = [for subnet in local.protocol_subnets : subnet.name]
protocol_subnet_id = [for subnet in local.protocol_subnets : subnet.id][0]
protocol_subnet_id = local.protocol_instance_count > 0 ? [for subnet in local.protocol_subnets : subnet.id][0] : ""
compute_subnet = [for subnet in local.compute_subnets : subnet.name]
client_subnet = [for subnet in local.client_subnets : subnet.name]
bastion_subnet = [for subnet in local.bastion_subnets : subnet.name]
Expand Down Expand Up @@ -286,7 +286,7 @@ locals {
client_nodes = var.scheduler == "LSF" ? var.enable_deployer ? [] : (flatten([module.landing_zone_vsi[0].client_vsi_data]))[*]["name"] : []
gui_hosts = var.scheduler == "LSF" ? var.enable_deployer ? [] : [local.management_nodes[0]] : [] # Without Pac HA
db_hosts = var.scheduler == "LSF" ? var.enable_deployer ? [] : [local.management_nodes[0]] : [] # Without Pac HA
ha_shared_dir = var.scheduler == "LSF" ? "/mnt/lsf/shared" : ""
ha_shared_dir = var.scheduler == "LSF" ? "/mnt/lsf" : ""
nfs_install_dir = var.scheduler == "LSF" ? "none" : ""
enable_monitoring = var.scheduler == "LSF" ? false : false
lsf_deployer_hostname = var.scheduler == "LSF" ? var.deployer_hostname : ""
Expand Down Expand Up @@ -419,9 +419,9 @@ locals {

fileset_size_map = try({ for details in var.file_shares : details.mount_path => details.size }, {})

storage_subnet_cidr = var.enable_deployer ? "" : jsonencode((data.ibm_is_subnet.existing_storage_subnets[*].ipv4_cidr_block)[0])
storage_subnet_cidr = var.enable_deployer ? "" : local.storage_instance_count > 0 ? jsonencode((data.ibm_is_subnet.existing_storage_subnets[*].ipv4_cidr_block)[0]) : ""
compute_subnet_cidr = var.enable_deployer ? "" : jsonencode((data.ibm_is_subnet.existing_compute_subnets[*].ipv4_cidr_block)[0])
client_subnet_cidr = var.enable_deployer ? "" : jsonencode((data.ibm_is_subnet.existing_client_subnets[*].ipv4_cidr_block)[0])
client_subnet_cidr = var.enable_deployer ? "" : local.client_instance_count > 0 ? jsonencode((data.ibm_is_subnet.existing_client_subnets[*].ipv4_cidr_block)[0]) : ""

compute_memory = data.ibm_is_instance_profile.compute_profile.memory[0].value
compute_vcpus_count = data.ibm_is_instance_profile.compute_profile.vcpu_count[0].value
Expand Down
14 changes: 7 additions & 7 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ module "landing_zone_vsi" {
enable_dedicated_host = var.enable_dedicated_host
enable_ldap = var.enable_ldap
ldap_instances = var.ldap_instances
ldap_server = var.ldap_server
ldap_server = local.ldap_server
ldap_instance_key_pair = local.ldap_instance_key_pair
scale_encryption_enabled = var.scale_encryption_enabled
scale_encryption_type = var.scale_encryption_type
Expand Down Expand Up @@ -163,7 +163,7 @@ module "prepare_tf_input" {
observability_atracker_target_type = var.observability_atracker_target_type
enable_ldap = var.enable_ldap
ldap_instances = var.ldap_instances
ldap_server = var.ldap_server
ldap_server = local.ldap_server
ldap_basedns = var.ldap_basedns
ldap_server_cert = local.ldap_server_cert
ldap_admin_password = local.ldap_admin_password
Expand All @@ -184,7 +184,7 @@ module "prepare_tf_input" {
module "validate_ldap_server_connection" {
count = var.enable_deployer && var.enable_ldap && local.ldap_server != "null" ? 1 : 0
source = "./modules/ldap_remote_exec"
ldap_server = var.ldap_server
ldap_server = local.ldap_server
bastion_fip = local.bastion_fip
bastion_private_key_content = local.bastion_private_key_content
deployer_ip = local.deployer_ip
Expand Down Expand Up @@ -509,7 +509,7 @@ module "compute_cluster_configuration" {
enable_ldap = var.enable_ldap
ldap_basedns = var.ldap_basedns
ldap_server = var.enable_ldap ? local.ldap_instance_private_ips[0] : null
ldap_admin_password = var.ldap_admin_password == "" ? jsonencode(null) : var.ldap_admin_password
ldap_admin_password = local.ldap_admin_password == "" ? jsonencode(null) : local.ldap_admin_password
enable_key_protect = var.scale_encryption_type
depends_on = [module.write_compute_scale_cluster_inventory]
}
Expand Down Expand Up @@ -569,8 +569,8 @@ module "storage_cluster_configuration" {
enable_ldap = var.enable_ldap
ldap_basedns = var.ldap_basedns
ldap_server = var.enable_ldap ? local.ldap_instance_private_ips[0] : null
ldap_admin_password = var.ldap_admin_password == "" ? jsonencode(null) : var.ldap_admin_password
ldap_server_cert = var.ldap_server_cert
ldap_admin_password = local.ldap_admin_password == "" ? jsonencode(null) : local.ldap_admin_password
ldap_server_cert = local.ldap_server_cert
enable_key_protect = var.scale_encryption_type
depends_on = [module.write_storage_scale_cluster_inventory]
}
Expand All @@ -592,7 +592,7 @@ module "client_configuration" {
enable_ldap = var.enable_ldap
ldap_basedns = var.ldap_basedns
ldap_server = var.enable_ldap ? jsonencode(local.ldap_instance_private_ips[0]) : jsonencode(null)
ldap_admin_password = var.ldap_admin_password == "" ? jsonencode(null) : var.ldap_admin_password
ldap_admin_password = local.ldap_admin_password == "" ? jsonencode(null) : local.ldap_admin_password
depends_on = [module.compute_cluster_configuration, module.storage_cluster_configuration]
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@
# Upload certificate to shared directory
- name: LDAP | Ensure shared OpenLDAP certificate directory exists
ansible.builtin.file:
path: "/mnt/lsf/shared/openldap"
path: "{{ ha_shared_dir }}/openldap"
state: directory
mode: '0755'
run_once: true

- name: LDAP | Upload ldap_cacert.pem to shared directory
ansible.builtin.copy:
src: "{{ LDAP_CERT_FILES_DIR }}/ldap_cacert.pem"
dest: "/mnt/lsf/shared/openldap/ldap_cacert.pem"
dest: "{{ ha_shared_dir }}/openldap/ldap_cacert.pem"
mode: '0644'
run_once: true

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ if [ -n "${nfs_server_with_mount_path}" ]; then
if mount_nfs_with_retries "${nfs_server_with_mount_path}" "${nfs_client_mount_path}"; then
for dir in conf work; do
rm -rf "${LSF_TOP:?}/$dir"
ln -fs "${nfs_client_mount_path}/shared/lsf/$dir" "${LSF_TOP}/$dir"
ln -fs "${nfs_client_mount_path}/lsf/$dir" "${LSF_TOP}/$dir"
done
chown -R lsfadmin:root "${LSF_TOP}"
else
Expand Down Expand Up @@ -129,8 +129,8 @@ LDAP_DIR="/home/lsfadmin"
SSH_DIR="$LDAP_DIR/.ssh"
mkdir -p "$SSH_DIR"
cp /home/vpcuser/.ssh/authorized_keys "$SSH_DIR/authorized_keys"
cat /mnt/lsf/shared/ssh/id_rsa.pub >> "$SSH_DIR/authorized_keys"
cp /mnt/lsf/shared/ssh/id_rsa "$SSH_DIR/id_rsa"
cat "{{ ha_shared_dir }}/ssh/id_rsa.pub" >> "$SSH_DIR/authorized_keys"
cp "{{ ha_shared_dir }}/ssh/id_rsa" "$SSH_DIR/id_rsa"
echo "StrictHostKeyChecking no" >> "$SSH_DIR/config"
chmod 600 "$SSH_DIR/authorized_keys"
chmod 400 "$SSH_DIR/id_rsa"
Expand Down Expand Up @@ -241,19 +241,18 @@ if [ "$enable_ldap" = "true" ]; then
# Check if the SSL certificate file exists, then copy it to the correct location
# Retry finding SSL certificate with a maximum of 5 attempts and 5 seconds sleep between retries
for attempt in {1..5}; do
if [ -f "/mnt/lsf/shared/openldap/ldap_cacert.pem" ]; then
echo "LDAP SSL cert found under /mnt/lsf/shared/openldap/ldap_cacert.pem path" >> $logfile
if [ -f "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" ]; then
echo "LDAP SSL cert found under {{ ha_shared_dir }}/openldap/ldap_cacert.pem path" >> $logfile
mkdir -p /etc/openldap/certs/
cp -pr /mnt/lsf/shared/openldap/ldap_cacert.pem /etc/openldap/certs/ldap_cacert.pem
cp -pr "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" "/etc/openldap/certs/ldap_cacert.pem"
break
else
echo "SSL cert not found on attempt $attempt. Retrying in 5 seconds..." >> $logfile
sleep 5
fi
done
# Exit if the SSL certificate is still not found after 5 attempts
[ -f "/mnt/lsf/shared/openldap/ldap_cacert.pem" ] || { echo "SSL cert not found after 5 attempts. Exiting." >> $logfile; exit 1; }

[ -f "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" ] || { echo "SSL cert not found after 5 attempts. Exiting." >> $logfile; exit 1; }

# Create and configure the SSSD configuration file for LDAP integration
cat <<EOF > /etc/sssd/sssd.conf
Expand Down
2 changes: 1 addition & 1 deletion modules/landing_zone_vsi/datasource.tf
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ data "ibm_is_ssh_key" "ldap" {
}

data "ibm_is_image" "ldap_vsi_image" {
count = var.enable_ldap != null && var.ldap_server == null ? 1 : 0
count = var.enable_ldap != null && var.ldap_server == "null" ? 1 : 0
name = var.ldap_instances[count.index]["image"]
}

Expand Down
8 changes: 4 additions & 4 deletions modules/landing_zone_vsi/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ module "gklm_vsi" {
}

module "ldap_vsi" {
count = var.enable_ldap == true && var.ldap_server == null ? 1 : 0
count = var.enable_ldap == true && var.ldap_server == "null" ? 1 : 0
source = "terraform-ibm-modules/landing-zone-vsi/ibm"
version = "5.0.0"
vsi_per_subnet = 1
Expand All @@ -423,9 +423,9 @@ module "ldap_vsi" {
prefix = local.ldap_node_name
resource_group_id = local.resource_group_id
enable_floating_ip = false
security_group_ids = module.storage_sg[*].security_group_id
ssh_key_ids = local.ldap_ssh_keys
subnets = [local.storage_subnets[0]]
security_group_ids = local.products == "lsf" ? module.compute_sg[*].security_group_id : module.storage_sg[*].security_group_id
ssh_key_ids = local.products == "lsf" ? local.management_ssh_keys : local.ldap_ssh_keys
subnets = local.products == "lsf" ? local.compute_subnets : [local.storage_subnets[0]]
tags = local.tags
user_data = data.template_file.ldap_user_data.rendered
vpc_id = var.vpc_id
Expand Down
2 changes: 1 addition & 1 deletion modules/playbook/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ resource "null_resource" "configure_ldap_server_playbook" {
triggers = {
build = timestamp()
}
depends_on = [local_file.prepare_ldap_server_playbook]
depends_on = [local_file.prepare_ldap_server_playbook, null_resource.run_playbook]
}

resource "local_file" "prepare_ldap_client_playbook" {
Expand Down
2 changes: 1 addition & 1 deletion modules/resource_provisioner/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ locals {
deployer_path = "/opt/ibm"
remote_terraform_path = format("%s/terraform-ibm-hpc", local.deployer_path)
da_hpc_repo_url = "https://github.com/terraform-ibm-modules/terraform-ibm-hpc.git"
da_hpc_repo_tag = "jay_api_nd_colocation_chng" ###### change it to main in future
da_hpc_repo_tag = "develop" ###### change it to main in future
remote_ansible_path = format("%s/ibm-spectrumscale-cloud-deploy", local.deployer_path)
scale_cloud_infra_repo_url = "https://github.com/jayeshh123/ibm-spectrum-scale-install-infra"
scale_cloud_infra_repo_name = "ibm-spectrum-scale-install-infra"
Expand Down