Skip to content

Commit bd09a34

Browse files
authored
fix: Custom File Share support for Login node; Scale Support for >250 dynamic worker nodes; Updated catalog.json to have the dropdown for ssh keys (#167)
1 parent 57eedcf commit bd09a34

File tree

15 files changed

+143
-66
lines changed

15 files changed

+143
-66
lines changed

.catalog-onboard-pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ offerings:
88
offering_id: bf3c07f8-5a62-4289-8ea0-94dbb2b410e6
99
# list all of the variations (flavors) you have included in the ibm_catalog.json
1010
variations:
11-
- name: Cluster-with-LSF-v10.1.0.14
11+
- name: Cluster-with-LSF
1212
mark_ready: false # have pipeline mark as visible if validation passes
1313
install_type: fullstack # ensure value matches what is in ibm_catalog.json (fullstack or extension)
1414
destroy_resources_on_failure: false # defaults to false if not specified so resources can be inspected to debug failures during validation

hpcaas-arch-1.5.0.svg

Lines changed: 0 additions & 4 deletions
This file was deleted.

hpcaas-arch-1.6.svg

Lines changed: 4 additions & 0 deletions
Loading

ibm_catalog.json

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@
4343
],
4444
"flavors": [
4545
{
46-
"label": "Cluster-with-LSF-v10.1.0.14",
47-
"name": "Cluster-with-LSF-v10.1.0.14",
46+
"label": "Cluster with LSF v10.1.0.14",
47+
"name": "Cluster-with-LSF",
4848
"install_type": "fullstack",
4949
"working_directory": "solutions/hpc",
5050
"compliance": {
@@ -72,10 +72,36 @@
7272
"key": "cluster_id"
7373
},
7474
{
75-
"key": "bastion_ssh_keys"
75+
"key": "bastion_ssh_keys",
76+
"type": "array",
77+
"default_value": "",
78+
"display_name": "VPC SSH Key",
79+
"required": true,
80+
"custom_config": {
81+
"type": "vpc_ssh_key",
82+
"grouping": "deployment",
83+
"original_grouping": "deployment",
84+
"config_constraints": {
85+
"selection": "multi_select",
86+
"valueType": "name"
87+
}
88+
}
7689
},
7790
{
78-
"key": "compute_ssh_keys"
91+
"key": "compute_ssh_keys",
92+
"type": "array",
93+
"default_value": "",
94+
"display_name": "VPC SSH Key",
95+
"required": true,
96+
"custom_config": {
97+
"type": "vpc_ssh_key",
98+
"grouping": "deployment",
99+
"original_grouping": "deployment",
100+
"config_constraints": {
101+
"selection": "multi_select",
102+
"valueType": "name"
103+
}
104+
}
79105
},
80106
{
81107
"key": "remote_allowed_ips"
@@ -317,12 +343,6 @@
317343
}
318344
],
319345
"iam_permissions": [
320-
{
321-
"role_crns": [
322-
"crn:v1:bluemix:public:iam::::serviceRole:Manager"
323-
],
324-
"service_name": "schematics"
325-
},
326346
{
327347
"role_crns": [
328348
"crn:v1:bluemix:public:iam::::serviceRole:writer"
@@ -343,21 +363,28 @@
343363
},
344364
{
345365
"role_crns": [
346-
"crn:v1:bluemix:public:iam::::role:Administrator"
366+
"crn:v1:bluemix:public:iam::::role:Editor"
347367
],
348-
"service_name": "project"
368+
"service_name": "is.vpc"
349369
},
350370
{
351371
"role_crns": [
352372
"crn:v1:bluemix:public:iam::::role:Editor"
353373
],
354-
"service_name": "is.vpc"
374+
"service_name": "dns-svcs"
355375
},
356376
{
377+
"service_name": "is.flow-log-collector",
357378
"role_crns": [
358379
"crn:v1:bluemix:public:iam::::role:Editor"
359-
],
360-
"service_name": "dns-svcs"
380+
]
381+
},
382+
{
383+
"service_name": "sysdig-monitor",
384+
"role_crns": [
385+
"crn:v1:bluemix:public:iam::::serviceRole:Manager",
386+
"crn:v1:bluemix:public:iam::::role:Administrator"
387+
]
361388
}
362389
],
363390
"architecture": {
@@ -377,7 +404,7 @@
377404
},
378405
{
379406
"title": "Reduces failure events by using multizone regions",
380-
"description": "Yes"
407+
"description": "No"
381408
},
382409
{
383410
"title": "Collects and stores Internet Protocol (IP) traffic information with Activity Tracker and Flow Logs",
@@ -393,7 +420,7 @@
393420
},
394421
{
395422
"title": "Uses Floating IP address for access through the public internet",
396-
"description": "No"
423+
"description": "Yes"
397424
}
398425
],
399426
"diagrams": [

modules/landing_zone_vsi/configuration_steps/configure_management_vsi.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,11 @@ LSF_RSH="ssh -o 'PasswordAuthentication no' -o 'StrictHostKeyChecking no'"
162162
EOT
163163
sed -i "s/LSF_MASTER_LIST=.*/LSF_MASTER_LIST=\"${mgmt_hostnames}\"/g" $LSF_CONF_FILE
164164

165+
# Updating the worker node count to 2000 when no VPC file share is declared.
166+
if [[ $vpc_file_share_count == 0 ]]; then
167+
sed -i 's/THRESHOLD\[250\]/THRESHOLD\[2000\]/' $LSF_CONF_FILE
168+
fi
169+
165170
if [ "$hyperthreading" == true ]; then
166171
ego_define_ncpus="threads"
167172
else
@@ -572,7 +577,7 @@ dns_domain="${dns_domain}"
572577
ManagementHostNames="${mgmt_hostnames}"
573578
lsf_public_key="${cluster_public_key_content}"
574579
hyperthreading=${hyperthreading}
575-
nfs_server_with_mount_path="${nfs_server_with_mount_path}"
580+
nfs_server_with_mount_path=""
576581
custom_file_shares="${custom_file_shares}"
577582
custom_mount_paths="${custom_mount_paths}"
578583
login_ip_address="${login_ip}"
@@ -634,8 +639,6 @@ echo "Setting LSF share"
634639
# Setup file share
635640
if [ -n "${nfs_server_with_mount_path}" ]; then
636641
echo "File share ${nfs_server_with_mount_path} found"
637-
# Create a data directory for sharing HPC workload data ### is this used?
638-
mkdir -p "${LSF_TOP}/data"
639642
nfs_client_mount_path="/mnt/lsf"
640643
rm -rf "${nfs_client_mount_path}"
641644
mkdir -p "${nfs_client_mount_path}"
@@ -674,6 +677,13 @@ if [ -n "${nfs_server_with_mount_path}" ]; then
674677
ln -fs "${nfs_client_mount_path}/gui-conf" "${LSF_SUITE_GUI_CONF}"
675678
chown -R lsfadmin:root "${LSF_SUITE_GUI_CONF}"
676679

680+
# Create a data directory for sharing HPC workload data
681+
if [ "$on_primary" == "true" ]; then
682+
mkdir -p "${nfs_client_mount_path}/data"
683+
ln -s "${nfs_client_mount_path}/data" "$LSF_TOP/work/data"
684+
chown -R lsfadmin:root "$LSF_TOP/work/data"
685+
fi
686+
677687
# VNC Sessions
678688
if [ "$on_primary" == "true" ]; then
679689
mkdir -p "${nfs_client_mount_path}/repository-path"

modules/landing_zone_vsi/configuration_steps/management_values.tpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ hyperthreading="${hyperthreading}"
3636
network_interface=${network_interface}
3737
dns_domain="${dns_domain}"
3838
mount_path="${mount_path}"
39+
vpc_file_share_count="${vpc_file_share_count}"
3940
custom_file_shares="${custom_file_shares}"
4041
custom_mount_paths="${custom_mount_paths}"
4142
contract_id="${contract_id}"

modules/landing_zone_vsi/template_files.tf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ data "template_file" "login_user_data" {
2525
cluster_private_key_content = local.enable_management ? module.compute_key[0].private_key_content : ""
2626
cluster_public_key_content = local.enable_management ? module.compute_key[0].public_key_content : ""
2727
mount_path = var.share_path
28+
custom_mount_paths = join(" ", concat(local.vpc_file_share[*]["mount_path"], local.nfs_file_share[*]["mount_path"]))
29+
custom_file_shares = join(" ", concat([for file_share in var.file_share : file_share], local.nfs_file_share[*]["nfs_share"]))
2830
enable_ldap = var.enable_ldap
2931
rc_cidr_block = local.bastion_subnets[0].cidr
3032
cluster_prefix = var.prefix
@@ -78,6 +80,7 @@ data "template_file" "management_values" {
7880
network_interface = local.vsi_interfaces[0]
7981
dns_domain = var.dns_domain_names["compute"]
8082
mount_path = var.share_path
83+
vpc_file_share_count = var.vpc_file_share_count
8184
custom_mount_paths = join(" ", concat(local.vpc_file_share[*]["mount_path"], local.nfs_file_share[*]["mount_path"]))
8285
custom_file_shares = join(" ", concat([for file_share in var.file_share : file_share], local.nfs_file_share[*]["nfs_share"]))
8386
contract_id = var.contract_id

modules/landing_zone_vsi/templates/login_user_data.tpl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ dns_domain="${dns_domain}"
1515
cluster_private_key_content="${cluster_private_key_content}"
1616
cluster_public_key_content="${cluster_public_key_content}"
1717
mount_path="${mount_path}"
18+
custom_mount_paths="${custom_mount_paths}"
19+
custom_file_shares="${custom_file_shares}"
1820
enable_ldap="${enable_ldap}"
1921
network_interface=""${network_interface}""
2022
rc_cidr_block="${rc_cidr_block}"

modules/landing_zone_vsi/templates/login_vsi.sh

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ LSF_CONF=$LSF_TOP/conf
1515
LSF_HOSTS_FILE="/etc/hosts"
1616

1717
nfs_server_with_mount_path=${mount_path}
18-
18+
custom_mount_paths="${custom_mount_paths}"
19+
custom_file_shares="${custom_file_shares}"
1920

2021
# Setup logs for user data
2122
echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $logfile
@@ -107,10 +108,9 @@ EOT
107108
sh $command && (crontab -l 2>/dev/null; echo "@reboot $command") | crontab -
108109
fi
109110

110-
# Setup LSF
111-
echo "Setting LSF share." >> $logfile
112-
# Setup file share
111+
# Setup Default LSF Share
113112
if [ -n "${nfs_server_with_mount_path}" ]; then
113+
echo "Setting Default LSF share." >> $logfile
114114
echo "File share ${nfs_server_with_mount_path} found" >> $logfile
115115
nfs_client_mount_path="/mnt/lsf"
116116
rm -rf "${nfs_client_mount_path}"
@@ -133,11 +133,38 @@ if [ -n "${nfs_server_with_mount_path}" ]; then
133133
ln -fs "${nfs_client_mount_path}/$dir" "${LSF_TOP}"
134134
chown -R lsfadmin:root "${LSF_TOP}"
135135
done
136+
echo "Setting Default LSF share is completed." >> $logfile
136137
else
137138
echo "No mount point value found, exiting!" >> $logfile
138139
exit 1
139140
fi
140-
echo "Setting LSF share is completed." >> $logfile
141+
142+
# Setup Custom File shares
143+
if [ -n "${custom_file_shares}" ]; then
144+
echo "Setting custom file shares." >> $logfile
145+
echo "Custom file share ${custom_file_shares} found" >> $logfile
146+
file_share_array=(${custom_file_shares})
147+
mount_path_array=(${custom_mount_paths})
148+
length=${#file_share_array[@]}
149+
for (( i=0; i<length; i++ )); do
150+
rm -rf "${mount_path_array[$i]}"
151+
mkdir -p "${mount_path_array[$i]}"
152+
# Mount the custom file share at its mount path
153+
mount -t nfs -o sec=sys "${file_share_array[$i]}" "${mount_path_array[$i]}"
154+
# Verify mount
155+
if mount | grep "${file_share_array[$i]}"; then
156+
echo "Mount found"
157+
else
158+
echo "No mount found"
159+
rm -rf "${mount_path_array[$i]}"
160+
fi
161+
# Update permission to 777 for all users to access
162+
chmod 777 "${mount_path_array[$i]}"
163+
# Update mount to fstab for automount
164+
echo "${file_share_array[$i]} ${mount_path_array[$i]} nfs rw,sec=sys,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,_netdev 0 0 " >> /etc/fstab
165+
echo "Setting custom file shares is completed." >> $logfile
166+
done
167+
fi
141168

142169
echo "source ${LSF_CONF}/profile.lsf" >> "${lsfadmin_home_dir}"/.bashrc
143170
echo "source ${LSF_CONF}/profile.lsf" >> /root/.bashrc

modules/landing_zone_vsi/variables.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,11 @@ variable "file_share" {
236236
description = "VPC file share mount points considering the ip address and the file share name"
237237
}
238238

239+
variable "vpc_file_share_count" {
240+
type = number
241+
description = "Requested number of VPC file shares."
242+
}
243+
239244
variable "login_private_ips" {
240245
description = "Login private IPs"
241246
type = string

0 commit comments

Comments
 (0)