From 3a7498d86dbf8ebe6e9411daf2ab7ee57d4e3188 Mon Sep 17 00:00:00 2001 From: lara Date: Thu, 13 Feb 2025 14:22:21 +0100 Subject: [PATCH 1/3] Test is non CUDA builds are not add to accelorator path with jax --- .../2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml index 7ac4ba6cca..6408ec74c1 100644 --- a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -5,3 +5,4 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21699 from-commit: e3407bd127d248c08960f6b09c973da0fdecc2c3 + - jax-0.4.25-gfbf-2023a-CUDA-12.1.1.eb From 80329ceeff0dd28f70d33c04352cd8810a6e445b Mon Sep 17 00:00:00 2001 From: laraPPr Date: Mon, 2 Jun 2025 14:30:08 +0200 Subject: [PATCH 2/3] add check of nvidia-smi to utils.sh --- EESSI-install-software.sh | 16 +--------------- bot/build.sh | 12 +----------- bot/test.sh | 11 +---------- scripts/utils.sh | 13 +++++++++++++ 4 files changed, 16 insertions(+), 36 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 25057216a3..6448afe8e1 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -17,11 +17,6 @@ display_help() { echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)" } -# Function to check if a command exists -function command_exists() { - command -v "$1" >/dev/null 2>&1 -} - function copy_build_log() { # copy specified build log to specified directory, with some context added build_log=${1} @@ -271,16 +266,7 @@ fi # Install NVIDIA drivers in host_injections (if they exist) if command_exists "nvidia-smi"; then - nvidia-smi --version - ec=$? - if [ ${ec} -eq 0 ]; then - echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..." - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh - else - echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully." - echo "This script now assumes this is NOT a GPU node." - echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error." - fi + check_nvidia-smi_installation fi if [ ! -z "${shared_fs_path}" ]; then diff --git a/bot/build.sh b/bot/build.sh index 845ef0e338..2dcfa1b9b8 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -249,18 +249,8 @@ BUILD_STEP_ARGS+=("--storage" "${STORAGE}") if command_exists "nvidia-smi"; then # Accept that this may fail set +e - nvidia-smi --version - ec=$? + check_nvidia-smi_installation set -e - if [ ${ec} -eq 0 ]; then - echo "Command 'nvidia-smi' found, using available GPU" - BUILD_STEP_ARGS+=("--nvidia" "all") - else - echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully." - echo "This script now assumes this is NOT a GPU node." - echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error." - BUILD_STEP_ARGS+=("--nvidia" "install") - fi else echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check" BUILD_STEP_ARGS+=("--nvidia" "install") diff --git a/bot/test.sh b/bot/test.sh index 7160ff7e5d..8d8570ccdf 100755 --- a/bot/test.sh +++ b/bot/test.sh @@ -217,17 +217,8 @@ TEST_STEP_ARGS+=("--extra-bind-paths" "/sys/fs/cgroup:/hostsys/fs/cgroup:ro") if command_exists "nvidia-smi"; then # Accept that this may fail set +e - nvidia-smi --version - ec=$? + check_nvidia-smi_installation set -e - if [ ${ec} -eq 0 ]; then - echo "Command 'nvidia-smi' found, using available GPU" - TEST_STEP_ARGS+=("--nvidia" "run") - else - echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully." - echo "This script now assumes this is NOT a GPU node." - echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error." - fi fi # prepare arguments to test_suite.sh (specific to test step) diff --git a/scripts/utils.sh b/scripts/utils.sh index 962decd20e..426f622253 100644 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -147,3 +147,16 @@ function get_ipv4_address { echo "${hipv4}" return 0 } + +function check_nvidia-smi_installation { + nvidia-smi --version + ec=$? + if [ ${ec} -eq 0 ]; then + echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..." + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + else + echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully." + echo "This script now assumes this is NOT a GPU node." + echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error." + fi +} From 03427e6985d6a384b2bc81826305fd670087c646 Mon Sep 17 00:00:00 2001 From: laraPPr Date: Mon, 2 Jun 2025 14:39:26 +0200 Subject: [PATCH 3/3] clean up before merging --- .../2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml index 6408ec74c1..7ac4ba6cca 100644 --- a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -5,4 +5,3 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21699 from-commit: e3407bd127d248c08960f6b09c973da0fdecc2c3 - - jax-0.4.25-gfbf-2023a-CUDA-12.1.1.eb