# Merge pull request #5 from ocaisa/cuda_fallbacks #87
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
#
# Workflow overview (three independent jobs):
#   basic_checks                     - spider cache usage + reporting of an invalid archdetect override
#   lmod_and_init_script_comparison  - EESSI Lmod module vs. `source ./init/bash` must export the same variables
#   make_sure_load_and_unload_work   - load + unload of the EESSI module must leave the environment unchanged
name: Tests for eessi_module_functionality in software.eessi.io
on:
  push:
    branches: [ "main" ]
  pull_request:
permissions:
  contents: read # to fetch code (actions/checkout)
jobs:
  # Sanity checks on the EESSI module shipped in init/modules.
  basic_checks:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        EESSI_VERSION:
          - '2023.06'

    steps:
      - name: Check out software-layer repository
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - name: Mount EESSI CernVM-FS pilot repository
        uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0
        with:
          cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb
          cvmfs_http_proxy: DIRECT
          cvmfs_repositories: software.eessi.io

      # Passes when `module --terse avail` finishes within 10 seconds AND the
      # Lmod configuration report lists a cache entry that mentions "software"
      # but not "compat".
      - name: Test for making sure spider cache is being used and not being rebuilt
        run: |
          . /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash # Initialise Lmod
          export MODULEPATH=init/modules
          configfile="configfile.txt"
          module -T load EESSI/${{matrix.EESSI_VERSION}}
          # Capture the Lmod configuration report (stdout and stderr) for inspection
          module --config > "${configfile}" 2>&1
          # Print the matching cache lines to the job log
          # NOTE(review): the same pipeline is evaluated again inside the `if` below
          grep cache "${configfile}" | grep software | grep -v compat
          if timeout 10s bash -c "LMOD_PAGER=none module --terse avail" && grep cache "${configfile}" | grep software | grep -v compat; then
            echo "EESSI spider cache is being used"
          else
            echo "EESSI spider cache is being rebuilt" >&2
            exit 1
          fi
          env | grep LMOD
          module purge
          unset MODULEPATH

      # Loads the module with a bogus EESSI_ARCHDETECT_OPTIONS_OVERRIDE and
      # expects the text "Software directory check" in the captured output.
      - name: Test for archdetect_cpu functionality with invalid path
        run: |
          # Initialise Lmod
          . /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash
          export MODULEPATH=init/modules
          set +e  # Do not exit immediately if a command exits with a non-zero status
          export EESSI_ARCHDETECT_OPTIONS_OVERRIDE="dummy/cpu"
          outfile="outfile.txt"
          # Keep both stdout and stderr of the load so the message can be searched for
          module load EESSI/${{matrix.EESSI_VERSION}} > "${outfile}" 2>&1
          cat "${outfile}"
          if grep -q "Software directory check" "${outfile}"; then
            echo "Test for picking up invalid path on \${archdetect_cpu} PASSED"
          else
            echo "Test for picking up invalid path on \${archdetect_cpu} FAILED" >&2
            exit 1
          fi
          unset EESSI_ARCHDETECT_OPTIONS_OVERRIDE
          set -e  # Re-enable exit on non-zero status

  # Checks that initialising via the Lmod module and via `source ./init/bash`
  # results in the same EESSI_*/LMOD_RC/LMOD_PACKAGE_PATH/MODULEPATH variables.
  lmod_and_init_script_comparison:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        EESSI_VERSION:
          - '2023.06'
        EESSI_SOFTWARE_SUBDIR_OVERRIDE:
          - x86_64/amd/zen3
          - x86_64/amd/zen4
        EESSI_ACCELERATOR_TARGET_OVERRIDE:
          - accel/nvidia/cc80
          # This should fall back to cc70 but that is checked later (in this step we just check for consistency)
          - accel/nvidia/cc77

    steps:
      - name: Check out software-layer repository
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - name: Mount EESSI CernVM-FS pilot repository
        uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0
        with:
          cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb
          cvmfs_http_proxy: DIRECT
          cvmfs_repositories: software.eessi.io

      # Substitute the version placeholder in the checked-out defaults file
      - name: Fix EESSI version in init scripts
        run: |
          sed -i "s/__EESSI_VERSION_DEFAULT__/${{matrix.EESSI_VERSION}}/g" init/eessi_defaults

      - name: Test for expected variables match between Lmod init script and original bash script
        run: |
          # Initialise Lmod
          . /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash

          # Set our path overrides according to our matrix
          export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
          export EESSI_ACCELERATOR_TARGET_OVERRIDE=${{matrix.EESSI_ACCELERATOR_TARGET_OVERRIDE}}

          moduleoutfile="moduleout.txt"
          sourceoutfile="sourceout.txt"

          # First do (and undo) the Lmod initialisation
          export MODULEPATH=init/modules
          # Turn on debug output in case we want to take a look
          export EESSI_DEBUG_INIT=true
          # NOTE(review): CPU_ARCH is neither exported nor referenced again in this step - confirm it is still needed
          CPU_ARCH=$(./init/eessi_archdetect.sh -a cpupath)
          module load EESSI/${{matrix.EESSI_VERSION}}
          # EESSI_DEBUG_INIT/EESSI_ARCHDETECT_OPTIONS only relevant for Lmod init
          unset EESSI_DEBUG_INIT
          # Store all relevant environment variables
          env | grep -E '(^EESSI_|^LMOD_RC|^LMOD_PACKAGE_PATH|^MODULEPATH)' | grep -v EESSI_ARCHDETECT_OPTIONS | sort > "${moduleoutfile}"
          module unload EESSI/${{matrix.EESSI_VERSION}}
          # We should only have two EESSI_* variables defined (which set the overrides)
          if [ "$(env | grep -c '^EESSI')" -ne 2 ]; then
            echo "Expected 2 EESSI-related environment variables, but found a different number."
            env | grep '^EESSI'
            exit 1
          fi

          # Now do the init script initialisation
          source ./init/bash
          # source script version sets environment variables to force archdetect, ignore these
          unset EESSI_USE_ARCHSPEC
          unset EESSI_USE_ARCHDETECT
          env | grep -E '(^EESSI_|^LMOD_RC|^LMOD_PACKAGE_PATH|^MODULEPATH)' | sort > "${sourceoutfile}"

          # Now compare the two results
          echo ""
          echo "Lmod initialisation:"
          cat "${moduleoutfile}"
          echo ""
          echo "Source script initialisation:"
          cat "${sourceoutfile}"
          echo ""
          echo ""
          if (diff "${moduleoutfile}" "${sourceoutfile}" > /dev/null); then
            echo "Test for checking env variables PASSED"
          else
            echo "Test for checking env variables FAILED" >&2
            diff --unified=0 "${moduleoutfile}" "${sourceoutfile}"
            exit 1
          fi

  # Checks that a load/unload cycle of the EESSI module restores the original
  # environment, then reloads the module and runs the verification script.
  make_sure_load_and_unload_work:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        EESSI_VERSION:
          - '2023.06'
        # "none" means: do not set the corresponding override variable at all
        EESSI_SOFTWARE_SUBDIR_OVERRIDE:
          - none
          - x86_64/amd/zen2
          - x86_64/amd/zen4
        EESSI_ACCELERATOR_TARGET_OVERRIDE:
          - none
          - accel/nvidia/cc80
          - accel/nvidia/cc77
        include:
          # For each override we expect a specific path (which may differ from the original due to overrides)
          - EESSI_ACCELERATOR_TARGET_OVERRIDE: accel/nvidia/cc80
            FINAL_ACCELERATOR_PATH_EXPECTED: accel/nvidia/cc80
          - EESSI_ACCELERATOR_TARGET_OVERRIDE: accel/nvidia/cc77 # deliberately chose a non-existent CUDA capability
            FINAL_ACCELERATOR_PATH_EXPECTED: accel/nvidia/cc70 # this reverts to the fallback case (which does exist)

    steps:
      - name: Check out software-layer repository
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - name: Mount EESSI CernVM-FS pilot repository
        uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0
        with:
          cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb
          cvmfs_http_proxy: DIRECT
          cvmfs_repositories: software.eessi.io

      - name: Test for identical environment after loading and unloading the EESSI module
        run: |
          # Initialise Lmod
          . /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/compat/linux/$(uname -m)/usr/share/Lmod/init/bash

          # Set our cpu path overrides according to our matrix
          if [[ "${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}" != "none" ]]; then
            export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
          fi

          # Set our accelerator path overrides according to our matrix
          if [[ "${{matrix.EESSI_ACCELERATOR_TARGET_OVERRIDE}}" != "none" ]]; then
            export EESSI_ACCELERATOR_TARGET_OVERRIDE=${{matrix.EESSI_ACCELERATOR_TARGET_OVERRIDE}}
            export FINAL_ACCELERATOR_PATH_EXPECTED=${{matrix.FINAL_ACCELERATOR_PATH_EXPECTED}}
          fi

          # Turn on debug output in case we want to take a look
          export EESSI_DEBUG_INIT=true

          initial_env_file="initial_env.txt"
          module_cycled_file="load_unload_cycle.txt"

          # prepare Lmod, resetting it in a roundabout way given we don't want defaults set
          export MODULEPATH=init/modules:.github/workflows/modules
          module load fake_module
          module purge
          module unuse .github/workflows/modules
          module avail

          # Store the initial environment (ignoring Lmod tables)
          env | grep -v _ModuleTable | sort > "${initial_env_file}"

          # Do (and undo) loading the EESSI module
          # NOTE(review): CPU_ARCH is neither exported nor referenced again in this step - confirm it is still needed
          CPU_ARCH=$(./init/eessi_archdetect.sh -a cpupath)
          module load EESSI/${{matrix.EESSI_VERSION}}
          module unload EESSI/${{matrix.EESSI_VERSION}}
          env | grep -v _ModuleTable | sort > "${module_cycled_file}"

          # Now compare the two results (do not expose the files, as they contain the full environment!)
          if (diff "${initial_env_file}" "${module_cycled_file}" > /dev/null); then
            echo "Test for checking env variables PASSED"
          else
            echo "Test for checking env variables FAILED" >&2
            # Only the differing lines are printed, not the full environment
            diff --unified=0 "${initial_env_file}" "${module_cycled_file}"
            exit 1
          fi

          module load EESSI/${{matrix.EESSI_VERSION}}
          # Make sure our CPU and GPU architectures are what we expect
          # (script uses EESSI_SOFTWARE_SUBDIR_OVERRIDE and EESSI_ACCELERATOR_TARGET_OVERRIDE
          # as the starting point for the comparison)
          python .github/workflows/scripts/verify_eessi_environment.py