Skip to content

Commit 63dda70

Browse files
authored
Merge branch '2023.06-software.eessi.io' into maqao
2 parents eb59828 + ed4dde3 commit 63dda70

File tree

2 files changed

+105
-54
lines changed

2 files changed

+105
-54
lines changed

.github/workflows/test-software.eessi.io.yml

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ jobs:
5252
steps:
5353
- name: Check out software-layer repository
5454
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
55+
with:
56+
fetch-depth: 0 # Fetch all history for all branches and tags
5557

5658
- name: Mount EESSI CernVM-FS pilot repository
5759
uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0
@@ -67,17 +69,19 @@ jobs:
6769
# set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
6870
# to prevent issues with checks in the EasyBuild configuration that use this variable
6971
export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
70-
module load EasyBuild
71-
which eb
72-
eb --version
7372
export EESSI_PREFIX=/cvmfs/software.eessi.io/versions/${EESSI_VERSION}
7473
export EESSI_OS_TYPE=linux
7574
env | grep ^EESSI | sort
7675
7776
# first check the CPU-only builds for this CPU target
78-
echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${EESSI_VERSION}/eessi-${EESSI_VERSION}-*.yml with latest EasyBuild release)"
77+
echo "first run check_missing_installations.sh for CPU-only builds"
7978
for easystack_file in $(EESSI_VERSION=${EESSI_VERSION} .github/workflows/scripts/only_latest_easystacks.sh); do
80-
echo "check missing installations for ${easystack_file}..."
79+
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*.yml/\1/g')
80+
echo "check missing installations for ${easystack_file} with EasyBuild ${eb_version}..."
81+
module purge
82+
module load EasyBuild/${eb_version}
83+
which eb
84+
eb --version
8185
./check_missing_installations.sh ${easystack_file}
8286
ec=$?
8387
if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi
@@ -92,7 +96,12 @@ jobs:
9296
module use ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all
9397
echo "checking missing installations for accelerator ${accel} using modulepath: ${MODULEPATH}"
9498
for easystack_file in $(EESSI_VERSION=${EESSI_VERSION} ACCEL_EASYSTACKS=1 .github/workflows/scripts/only_latest_easystacks.sh); do
95-
echo "check missing installations for ${easystack_file}..."
99+
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*.yml/\1/g')
100+
echo "check missing installations for ${easystack_file} with EasyBuild ${eb_version}..."
101+
module purge
102+
module load EasyBuild/${eb_version}
103+
which eb
104+
eb --version
96105
./check_missing_installations.sh ${easystack_file}
97106
ec=$?
98107
if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi
@@ -132,3 +141,28 @@ jobs:
132141
echo "captured missing package; test PASSED"
133142
exit 0
134143
fi
144+
145+
- name: Check that EasyBuild hook is up to date
146+
if: ${{ github.event_name == 'pull_request' }}
147+
run: |
148+
FILE="eb_hooks.py"
149+
TEMP_FILE="$(mktemp)"
150+
151+
# Fetch base branch
152+
git fetch origin ${{ github.base_ref }}
153+
154+
# Check whether the hooks file has changed in the PR
155+
if git diff --name-only origin/${{ github.base_ref }}...HEAD | grep -q "^$FILE$"; then
156+
echo "Hooks changed in PR. Using PR version."
157+
cp "$FILE" "$TEMP_FILE"
158+
else
159+
echo "File not changed in PR. Using default branch version."
160+
git show origin/${{ github.base_ref }}:$FILE > "$TEMP_FILE"
161+
fi
162+
163+
# Compare the hooks to what is shipped in the repository
164+
# (it is overkill, but harmless, to do this for every architecture)
165+
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
166+
source /cvmfs/software.eessi.io/versions/${EESSI_VERSION}/init/bash
167+
module load EESSI-extend
168+
diff "$TEMP_FILE" "$EASYBUILD_HOOKS"

eb_hooks.py

Lines changed: 65 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from easybuild.tools.run import run_cmd
1414
from easybuild.tools.systemtools import AARCH64, POWER, X86_64, get_cpu_architecture, get_cpu_features
1515
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC
16+
from easybuild.tools.version import VERSION as EASYBUILD_VERSION
1617

1718
# prefer importing LooseVersion from easybuild.tools, but fall back to distutils in case EasyBuild <= 4.7.0 is used
1819
try:
@@ -126,9 +127,15 @@ def post_ready_hook(self, *args, **kwargs):
126127
Post-ready hook: limit parallelism for selected builds based on software name and CPU target.
127128
parallelism needs to be limited because some builds require a lot of memory per used core.
128129
"""
129-
# 'parallel' easyconfig parameter is set via EasyBlock.set_parallel in ready step based on available cores.
130+
# 'parallel' (EB4) or 'max_parallel' (EB5) easyconfig parameter is set via EasyBlock.set_parallel in ready step
131+
# based on available cores.
132+
133+
# Check whether we have EasyBuild 4 or 5
134+
parallel_param = 'parallel'
135+
if EASYBUILD_VERSION >= '5':
136+
parallel_param = 'max_parallel'
130137
# get current parallelism setting
131-
parallel = self.cfg['parallel']
138+
parallel = self.cfg[parallel_param]
132139
if parallel == 1:
133140
return # no need to limit if already using 1 core
134141

@@ -152,7 +159,7 @@ def post_ready_hook(self, *args, **kwargs):
152159

153160
# apply the limit if it's different from current
154161
if new_parallel != parallel:
155-
self.cfg['parallel'] = new_parallel
162+
self.cfg[parallel_param] = new_parallel
156163
msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
157164
print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
158165

@@ -965,52 +972,56 @@ def post_postproc_cuda(self, *args, **kwargs):
965972
Remove files from CUDA installation that we are not allowed to ship,
966973
and replace them with a symlink to a corresponding installation under host_injections.
967974
"""
975+
if self.name == 'CUDA':
976+
# This hook only acts on an installation under repositories that _we_ ship (*.eessi.io/versions)
977+
eessi_installation = bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir))
978+
979+
if eessi_installation:
980+
print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...")
981+
982+
# read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped
983+
eula_path = os.path.join(self.installdir, 'EULA.txt')
984+
relevant_eula_lines = []
985+
with open(eula_path) as infile:
986+
copy = False
987+
for line in infile:
988+
if line.strip() == "2.6. Attachment A":
989+
copy = True
990+
continue
991+
elif line.strip() == "2.7. Attachment B":
992+
copy = False
993+
continue
994+
elif copy:
995+
relevant_eula_lines.append(line)
996+
997+
# create list without file extensions, they're not really needed and they only complicate things
998+
allowlist = ['EULA', 'README']
999+
file_extensions = ['.so', '.a', '.h', '.bc']
1000+
for line in relevant_eula_lines:
1001+
for word in line.split():
1002+
if any(ext in word for ext in file_extensions):
1003+
allowlist.append(os.path.splitext(word)[0])
1004+
# The EULA of CUDA 12.4 introduced a typo (confirmed by NVIDIA):
1005+
# libnvrtx-builtins_static.so should be libnvrtc-builtins_static.so
1006+
if 'libnvrtx-builtins_static' in allowlist:
1007+
allowlist.remove('libnvrtx-builtins_static')
1008+
allowlist.append('libnvrtc-builtins_static')
1009+
allowlist = sorted(set(allowlist))
1010+
self.log.info(
1011+
"Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist)
1012+
)
9681013

969-
# We need to check if we are doing an EESSI-distributed installation
970-
eessi_installation = bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir))
971-
972-
if self.name == 'CUDA' and eessi_installation:
973-
print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...")
974-
975-
# read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped
976-
eula_path = os.path.join(self.installdir, 'EULA.txt')
977-
relevant_eula_lines = []
978-
with open(eula_path) as infile:
979-
copy = False
980-
for line in infile:
981-
if line.strip() == "2.6. Attachment A":
982-
copy = True
983-
continue
984-
elif line.strip() == "2.7. Attachment B":
985-
copy = False
986-
continue
987-
elif copy:
988-
relevant_eula_lines.append(line)
989-
990-
# create list without file extensions, they're not really needed and they only complicate things
991-
allowlist = ['EULA', 'README']
992-
file_extensions = ['.so', '.a', '.h', '.bc']
993-
for line in relevant_eula_lines:
994-
for word in line.split():
995-
if any(ext in word for ext in file_extensions):
996-
allowlist.append(os.path.splitext(word)[0])
997-
# The EULA of CUDA 12.4 introduced a typo (confirmed by NVIDIA):
998-
# libnvrtx-builtins_static.so should be libnvrtc-builtins_static.so
999-
if 'libnvrtx-builtins_static' in allowlist:
1000-
allowlist.remove('libnvrtx-builtins_static')
1001-
allowlist.append('libnvrtc-builtins_static')
1002-
allowlist = sorted(set(allowlist))
1003-
self.log.info("Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist))
1004-
1005-
# Do some quick sanity checks for things we should or shouldn't have in the list
1006-
if 'nvcc' in allowlist:
1007-
raise EasyBuildError("Found 'nvcc' in allowlist: %s" % allowlist)
1008-
if 'libcudart' not in allowlist:
1009-
raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist)
1014+
# Do some quick sanity checks for things we should or shouldn't have in the list
1015+
if 'nvcc' in allowlist:
1016+
raise EasyBuildError("Found 'nvcc' in allowlist: %s" % allowlist)
1017+
if 'libcudart' not in allowlist:
1018+
raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist)
10101019

1011-
# replace files that are not distributable with symlinks into
1012-
# host_injections
1013-
replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist)
1020+
# replace files that are not distributable with symlinks into
1021+
# host_injections
1022+
replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist)
1023+
else:
1024+
print_msg(f"EESSI hook to respect CUDA license not triggered for installation path {self.installdir}")
10141025
else:
10151026
raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!")
10161027

@@ -1269,15 +1280,21 @@ def set_maximum(parallel, max_value):
12691280
'*': (divide_by_factor, 2),
12701281
CPU_TARGET_A64FX: (set_maximum, 12),
12711282
},
1283+
'nodejs': {
1284+
CPU_TARGET_A64FX: (divide_by_factor, 2),
1285+
},
12721286
'MBX': {
12731287
'*': (divide_by_factor, 2),
12741288
},
1289+
'PyTorch': {
1290+
CPU_TARGET_A64FX: (divide_by_factor, 4),
1291+
},
12751292
'TensorFlow': {
12761293
'*': (divide_by_factor, 2),
12771294
CPU_TARGET_A64FX: (set_maximum, 8),
12781295
},
12791296
'Qt5': {
1280-
CPU_TARGET_A64FX: (divide_by_factor, 2),
1297+
CPU_TARGET_A64FX: (set_maximum, 8),
12811298
},
12821299
'ROOT': {
12831300
CPU_TARGET_A64FX: (divide_by_factor, 2),

0 commit comments

Comments
 (0)