13
13
from easybuild .tools .run import run_cmd
14
14
from easybuild .tools .systemtools import AARCH64 , POWER , X86_64 , get_cpu_architecture , get_cpu_features
15
15
from easybuild .tools .toolchain .compiler import OPTARCH_GENERIC
16
+ from easybuild .tools .version import VERSION as EASYBUILD_VERSION
16
17
17
18
# prefer importing LooseVersion from easybuild.tools, but fall back to distutils in case EasyBuild <= 4.7.0 is used
18
19
try :
@@ -126,9 +127,15 @@ def post_ready_hook(self, *args, **kwargs):
126
127
Post-ready hook: limit parallelism for selected builds based on software name and CPU target.
127
128
parallelism needs to be limited because some builds require a lot of memory per used core.
128
129
"""
129
- # 'parallel' easyconfig parameter is set via EasyBlock.set_parallel in ready step based on available cores.
130
+ # 'parallel' (EB4) or 'max_parallel' (EB5) easyconfig parameter is set via EasyBlock.set_parallel in ready step
131
+ # based on available cores.
132
+
133
+ # Check whether we have EasyBuild 4 or 5
134
+ parallel_param = 'parallel'
135
+ if EASYBUILD_VERSION >= '5' :
136
+ parallel_param = 'max_parallel'
130
137
# get current parallelism setting
131
- parallel = self .cfg ['parallel' ]
138
+ parallel = self .cfg [parallel_param ]
132
139
if parallel == 1 :
133
140
return # no need to limit if already using 1 core
134
141
@@ -152,7 +159,7 @@ def post_ready_hook(self, *args, **kwargs):
152
159
153
160
# apply the limit if it's different from current
154
161
if new_parallel != parallel :
155
- self .cfg ['parallel' ] = new_parallel
162
+ self .cfg [parallel_param ] = new_parallel
156
163
msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
157
164
print_msg (msg % (new_parallel , parallel , self .name , cpu_target ), log = self .log )
158
165
@@ -965,52 +972,56 @@ def post_postproc_cuda(self, *args, **kwargs):
965
972
Remove files from CUDA installation that we are not allowed to ship,
966
973
and replace them with a symlink to a corresponding installation under host_injections.
967
974
"""
975
+ if self .name == 'CUDA' :
976
+ # This hook only acts on an installation under repositories that _we_ ship (*.eessi.io/versions)
977
+ eessi_installation = bool (re .search (EESSI_INSTALLATION_REGEX , self .installdir ))
978
+
979
+ if eessi_installation :
980
+ print_msg ("Replacing files in CUDA installation that we can not ship with symlinks to host_injections..." )
981
+
982
+ # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped
983
+ eula_path = os .path .join (self .installdir , 'EULA.txt' )
984
+ relevant_eula_lines = []
985
+ with open (eula_path ) as infile :
986
+ copy = False
987
+ for line in infile :
988
+ if line .strip () == "2.6. Attachment A" :
989
+ copy = True
990
+ continue
991
+ elif line .strip () == "2.7. Attachment B" :
992
+ copy = False
993
+ continue
994
+ elif copy :
995
+ relevant_eula_lines .append (line )
996
+
997
+ # create list without file extensions, they're not really needed and they only complicate things
998
+ allowlist = ['EULA' , 'README' ]
999
+ file_extensions = ['.so' , '.a' , '.h' , '.bc' ]
1000
+ for line in relevant_eula_lines :
1001
+ for word in line .split ():
1002
+ if any (ext in word for ext in file_extensions ):
1003
+ allowlist .append (os .path .splitext (word )[0 ])
1004
+ # The EULA of CUDA 12.4 introduced a typo (confirmed by NVIDIA):
1005
+ # libnvrtx-builtins_static.so should be libnvrtc-builtins_static.so
1006
+ if 'libnvrtx-builtins_static' in allowlist :
1007
+ allowlist .remove ('libnvrtx-builtins_static' )
1008
+ allowlist .append ('libnvrtc-builtins_static' )
1009
+ allowlist = sorted (set (allowlist ))
1010
+ self .log .info (
1011
+ "Allowlist for files in CUDA installation that can be redistributed: " + ', ' .join (allowlist )
1012
+ )
968
1013
969
- # We need to check if we are doing an EESSI-distributed installation
970
- eessi_installation = bool (re .search (EESSI_INSTALLATION_REGEX , self .installdir ))
971
-
972
- if self .name == 'CUDA' and eessi_installation :
973
- print_msg ("Replacing files in CUDA installation that we can not ship with symlinks to host_injections..." )
974
-
975
- # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped
976
- eula_path = os .path .join (self .installdir , 'EULA.txt' )
977
- relevant_eula_lines = []
978
- with open (eula_path ) as infile :
979
- copy = False
980
- for line in infile :
981
- if line .strip () == "2.6. Attachment A" :
982
- copy = True
983
- continue
984
- elif line .strip () == "2.7. Attachment B" :
985
- copy = False
986
- continue
987
- elif copy :
988
- relevant_eula_lines .append (line )
989
-
990
- # create list without file extensions, they're not really needed and they only complicate things
991
- allowlist = ['EULA' , 'README' ]
992
- file_extensions = ['.so' , '.a' , '.h' , '.bc' ]
993
- for line in relevant_eula_lines :
994
- for word in line .split ():
995
- if any (ext in word for ext in file_extensions ):
996
- allowlist .append (os .path .splitext (word )[0 ])
997
- # The EULA of CUDA 12.4 introduced a typo (confirmed by NVIDIA):
998
- # libnvrtx-builtins_static.so should be libnvrtc-builtins_static.so
999
- if 'libnvrtx-builtins_static' in allowlist :
1000
- allowlist .remove ('libnvrtx-builtins_static' )
1001
- allowlist .append ('libnvrtc-builtins_static' )
1002
- allowlist = sorted (set (allowlist ))
1003
- self .log .info ("Allowlist for files in CUDA installation that can be redistributed: " + ', ' .join (allowlist ))
1004
-
1005
- # Do some quick sanity checks for things we should or shouldn't have in the list
1006
- if 'nvcc' in allowlist :
1007
- raise EasyBuildError ("Found 'nvcc' in allowlist: %s" % allowlist )
1008
- if 'libcudart' not in allowlist :
1009
- raise EasyBuildError ("Did not find 'libcudart' in allowlist: %s" % allowlist )
1014
+ # Do some quick sanity checks for things we should or shouldn't have in the list
1015
+ if 'nvcc' in allowlist :
1016
+ raise EasyBuildError ("Found 'nvcc' in allowlist: %s" % allowlist )
1017
+ if 'libcudart' not in allowlist :
1018
+ raise EasyBuildError ("Did not find 'libcudart' in allowlist: %s" % allowlist )
1010
1019
1011
- # replace files that are not distributable with symlinks into
1012
- # host_injections
1013
- replace_non_distributable_files_with_symlinks (self .log , self .installdir , self .name , allowlist )
1020
+ # replace files that are not distributable with symlinks into
1021
+ # host_injections
1022
+ replace_non_distributable_files_with_symlinks (self .log , self .installdir , self .name , allowlist )
1023
+ else :
1024
+ print_msg (f"EESSI hook to respect CUDA license not triggered for installation path { self .installdir } " )
1014
1025
else :
1015
1026
raise EasyBuildError ("CUDA-specific hook triggered for non-CUDA easyconfig?!" )
1016
1027
@@ -1269,15 +1280,21 @@ def set_maximum(parallel, max_value):
1269
1280
'*' : (divide_by_factor , 2 ),
1270
1281
CPU_TARGET_A64FX : (set_maximum , 12 ),
1271
1282
},
1283
+ 'nodejs' : {
1284
+ CPU_TARGET_A64FX : (divide_by_factor , 2 ),
1285
+ },
1272
1286
'MBX' : {
1273
1287
'*' : (divide_by_factor , 2 ),
1274
1288
},
1289
+ 'PyTorch' : {
1290
+ CPU_TARGET_A64FX : (divide_by_factor , 4 ),
1291
+ },
1275
1292
'TensorFlow' : {
1276
1293
'*' : (divide_by_factor , 2 ),
1277
1294
CPU_TARGET_A64FX : (set_maximum , 8 ),
1278
1295
},
1279
1296
'Qt5' : {
1280
- CPU_TARGET_A64FX : (divide_by_factor , 2 ),
1297
+ CPU_TARGET_A64FX : (set_maximum , 8 ),
1281
1298
},
1282
1299
'ROOT' : {
1283
1300
CPU_TARGET_A64FX : (divide_by_factor , 2 ),
0 commit comments