Merge pull request #17 from boegel/A64FX_parallel_limit

ocaisa · web-flow · commit 383b39e8096a · 2025-06-20T12:15:52.000+02:00
use a standard limit for A64FX (only use a quarter of the available cores) + fix determining the level of parallelism in a way that is compatible with both EasyBuild 4.x and 5.x
diff --git a/eb_hooks.py b/eb_hooks.py
@@ -128,21 +128,18 @@ def post_ready_hook(self, *args, **kwargs):
     Post-ready hook: limit parallellism for selected builds based on software name and CPU target.
                      parallelism needs to be limited because some builds require a lot of memory per used core.
     """
-    # 'parallel' (EB4) or 'max_parallel' (EB5) easyconfig parameter is set via EasyBlock.set_parallel in ready step
-    # based on available cores.
-
-    # Check whether we have EasyBuild 4 or 5
-    parallel_param = 'parallel'
-    if EASYBUILD_VERSION >= '5':
-        parallel_param = 'max_parallel'
-    # get current parallelism setting
-    parallel = self.cfg[parallel_param]
+    # 'parallel' easyconfig parameter (EB4) or the parallel property (EB5) is set via EasyBlock.set_parallel
+    # in ready step based on available cores
+    parallel = getattr(self, 'parallel', self.cfg['parallel'])
+    
     if parallel == 1:
         return  # no need to limit if already using 1 core
 
     # get CPU target
     cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
 
+    new_parallel = parallel
+
     # check if we have limits defined for this software
     if self.name in PARALLELISM_LIMITS:
         limits = PARALLELISM_LIMITS[self.name]
@@ -158,11 +155,19 @@ def post_ready_hook(self, *args, **kwargs):
         else:
             return  # no applicable limits found
 
-        # apply the limit if it's different from current
-        if new_parallel != parallel:
-            self.cfg[parallel_param] = new_parallel
-            msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
-            print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
+    # check if there's a general limit set for CPU target
+    elif cpu_target in PARALLELISM_LIMITS:
+        operation_func, operation_args = PARALLELISM_LIMITS[cpu_target]
+        new_parallel = operation_func(parallel, operation_args)
+
+    # apply the limit if it's different from current
+    if new_parallel != parallel:
+        if EASYBUILD_VERSION >= '5':
+            self.cfg.parallel = new_parallel
+        else:
+            self.cfg['parallel'] = new_parallel
+        msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
+        print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
 
 
 def pre_prepare_hook(self, *args, **kwargs):
@@ -1376,27 +1381,22 @@ def set_maximum(parallel, max_value):
 # specific CPU target is defined in the data structure below. If not, it checks for
 # the generic '*' entry.
 PARALLELISM_LIMITS = {
+    # by default, only use a quarter of the cores when building for A64FX;
+    # this is done because total memory is typically limited on A64FX due to HBM
+    # (for example, Deucalion has 32GB of HBM for 48 cores per node)
+    CPU_TARGET_A64FX: (divide_by_factor, 4),
+    # software-specific limits
     'libxc': {
         '*': (divide_by_factor, 2),
-        CPU_TARGET_A64FX: (set_maximum, 12),
-    },
-    'nodejs': {
-        CPU_TARGET_A64FX: (divide_by_factor, 2),
     },
     'MBX': {
         '*': (divide_by_factor, 2),
     },
-    'PyTorch': {
-        CPU_TARGET_A64FX: (divide_by_factor, 4),
-    },
     'TensorFlow': {
         '*': (divide_by_factor, 2),
         CPU_TARGET_A64FX: (set_maximum, 8),
     },
     'Qt5': {
         CPU_TARGET_A64FX: (set_maximum, 8),
     },
-    'ROOT': {
-        CPU_TARGET_A64FX: (divide_by_factor, 2),
-    },
 }