Skip to content

Commit dab5e43

Browse files
committed
fix: handling HIP local memory error in core and compiler
1 parent 4c0a877 commit dab5e43

File tree

2 files changed

+26
-6
lines changed

2 files changed

+26
-6
lines changed

kernel_tuner/backends/compiler.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,23 @@ def compile(self, kernel_instance):
265265
if platform.system() == "Darwin":
266266
lib_extension = ".dylib"
267267

268-
subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
269-
subprocess.check_call(
268+
subprocess.run(
269+
[self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"],
270+
stdout=subprocess.PIPE,
271+
stderr=subprocess.PIPE,
272+
text=True,
273+
check=True
274+
)
275+
276+
subprocess.run(
270277
[self.compiler, filename + ".o"]
271278
+ compiler_options
272279
+ ["-shared", "-o", filename + lib_extension]
273-
+ lib_args
280+
+ lib_args,
281+
stdout=subprocess.PIPE,
282+
stderr=subprocess.PIPE,
283+
text=True,
284+
check=True
274285
)
275286

276287
self.lib = np.ctypeslib.load_library(filename, ".")
@@ -396,10 +407,16 @@ def memcpy_htod(self, dest, src):
396407

397408
def cleanup_lib(self):
398409
"""unload the previously loaded shared library"""
410+
if self.lib is None:
411+
return
412+
399413
if not self.using_openmp and not self.using_openacc:
400414
# this if statement is necessary because shared libraries that use
401415
# OpenMP will core dump when unloaded, this is a well-known issue with OpenMP
402416
logging.debug("unloading shared library")
403-
_ctypes.dlclose(self.lib._handle)
417+
try:
418+
_ctypes.dlclose(self.lib._handle)
419+
finally:
420+
self.lib = None
404421

405422
units = {}

kernel_tuner/core.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -647,8 +647,11 @@ def compile_kernel(self, instance, verbose):
647647
shared_mem_error_messages = [
648648
"uses too much shared data",
649649
"local memory limit exceeded",
650+
r"local memory \(\d+\) exceeds limit \(\d+\)",
650651
]
651-
if any(msg in str(e) for msg in shared_mem_error_messages):
652+
error_message = str(e.stderr) if hasattr(e, "stderr") else str(e)
653+
if any(re.search(msg, error_message) for msg in shared_mem_error_messages):
654+
print("DEBUG: SHARED MEM ERROR")
652655
logging.debug(
653656
"compile_kernel failed due to kernel using too much shared memory"
654657
)
@@ -715,7 +718,7 @@ def create_kernel_instance(self, kernel_source, kernel_options, params, verbose)
715718
)
716719

717720
# check for templated kernel
718-
if kernel_source.lang in ["CUDA", "NVCUDA"] and "<" in name and ">" in name:
721+
if kernel_source.lang in ["CUDA", "NVCUDA", "HIP"] and "<" in name and ">" in name:
719722
kernel_string, name = wrap_templated_kernel(kernel_string, name)
720723

721724
# Preprocess GPU arguments. Require for handling `Tunable` arguments

0 commit comments

Comments
 (0)