-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Closed
Labels
Description
Describe the bug
In the following example, including the #include "cutlass/gemm/device/gemm_universal_adapter.h" line is causing CuPy to be unable to find the qwert_entry0 function. Could including that header be affecting the function names in the compiled program?
Steps/Code to reproduce bug
kernel = r"""
#include <new>
#include <assert.h>
#include <stdio.h>
#include <cutlass/cutlass.h>
#include <cutlass/numeric_types.h>
#include "cutlass/gemm/device/gemm_universal_adapter.h"
using namespace cute;
#include <cooperative_groups.h>
#include <cuda/semaphore>
__device__ cuda::binary_semaphore<cuda::thread_scope_system> console_lock(1);
extern "C" __global__ void qwert_entry0() {
int v0;
v0 = threadIdx.x;
int v1;
v1 = blockIdx.x;
int v2;
v2 = v1 * 256l;
int v3;
v3 = v0 + v2;
bool v4;
v4 = v3 == 0l;
if (v4){
cuda::counting_semaphore<cuda::thread_scope_system, 1l> & v5 = console_lock;
auto v6 = cooperative_groups::coalesced_threads();
v5.acquire();
printf("%s\n","hello");
v5.release();
v6.sync() ;
return ;
} else {
return ;
}
}
"""
import cupy as cp
from dataclasses import dataclass
from typing import NamedTuple, Union, Callable, Tuple
i8 = int; i16 = int; i32 = int; i64 = int; u8 = int; u16 = int; u32 = int; u64 = int; f32 = float; f64 = float; char = str; string = str
options = []
options.append('--define-macro=NDEBUG')
options.append('--dopt=on')
options.append('--diag-suppress=550,20012,68,39,177')
options.append('--restrict')
options.append('--maxrregcount=256')
options.append('-I"G:/cutlass-3.5.1/include"')
options.append('-I"G:/cutlass-3.5.1/tools/util/include"')
options.append('--std=c++20')
options.append('-D__CUDA_NO_HALF_CONVERSIONS__')
raw_module = cp.RawModule(code=kernel, backend='nvcc', enable_cooperative_groups=True, options=tuple(options))
def main_body():
v0 = cp.cuda.Device().attributes['MultiProcessorCount']
v1 = v0 == 24
del v0
v2 = v1 == False
if v2:
v3 = "The number of SMs per GPU at runtime must much that what is declared atop of corecuda.base. Make sure to use the correct constant so it can be propagated at compile time."
assert v1, v3
del v3
else:
pass
del v1, v2
v5 = raw_module.get_function(f"qwert_entry0")
v5.max_dynamic_shared_size_bytes = 81920
v5((24,),(256,),(),shared_mem=81920)
del v5
return
def main():
r = main_body()
cp.cuda.get_current_stream().synchronize() # This line is here so the `__trap()` calls on the kernel aren't missed.
return r
if __name__ == '__main__': print(main())Expected behavior
Thread 0 should print hello.
Environment details (please complete the following information):
- Environment location: Local, Win 11
Additional context
Here is what happens when I run the script.
PS C:\Spiral_s_ML_Library> c:; cd 'c:\Spiral_s_ML_Library'; & 'c:\Users\mrakg\AppData\Local\pypoetry\Cache\virtualenvs\ui-EoO7T__V-py3.11\Scripts\python.exe' 'c:\Users\mrakg\.vscode\extensions\ms-python.debugpy-2024.10.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher' '60529' '--' 'c:\Spiral_s_ML_Library\tests\cutlass\test2.py'
Traceback (most recent call last):
File "c:\Spiral_s_ML_Library\tests\cutlass\test2.py", line 78, in <module>
if __name__ == '__main__': print(main())
^^^^^^
File "c:\Spiral_s_ML_Library\tests\cutlass\test2.py", line 74, in main
r = main_body()
File "c:\Spiral_s_ML_Library\tests\cutlass\test2.py", line 67, in main_body
v5 = raw_module.get_function(f"qwert_entry0")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "cupy\_core\raw.pyx", line 487, in cupy._core.raw.RawModule.get_function
File "cupy\_core\raw.pyx", line 100, in cupy._core.raw.RawKernel.kernel.__get__
File "cupy\_core\raw.pyx", line 121, in cupy._core.raw.RawKernel._kernel
File "cupy\cuda\function.pyx", line 275, in cupy.cuda.function.Module.get_function
File "cupy\cuda\function.pyx", line 216, in cupy.cuda.function.Function.__init__
File "cupy_backends\cuda\api\driver.pyx", line 244, in cupy_backends.cuda.api.driver.moduleGetFunction
File "cupy_backends\cuda\api\driver.pyx", line 63, in cupy_backends.cuda.api.driver.check_status
cupy_backends.cuda.api.driver.CUDADriverError: CUDA_ERROR_NOT_FOUND: named symbol not found
In order to actually run the script, you'll have to install CuPy.