diff --git a/CMakeLists.txt b/CMakeLists.txt index 0450e19..3695218 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,7 @@ if(NOT MSVC) endif() find_package(Threads REQUIRED) +find_package(OpenCL REQUIRED) ################################################################################ @@ -128,6 +129,11 @@ add_executable(cuda_memtest cuda_memtest.cpp ) +add_executable(ocl_memtest + ocl_tests.cpp + ocl_memtest.cpp + ) + if(CUDA_MEMTEST_BACKEND STREQUAL "cuda") target_link_libraries(cuda_memtest INTERFACE CUDA::cudart) target_link_libraries(cuda_memtest INTERFACE CUDA::cuda_driver) @@ -152,6 +158,10 @@ endif() if(NOT MSVC) target_link_libraries(cuda_memtest PRIVATE Threads::Threads) + target_link_libraries(ocl_memtest + PRIVATE Threads::Threads + PRIVATE OpenCL::OpenCL + ) endif() ## annotate with RPATH's @@ -175,11 +185,14 @@ endif() option(CUDA_MEMTEST_RELEASE "disable all runtime asserts" ON) if(CUDA_MEMTEST_RELEASE) target_compile_definitions(cuda_memtest PRIVATE NDEBUG) + target_compile_definitions(ocl_memtest PRIVATE NDEBUG) endif(CUDA_MEMTEST_RELEASE) ################################################################################ # Install cuda_memtest ################################################################################ -install(TARGETS cuda_memtest +install(TARGETS + cuda_memtest + ocl_memtest RUNTIME DESTINATION bin) diff --git a/ocl_memtest.cpp b/ocl_memtest.cpp index 69deb5b..22b0e69 100644 --- a/ocl_memtest.cpp +++ b/ocl_memtest.cpp @@ -18,7 +18,7 @@ unsigned int exit_on_error = 0; -#define KERNEL_FILE "ocl_memtest_kernels.cpp" +#define KERNEL_FILE "ocl_memtest_kernels.cl" #define MAX_KERNEL_FILE_SIZE (1024*1024) diff --git a/ocl_memtest_kernels.cpp b/ocl_memtest_kernels.cl similarity index 97% rename from ocl_memtest_kernels.cpp rename to ocl_memtest_kernels.cl index c7ee6f8..0755d10 100644 --- a/ocl_memtest_kernels.cpp +++ b/ocl_memtest_kernels.cl @@ -595,9 +595,9 @@ kernel0_local_write(__global char* ptr, unsigned long memsize) for(i=idx; i < n; i+= total_num_threads){ - __global unsigned long * start_p= (__global unsigned long)(ptr + i*BLOCKSIZE); - __global unsigned long* end_p = (__global unsigned long*)(ptr + (i+1)*BLOCKSIZE); - __global unsigned long * p =start_p; + __global unsigned long * start_p = (__global unsigned long*)(ptr + i*BLOCKSIZE); + __global unsigned long * end_p = (__global unsigned long*)(ptr + (i+1)*BLOCKSIZE); + __global unsigned long * p = start_p; unsigned int pattern = 1; unsigned int mask = 8; @@ -645,9 +645,9 @@ kernel0_local_read(__global char* ptr, unsigned long memsize, for(i=idx; i < n; i+= total_num_threads){ - __global unsigned long * start_p= (__global unsigned long)(ptr + i*BLOCKSIZE); - __global unsigned long* end_p = (__global unsigned long*)(ptr + (i+1)*BLOCKSIZE); - __global unsigned long * p =start_p; + __global unsigned long * start_p = (__global unsigned long*)(ptr + i*BLOCKSIZE); + __global unsigned long * end_p = (__global unsigned long*)(ptr + (i+1)*BLOCKSIZE); + __global unsigned long * p = start_p; unsigned int pattern = 1; unsigned int mask = 8;