Skip to content

Commit 5106ad9

Browse files
committed
Introduced ACC_OPENCL_CMEM
- Moved CMEM support from SMM to backend. - Renamed GLOBAL to CONSTANT (kernel).
1 parent 16cbca9 commit 5106ad9

File tree

6 files changed

+37
-30
lines changed

6 files changed

+37
-30
lines changed

src/acc/opencl/acc_opencl.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ int c_dbcsr_acc_opencl_order_devices(const void* dev_a, const void* dev_b) {
150150
}
151151

152152

153+
/** Setup to run prior to touching OpenCL runtime. */
154+
void c_dbcsr_acc_opencl_configure(void);
153155
void c_dbcsr_acc_opencl_configure(void) {
154156
const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK"));
155157
const char* const env_nranks = getenv("MPI_LOCALNRANKS"); /* TODO */
@@ -762,6 +764,15 @@ int c_dbcsr_acc_finalize(void) {
762764
}
763765

764766

767+
int c_dbcsr_acc_opencl_use_cmem(const c_dbcsr_acc_opencl_device_t* devinfo) {
768+
# if defined(ACC_OPENCL_CMEM)
769+
return (0 != devinfo->size_maxalloc && devinfo->size_maxalloc <= devinfo->size_maxcmem) ? EXIT_SUCCESS : EXIT_FAILURE;
770+
# else
771+
return EXIT_FAILURE;
772+
# endif
773+
}
774+
775+
765776
void c_dbcsr_acc_clear_errors(void) {}
766777

767778

@@ -1111,6 +1122,16 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i
11111122
{
11121123
devinfo->unified = CL_FALSE;
11131124
}
1125+
if (EXIT_SUCCESS !=
1126+
clGetDeviceInfo(active_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &devinfo->size_maxalloc, NULL))
1127+
{
1128+
devinfo->size_maxalloc = 0;
1129+
}
1130+
if (EXIT_SUCCESS !=
1131+
clGetDeviceInfo(active_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &devinfo->size_maxcmem, NULL))
1132+
{
1133+
devinfo->size_maxcmem = 0;
1134+
}
11141135
if (EXIT_SUCCESS != clGetDeviceInfo(active_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), devinfo->wgsize, NULL)) {
11151136
devinfo->wgsize[0] = 1;
11161137
}

src/acc/opencl/acc_opencl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@
9393
#if !defined(ACC_OPENCL_DELIMS)
9494
# define ACC_OPENCL_DELIMS ",;"
9595
#endif
96+
#if !defined(ACC_OPENCL_CMEM) && 1
97+
# define ACC_OPENCL_CMEM
98+
#endif
9699
#if !defined(ACC_OPENCL_ASYNC) && 1
97100
# define ACC_OPENCL_ASYNC getenv("ACC_OPENCL_ASYNC")
98101
#endif
@@ -262,6 +265,8 @@ typedef struct c_dbcsr_acc_opencl_device_t {
262265
* smaller if an alternative SG-size exists (SG is zero if no support).
263266
*/
264267
size_t wgsize[3];
268+
/** Maximum size of memory allocations and constant buffer. */
269+
cl_ulong size_maxalloc, size_maxcmem;
265270
/** Kind of device (GPU, CPU, or other). */
266271
cl_device_type type;
267272
/** Whether host memory is unified. */
@@ -349,6 +354,8 @@ typedef struct c_dbcsr_acc_opencl_config_t {
349354
/** Global configuration setup in c_dbcsr_acc_init. */
350355
extern c_dbcsr_acc_opencl_config_t c_dbcsr_acc_opencl_config;
351356

357+
/** If buffers are hinted for non-concurrent writes aka "OpenCL constant". */
358+
int c_dbcsr_acc_opencl_use_cmem(const c_dbcsr_acc_opencl_device_t* devinfo);
352359
/** Determines host-pointer registration for modification. */
353360
c_dbcsr_acc_opencl_info_memptr_t* c_dbcsr_acc_opencl_info_hostptr(const void* memory);
354361
/** Determines device-pointer registration for modification (internal); offset is measured in elsize. */

src/acc/opencl/smm/kernels/multiply.cl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ __attribute__((reqd_work_group_size(WG, 1, 1)))
7070
__attribute__((intel_reqd_sub_group_size(SG)))
7171
#endif
7272
kernel void
73-
FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* restrict bdata, GLOBAL const int* restrict param_stack,
73+
FN(global T* restrict cdata, CONSTANT const T* restrict adata, CONSTANT const T* restrict bdata,
74+
CONSTANT const int* restrict param_stack,
7475
#if (1 < BS)
7576
int param_format, int stack_size, int bs) {
7677
const int gid = get_group_id(0), idx = get_local_id(0);
@@ -80,11 +81,11 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
8081
#endif
8182
const SINT pzero = (0 == param_format ? 1 : 0), pnext = (0 == param_format ? 3 : 6);
8283
/* param_stack/indexes can be one-based (Fortran) depending on param_format */
83-
GLOBAL const int* restrict param_base = param_stack + gid * (pnext * bs) + (0 == param_format ? 0 : 3);
84+
CONSTANT const int* restrict param_base = param_stack + gid * (pnext * bs) + (0 == param_format ? 0 : 3);
8485
#if defined(SLM_P) && (1 < BS)
8586
local int params[3 * BS]; /* bs <= BS */
8687
#else
87-
GLOBAL const int* restrict params = param_base;
88+
CONSTANT const int* restrict params = param_base;
8889
#endif
8990
#if defined(SLM_A)
9091
# if (1 != BK || BM < SM || 1 != BN)

src/acc/opencl/smm/kernels/transpose.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/*------------------------------------------------------------------------------------------------*/
99

1010
__attribute__((reqd_work_group_size(WG, 1, 1))) kernel void FN(
11-
int trs_offset, GLOBAL const int* restrict trs_stack, global T* restrict matrix) {
11+
int trs_offset, CONSTANT const int* restrict trs_stack, global T* restrict matrix) {
1212
/* offset in the transpose-stack that this block ID should handle */
1313
const int offset = trs_stack[trs_offset + get_group_id(0)];
1414
/* matrix according to the index (transpose-stack) */

src/acc/opencl/smm/opencl_libsmm.c

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,6 @@
3737
# if !defined(OPENCL_LIBSMM_NLOCKS_SMM)
3838
# define OPENCL_LIBSMM_NLOCKS_SMM 16
3939
# endif
40-
# if !defined(OPENCL_LIBSMM_CMEM) && 1
41-
# define OPENCL_LIBSMM_CMEM
42-
# endif
4340
# if !defined(OPENCL_LIBSMM_TODO) && 0
4441
# define OPENCL_LIBSMM_TODO
4542
# endif
@@ -90,22 +87,6 @@ opencl_libsmm_acc_dbm_launch_fn_t opencl_libsmm_acc_dbm_launch_fn;
9087
int opencl_libsmm_initialized;
9188

9289

93-
int opencl_libsmm_use_cmem(cl_device_id device) {
94-
# if defined(OPENCL_LIBSMM_CMEM)
95-
int result = EXIT_SUCCESS;
96-
cl_ulong size_maxalloc = 1, size_maxcmem = 0;
97-
ACC_OPENCL_CHECK(result, clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &size_maxalloc, NULL),
98-
"retrieve maximum size of memory allocation");
99-
ACC_OPENCL_CHECK(result, clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &size_maxcmem, NULL),
100-
"retrieve maximum size of constant buffer");
101-
return (EXIT_SUCCESS == result ? (size_maxalloc <= size_maxcmem ? EXIT_SUCCESS : EXIT_FAILURE) : result);
102-
# else
103-
LIBXSMM_UNUSED(device);
104-
return EXIT_FAILURE;
105-
# endif
106-
}
107-
108-
10990
int opencl_libsmm_write_trans_params(FILE* stream, int only_key, const opencl_libsmm_transkey_t* key,
11091
const opencl_libsmm_trans_t* config, const char* delim, const char* begin, const char* close) {
11192
int result = 0;
@@ -699,8 +680,8 @@ int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int stack_size, v
699680
const cl_device_id device_id = c_dbcsr_acc_opencl_config.devices[c_dbcsr_acc_opencl_config.device_id];
700681
const c_dbcsr_acc_opencl_device_t* const devinfo = &c_dbcsr_acc_opencl_config.device;
701682
const char *const env_cl = OPENCL_LIBSMM_TRANSENV("BUILDOPTS"), *const env_bm = OPENCL_LIBSMM_TRANSENV("BM");
702-
const char* const cmem = (EXIT_SUCCESS != opencl_libsmm_use_cmem(device_id) ? "global" : "constant");
703-
const char* const build_format = "-DGLOBAL=%s -DINPLACE=%i -DFN=%s -DSM=%i -DSN=%i -DWG=%i -DT=%s";
683+
const char* const cmem = (EXIT_SUCCESS != c_dbcsr_acc_opencl_use_cmem(devinfo) ? "global" : "constant");
684+
const char* const build_format = "-DCONSTANT=%s -DINPLACE=%i -DFN=%s -DSM=%i -DSN=%i -DWG=%i -DT=%s";
704685
const char *const env_inplace = OPENCL_LIBSMM_TRANSENV("INPLACE"), *tname = "";
705686
# if defined(OPENCL_LIBSMM_TRANS_INPLACE)
706687
const int inplace = ((m == n) && (NULL == env_inplace ? 1 : ('0' != *env_inplace)));
@@ -1079,15 +1060,15 @@ int opencl_libsmm_acc_process(const int* host_param_stack, const int* dev_param_
10791060
new_config.wgsize[kernel_idx] = (2 > new_config.wg ? (nbm * nbn) : ((int)LIBXSMM_UP2POT(nbm * nbn)));
10801061
}
10811062
if (new_config.wgsize[kernel_idx] <= devinfo->wgsize[0]) { /* SMM can be handled by device */
1082-
const char* const cmem = (EXIT_SUCCESS != opencl_libsmm_use_cmem(device_id) ? "global" : "constant");
1063+
const char* const cmem = (EXIT_SUCCESS != c_dbcsr_acc_opencl_use_cmem(devinfo) ? "global" : "constant");
10831064
const char* const env_nrepeat = getenv("NREPEAT_SMM");
10841065
const int typesize = OPENCL_LIBSMM_TYPESIZE(datatype);
10851066
const int slm_a = (1 != new_config.aa ? 0 : (LIBXSMM_ISPOT(k_max * typesize) + 1));
10861067
const int slm_b = (1 != new_config.ab ? 0 : (LIBXSMM_ISPOT(k_max * typesize) + 1));
10871068
const int slm_c = (1 != new_config.ac ? 0 : (LIBXSMM_ISPOT(m_max * typesize) + 1));
10881069
/* compose build parameters and flags */
10891070
nchar = LIBXSMM_SNPRINTF(build_params, sizeof(build_params),
1090-
"-DT=%s -DGPU=%u -DGLOBAL=%s -DWG=%i -DSG=%i -DFN=%s -DREPEAT=%i -DLU=%i "
1071+
"-DT=%s -DGPU=%u -DCONSTANT=%s -DWG=%i -DSG=%i -DFN=%s -DREPEAT=%i -DLU=%i "
10911072
"-DSM=%i -DSN=%i -DSK=%i -DBS=%i -DVL=%i %s -DBM=%i -DBN=%i -DBK=%i "
10921073
"%s %s %s %s %s %s %s %s ", /* space! */
10931074
tname, CL_DEVICE_TYPE_GPU == devinfo->type, cmem, (int)new_config.wgsize[kernel_idx], (int)sgs, fname,

src/acc/opencl/smm/opencl_libsmm.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,6 @@ typedef struct opencl_libsmm_perfest_t {
7474
/** Returns environment variable's value for given domain and key. */
7575
const char* opencl_libsmm_getenv(const char domain[], const char key[]);
7676

77-
/** If buffers are hinted for non-concurrent writes aka "OpenCL constant". */
78-
int opencl_libsmm_use_cmem(cl_device_id device);
79-
8077
/**
8178
* TRANS-kernel: write key and tunables into a (file-)stream.
8279
* If config=NULL, key/parameter names are written. The arguments

0 commit comments

Comments
 (0)