Skip to content

Commit 15c3001

Browse files
committed
ocl: profiling must be outside of locked region
1 parent 6092b99 commit 15c3001

File tree

1 file changed

+38
-26
lines changed

1 file changed

+38
-26
lines changed

src/acc/opencl/acc_opencl_mem.c

Lines changed: 38 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -571,6 +571,7 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
571571
const cl_bool finish = CL_TRUE;
572572
# endif
573573
const c_dbcsr_acc_opencl_stream_t* str;
574+
cl_event event = NULL;
574575
ACC_OPENCL_ACQUIRE(c_dbcsr_acc_opencl_config.lock_memory);
575576
str = (NULL != stream ? ACC_OPENCL_STREAM(stream) : c_dbcsr_acc_opencl_stream(NULL, ACC_OPENCL_OMP_TID()));
576577
assert(NULL != str);
@@ -581,22 +582,24 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
581582
size_t offset = 0;
582583
c_dbcsr_acc_opencl_info_memptr_t* const info = c_dbcsr_acc_opencl_info_devptr_modify(
583584
NULL, dev_mem, 1 /*elsize*/, &nbytes, &offset);
584-
cl_event event = NULL;
585585
if (NULL != info) {
586586
result = clEnqueueWriteBuffer(str->queue, info->memory, finish, offset, nbytes, host_mem, 0, NULL,
587587
NULL == c_dbcsr_acc_opencl_config.hist_h2d ? NULL : &event);
588588
/*if (NULL != event && EXIT_SUCCESS == result) info->data = (void*)libxsmm_timer_tick();*/
589589
}
590590
else result = EXIT_FAILURE;
591-
if (NULL != event && EXIT_SUCCESS == result) {
591+
}
592+
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
593+
if (NULL != event) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
594+
if (EXIT_SUCCESS == result) {
592595
assert(NULL != c_dbcsr_acc_opencl_config.hist_h2d);
593-
if (!finish) { /* event released by c_dbcsr_acc_memcpy_notify */
596+
if (!finish) { /* asynchronous */
594597
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, dev_mem);
595598
}
596599
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, dev_mem); /* synchronous */
597600
}
601+
else ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseEvent(event));
598602
}
599-
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
600603
}
601604
# if defined(ACC_OPENCL_PROFILE_DBCSR)
602605
if (0 != c_dbcsr_acc_opencl_config.profile) c_dbcsr_timestop(&routine_handle);
@@ -643,12 +646,15 @@ int c_dbcsr_acc_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, v
643646
}
644647
else result = EXIT_FAILURE;
645648
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
646-
if (NULL != event && EXIT_SUCCESS == result) {
647-
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2h /*&& NULL == devinfo->clEnqueueMemcpyINTEL*/);
648-
if (!finish) { /* event released by c_dbcsr_acc_memcpy_notify */
649-
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, nconst.ptr);
649+
if (NULL != event) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
650+
if (EXIT_SUCCESS == result) {
651+
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2h /*&& NULL == devinfo->clEnqueueMemcpyINTEL*/);
652+
if (!finish) { /* asynchronous */
653+
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, nconst.ptr);
654+
}
655+
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, nconst.ptr); /* synchronous */
650656
}
651-
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, nconst.ptr); /* synchronous */
657+
else ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseEvent(event));
652658
}
653659
}
654660
# if defined(ACC_OPENCL_PROFILE_DBCSR)
@@ -670,21 +676,20 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
670676
# endif
671677
assert((NULL != devmem_src && NULL != devmem_dst) || 0 == nbytes);
672678
if (NULL != devmem_src && NULL != devmem_dst && 0 != nbytes) {
679+
# if defined(ACC_OPENCL_ASYNC)
680+
cl_event event = NULL;
681+
cl_event* const pevent = (0 == (4 & c_dbcsr_acc_opencl_config.async) || NULL == stream) ? &event : NULL;
682+
# else
683+
cl_event event = NULL, *const pevent = NULL;
684+
# endif
673685
union {
674686
const void* input;
675687
void* ptr;
676688
} nconst = {devmem_src};
677-
cl_event event = NULL, *pevent = NULL;
678689
const c_dbcsr_acc_opencl_stream_t* str;
679690
ACC_OPENCL_ACQUIRE(c_dbcsr_acc_opencl_config.lock_memory);
680691
str = (NULL != stream ? ACC_OPENCL_STREAM(stream) : c_dbcsr_acc_opencl_stream(NULL, ACC_OPENCL_OMP_TID()));
681692
assert(NULL != str && NULL != c_dbcsr_acc_opencl_config.device.context);
682-
# if defined(ACC_OPENCL_ASYNC)
683-
if (0 == (4 & c_dbcsr_acc_opencl_config.async) || NULL == stream)
684-
# endif
685-
{
686-
pevent = &event;
687-
}
688693
if (NULL != c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL) {
689694
result = c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL(
690695
str->queue, CL_FALSE /*blocking*/, devmem_dst, devmem_src, nbytes, 0, NULL, pevent);
@@ -705,19 +710,26 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
705710
else result = EXIT_FAILURE;
706711
}
707712
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
708-
if (NULL != event) {
709-
if (NULL != c_dbcsr_acc_opencl_config.hist_d2d && EXIT_SUCCESS == result) {
710-
assert(NULL == c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL);
711-
result = clRetainEvent(event); /* released by c_dbcsr_acc_memcpy_notify */
712-
if (EXIT_SUCCESS == result) {
713+
if (NULL != event) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
714+
if (EXIT_SUCCESS == result) {
715+
if (NULL == pevent) { /* asynchronous */
716+
assert(NULL == c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL);
717+
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2d);
713718
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, nconst.ptr);
714719
}
720+
else { /* synchronous */
721+
result = clWaitForEvents(1, &event);
722+
if (EXIT_SUCCESS == result) {
723+
if (NULL != c_dbcsr_acc_opencl_config.hist_d2d) {
724+
assert(NULL == c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL);
725+
c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, nconst.ptr);
726+
}
727+
else result = clReleaseEvent(event);
728+
}
729+
else ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseEvent(event));
730+
}
715731
}
716-
if (NULL != pevent && EXIT_SUCCESS == result) result = clWaitForEvents(1, &event);
717-
if (NULL != event) {
718-
const int result_release = clReleaseEvent(event);
719-
if (EXIT_SUCCESS == result) result = result_release;
720-
}
732+
else ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseEvent(event));
721733
}
722734
}
723735
# if defined(ACC_OPENCL_PROFILE_DBCSR)

0 commit comments

Comments
 (0)