@@ -571,6 +571,7 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
571571 const cl_bool finish = CL_TRUE ;
572572# endif
573573 const c_dbcsr_acc_opencl_stream_t * str ;
574+ cl_event event = NULL ;
574575 ACC_OPENCL_ACQUIRE (c_dbcsr_acc_opencl_config .lock_memory );
575576 str = (NULL != stream ? ACC_OPENCL_STREAM (stream ) : c_dbcsr_acc_opencl_stream (NULL , ACC_OPENCL_OMP_TID ()));
576577 assert (NULL != str );
@@ -581,22 +582,24 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
581582 size_t offset = 0 ;
582583 c_dbcsr_acc_opencl_info_memptr_t * const info = c_dbcsr_acc_opencl_info_devptr_modify (
583584 NULL , dev_mem , 1 /*elsize*/ , & nbytes , & offset );
584- cl_event event = NULL ;
585585 if (NULL != info ) {
586586 result = clEnqueueWriteBuffer (str -> queue , info -> memory , finish , offset , nbytes , host_mem , 0 , NULL ,
587587 NULL == c_dbcsr_acc_opencl_config .hist_h2d ? NULL : & event );
588588 /*if (NULL != event && EXIT_SUCCESS == result) info->data = (void*)libxsmm_timer_tick();*/
589589 }
590590 else result = EXIT_FAILURE ;
591- if (NULL != event && EXIT_SUCCESS == result ) {
591+ }
592+ ACC_OPENCL_RELEASE (c_dbcsr_acc_opencl_config .lock_memory );
593+ if (NULL != event ) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
594+ if (EXIT_SUCCESS == result ) {
592595 assert (NULL != c_dbcsr_acc_opencl_config .hist_h2d );
593- if (!finish ) { /* event released by c_dbcsr_acc_memcpy_notify */
596+ if (!finish ) { /* asynchronous */
594597 result = clSetEventCallback (event , CL_COMPLETE , c_dbcsr_acc_memcpy_notify , dev_mem );
595598 }
596599 else c_dbcsr_acc_memcpy_notify (event , CL_COMPLETE , dev_mem ); /* synchronous */
597600 }
601+ else ACC_OPENCL_EXPECT (EXIT_SUCCESS == clReleaseEvent (event ));
598602 }
599- ACC_OPENCL_RELEASE (c_dbcsr_acc_opencl_config .lock_memory );
600603 }
601604# if defined(ACC_OPENCL_PROFILE_DBCSR )
602605 if (0 != c_dbcsr_acc_opencl_config .profile ) c_dbcsr_timestop (& routine_handle );
@@ -643,12 +646,15 @@ int c_dbcsr_acc_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, v
643646 }
644647 else result = EXIT_FAILURE ;
645648 ACC_OPENCL_RELEASE (c_dbcsr_acc_opencl_config .lock_memory );
646- if (NULL != event && EXIT_SUCCESS == result ) {
647- assert (NULL != c_dbcsr_acc_opencl_config .hist_d2h /*&& NULL == devinfo->clEnqueueMemcpyINTEL*/ );
648- if (!finish ) { /* event released by c_dbcsr_acc_memcpy_notify */
649- result = clSetEventCallback (event , CL_COMPLETE , c_dbcsr_acc_memcpy_notify , nconst .ptr );
649+ if (NULL != event ) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
650+ if (EXIT_SUCCESS == result ) {
651+ assert (NULL != c_dbcsr_acc_opencl_config .hist_d2h /*&& NULL == devinfo->clEnqueueMemcpyINTEL*/ );
652+ if (!finish ) { /* asynchronous */
653+ result = clSetEventCallback (event , CL_COMPLETE , c_dbcsr_acc_memcpy_notify , nconst .ptr );
654+ }
655+ else c_dbcsr_acc_memcpy_notify (event , CL_COMPLETE , nconst .ptr ); /* synchronous */
650656 }
651- else c_dbcsr_acc_memcpy_notify ( event , CL_COMPLETE , nconst . ptr ); /* synchronous */
657+ else ACC_OPENCL_EXPECT ( EXIT_SUCCESS == clReleaseEvent ( event ));
652658 }
653659 }
654660# if defined(ACC_OPENCL_PROFILE_DBCSR )
@@ -670,21 +676,20 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
670676# endif
671677 assert ((NULL != devmem_src && NULL != devmem_dst ) || 0 == nbytes );
672678 if (NULL != devmem_src && NULL != devmem_dst && 0 != nbytes ) {
679+ # if defined(ACC_OPENCL_ASYNC )
680+ cl_event event = NULL ;
681+ cl_event * const pevent = (0 == (4 & c_dbcsr_acc_opencl_config .async ) || NULL == stream ) ? & event : NULL ;
682+ # else
683+ cl_event event = NULL , * const pevent = NULL ;
684+ # endif
673685 union {
674686 const void * input ;
675687 void * ptr ;
676688 } nconst = {devmem_src };
677- cl_event event = NULL , * pevent = NULL ;
678689 const c_dbcsr_acc_opencl_stream_t * str ;
679690 ACC_OPENCL_ACQUIRE (c_dbcsr_acc_opencl_config .lock_memory );
680691 str = (NULL != stream ? ACC_OPENCL_STREAM (stream ) : c_dbcsr_acc_opencl_stream (NULL , ACC_OPENCL_OMP_TID ()));
681692 assert (NULL != str && NULL != c_dbcsr_acc_opencl_config .device .context );
682- # if defined(ACC_OPENCL_ASYNC )
683- if (0 == (4 & c_dbcsr_acc_opencl_config .async ) || NULL == stream )
684- # endif
685- {
686- pevent = & event ;
687- }
688693 if (NULL != c_dbcsr_acc_opencl_config .device .clEnqueueMemcpyINTEL ) {
689694 result = c_dbcsr_acc_opencl_config .device .clEnqueueMemcpyINTEL (
690695 str -> queue , CL_FALSE /*blocking*/ , devmem_dst , devmem_src , nbytes , 0 , NULL , pevent );
@@ -705,19 +710,26 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
705710 else result = EXIT_FAILURE ;
706711 }
707712 ACC_OPENCL_RELEASE (c_dbcsr_acc_opencl_config .lock_memory );
708- if (NULL != event ) {
709- if (NULL != c_dbcsr_acc_opencl_config . hist_d2d && EXIT_SUCCESS == result ) {
710- assert (NULL == c_dbcsr_acc_opencl_config . device . clEnqueueMemcpyINTEL );
711- result = clRetainEvent ( event ); /* released by c_dbcsr_acc_memcpy_notify */
712- if ( EXIT_SUCCESS == result ) {
713+ if (NULL != event ) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
714+ if (EXIT_SUCCESS == result ) {
715+ if (NULL == pevent ) { /* asynchronous */
716+ assert ( NULL == c_dbcsr_acc_opencl_config . device . clEnqueueMemcpyINTEL );
717+ assert ( NULL != c_dbcsr_acc_opencl_config . hist_d2d );
713718 result = clSetEventCallback (event , CL_COMPLETE , c_dbcsr_acc_memcpy_notify , nconst .ptr );
714719 }
720+ else { /* synchronous */
721+ result = clWaitForEvents (1 , & event );
722+ if (EXIT_SUCCESS == result ) {
723+ if (NULL != c_dbcsr_acc_opencl_config .hist_d2d ) {
724+ assert (NULL == c_dbcsr_acc_opencl_config .device .clEnqueueMemcpyINTEL );
725+ c_dbcsr_acc_memcpy_notify (event , CL_COMPLETE , nconst .ptr );
726+ }
727+ else result = clReleaseEvent (event );
728+ }
729+ else ACC_OPENCL_EXPECT (EXIT_SUCCESS == clReleaseEvent (event ));
730+ }
715731 }
716- if (NULL != pevent && EXIT_SUCCESS == result ) result = clWaitForEvents (1 , & event );
717- if (NULL != event ) {
718- const int result_release = clReleaseEvent (event );
719- if (EXIT_SUCCESS == result ) result = result_release ;
720- }
732+ else ACC_OPENCL_EXPECT (EXIT_SUCCESS == clReleaseEvent (event ));
721733 }
722734 }
723735# if defined(ACC_OPENCL_PROFILE_DBCSR )
0 commit comments