Skip to content

Commit 628a780

Browse files
committed
add tracing for cl_intel_concurrent_dispatch
1 parent 87e5440 commit 628a780

File tree

5 files changed

+79
-0
lines changed

5 files changed

+79
-0
lines changed

intercept/src/cli_ext.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,25 @@ typedef struct _cl_queue_family_properties_intel {
12501250
#define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 25)
12511251
#define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 26)
12521252

1253+
///////////////////////////////////////////////////////////////////////////////
1254+
// cl_intel_concurrent_dispatch
1255+
1256+
// Enumerant (0x4257) added for cl_intel_concurrent_dispatch.
// NOTE(review): by its name this is presumably a kernel exec info parameter
// name that selects the dispatch type - confirm against the extension spec.
#define CL_KERNEL_EXEC_INFO_DISPATCH_TYPE_INTEL 0x4257
1257+
1258+
// Integer type intended to hold one of the dispatch type values defined below.
typedef cl_uint cl_kernel_exec_info_dispatch_type_intel;
1259+
1260+
// Dispatch type values: DEFAULT is 0 and CONCURRENT is 1.
#define CL_KERNEL_EXEC_INFO_DISPATCH_TYPE_DEFAULT_INTEL 0
1261+
#define CL_KERNEL_EXEC_INFO_DISPATCH_TYPE_CONCURRENT_INTEL 1
1262+
1263+
// Extension entry point declaration.  Takes a command queue, a kernel, and an
// ND-range description (work_dim, global_work_offset, local_work_size), plus
// a non-const size_t pointer, max_work_group_count.
// NOTE(review): max_work_group_count is presumably the output (the maximum
// number of work-groups that can execute concurrently) - confirm against the
// extension spec.
extern CL_API_ENTRY
1264+
cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(
1265+
cl_command_queue command_queue,
1266+
cl_kernel kernel,
1267+
cl_uint work_dim,
1268+
const size_t* global_work_offset,
1269+
const size_t* local_work_size,
1270+
size_t* max_work_group_count);
1271+
12531272
///////////////////////////////////////////////////////////////////////////////
12541273
// cl_intel_create_buffer_with_properties
12551274

intercept/src/dispatch.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9055,6 +9055,51 @@ CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(
90559055
NULL_FUNCTION_POINTER_RETURN_ERROR(CL_INVALID_COMMAND_QUEUE);
90569056
}
90579057

9058+
///////////////////////////////////////////////////////////////////////////////
9059+
//
9060+
// cl_intel_concurrent_dispatch
9061+
// Intercept entry point for clGetKernelMaxConcurrentWorkGroupCountINTEL.
//
// Looks up the extension dispatch table associated with commandQueue and, if
// it holds a non-NULL pointer for this function, forwards all six arguments
// unchanged and returns the callee's cl_int result.  The forwarded call is
// bracketed by the call logging, host performance timing, and error checking
// macros.
//
// If the intercept object is unavailable or the function pointer is NULL,
// control falls through to NULL_FUNCTION_POINTER_RETURN_ERROR with
// CL_INVALID_COMMAND_QUEUE.
CL_API_ENTRY cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(
9062+
cl_command_queue commandQueue,
9063+
cl_kernel kernel,
9064+
cl_uint workDim,
9065+
const size_t *globalWorkOffset,
9066+
const size_t *localWorkSize,
9067+
size_t *maxWorkGroupCount)
9068+
{
9069+
CLIntercept* pIntercept = GetIntercept();
9070+
9071+
if( pIntercept )
9072+
{
// The extension function table is selected using the command queue.
9073+
const auto& dispatchX = pIntercept->dispatchX(commandQueue);
9074+
if( dispatchX.clGetKernelMaxConcurrentWorkGroupCountINTEL )
9075+
{
9076+
GET_ENQUEUE_COUNTER();
// Only the queue and kernel handles are passed to the entry log; workDim,
// the offset and local size arrays, and the output pointer are not logged.
9077+
CALL_LOGGING_ENTER_KERNEL(
9078+
kernel,
9079+
"queue = %p, kernel = %p",
9080+
commandQueue,
9081+
kernel );
9082+
HOST_PERFORMANCE_TIMING_START();
9083+
9084+
// Forward the call with the caller's arguments, unmodified.
cl_int retVal = dispatchX.clGetKernelMaxConcurrentWorkGroupCountINTEL(
9085+
commandQueue,
9086+
kernel,
9087+
workDim,
9088+
globalWorkOffset,
9089+
localWorkSize,
9090+
maxWorkGroupCount );
9091+
9092+
HOST_PERFORMANCE_TIMING_END();
9093+
CHECK_ERROR( retVal );
9094+
CALL_LOGGING_EXIT( retVal );
9095+
9096+
return retVal;
9097+
}
9098+
}
9099+
9100+
// Reached when there is no intercept object or no function pointer.
// NOTE(review): the macro presumably returns the given error code to the
// caller - its definition is not visible here, confirm.
NULL_FUNCTION_POINTER_RETURN_ERROR(CL_INVALID_COMMAND_QUEUE);
9101+
}
9102+
90589103
///////////////////////////////////////////////////////////////////////////////
90599104
//
90609105
// cl_intel_accelerator

intercept/src/dispatch.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,15 @@ struct CLdispatchX
547547
cl_int (CL_API_CALL *clReleaseAcceleratorINTEL) (
548548
cl_accelerator_intel accelerator );
549549

550+
// cl_intel_concurrent_dispatch
551+
cl_int (CL_API_CALL *clGetKernelMaxConcurrentWorkGroupCountINTEL) (
552+
cl_command_queue command_queue,
553+
cl_kernel kernel,
554+
cl_uint work_dim,
555+
const size_t* global_work_offset,
556+
const size_t* local_work_size,
557+
size_t* max_work_group_count);
558+
550559
// cl_intel_create_buffer_with_properties
551560
cl_mem (CL_API_CALL *clCreateBufferWithPropertiesINTEL) (
552561
cl_context context,

intercept/src/enummap.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,9 @@ CEnumNameMap::CEnumNameMap()
10241024
ADD_ENUM_NAME( m_cl_command_queue_capabilities_intel, CL_QUEUE_CAPABILITY_BARRIER_INTEL );
10251025
ADD_ENUM_NAME( m_cl_command_queue_capabilities_intel, CL_QUEUE_CAPABILITY_KERNEL_INTEL );
10261026

1027+
// cl_intel_concurrent_dispatch
1028+
ADD_ENUM_NAME( m_cl_int, CL_KERNEL_EXEC_INFO_DISPATCH_TYPE_INTEL );
1029+
10271030
// cl_intel_device_attribute_query
10281031
ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IP_VERSION_INTEL );
10291032
ADD_ENUM_NAME( m_cl_int, CL_DEVICE_ID_INTEL );

intercept/src/intercept.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13184,6 +13184,9 @@ void* CLIntercept::getExtensionFunctionAddress(
1318413184
CHECK_RETURN_EXTENSION_FUNCTION( clRetainAcceleratorINTEL );
1318513185
CHECK_RETURN_EXTENSION_FUNCTION( clReleaseAcceleratorINTEL );
1318613186

13187+
// cl_intel_concurrent_dispatch
13188+
CHECK_RETURN_EXTENSION_FUNCTION( clGetKernelMaxConcurrentWorkGroupCountINTEL );
13189+
1318713190
// cl_intel_create_buffer_with_properties
1318813191
CHECK_RETURN_EXTENSION_FUNCTION( clCreateBufferWithPropertiesINTEL );
1318913192

0 commit comments

Comments
 (0)