Skip to content

Commit 6a9afe8

Browse files
committed
initial support for dumping a command buffer to a dot file
1 parent 4eb604d commit 6a9afe8

File tree

5 files changed

+455
-2
lines changed

5 files changed

+455
-2
lines changed

intercept/src/cmdbuftracer.h

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/*
2+
// Copyright (c) 2025 Intel Corporation
3+
//
4+
// SPDX-License-Identifier: MIT
5+
*/
6+
#pragma once
7+
8+
#include <atomic>
9+
#include <cinttypes>
10+
#include <map>
11+
#include <vector>
12+
#include <sstream>
13+
14+
#include <stdint.h>
15+
16+
#include "common.h"
17+
18+
struct SCommandBufferTraceInfo
19+
{
20+
std::ostringstream trace;
21+
22+
void create(
23+
cl_command_buffer_khr cmdbuf,
24+
bool isInOrder)
25+
{
26+
queueIsInOrder = isInOrder;
27+
trace << "digraph {\n";
28+
trace << " // " << (queueIsInOrder ? "in-order" : "out-of-order") << " command-buffer\n";
29+
}
30+
31+
void traceCommand(
32+
cl_command_queue queue,
33+
const char* cmd,
34+
const std::string& tag,
35+
cl_uint num_sync_points_in_wait_list,
36+
const cl_sync_point_khr* sync_point_wait_list,
37+
cl_sync_point_khr* sync_point)
38+
{
39+
SCommandBufferTraceId id =
40+
sync_point == nullptr ?
41+
makeInternalId() :
42+
makeSyncPointId(*sync_point);
43+
44+
trace << " " << (id.isInternal ? "internal" : "syncpoint") << id.id
45+
<< " [shape=oval, label=\"" << cmd;
46+
if( !tag.empty() )
47+
{
48+
trace << "( " << tag << " )";
49+
}
50+
trace << "\"]\n";
51+
52+
for( cl_uint s = 0; s < num_sync_points_in_wait_list; s++ )
53+
{
54+
trace << " syncpoint" << sync_point_wait_list[s]
55+
<< " -> "
56+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
57+
<< " // explicit dependency\n";
58+
}
59+
60+
for( const auto& dep : implicitDeps )
61+
{
62+
trace << " " << (dep.isInternal ? "internal" : "syncpoint") << dep.id
63+
<< " -> "
64+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
65+
<< " [style=dashed] // implicit dependency\n";
66+
}
67+
68+
if( queueIsInOrder )
69+
{
70+
implicitDeps.clear();
71+
implicitDeps.push_back(id);
72+
}
73+
else
74+
{
75+
outstandingIds.push_back(id);
76+
}
77+
}
78+
79+
void traceBarrier(
80+
cl_command_queue queue,
81+
const char* cmd,
82+
cl_uint num_sync_points_in_wait_list,
83+
const cl_sync_point_khr* sync_point_wait_list,
84+
cl_sync_point_khr* sync_point)
85+
{
86+
SCommandBufferTraceId id =
87+
sync_point == nullptr ?
88+
makeInternalId() :
89+
makeSyncPointId(*sync_point);
90+
91+
trace << " " << (id.isInternal ? "internal" : "syncpoint") << id.id
92+
<< " [shape=octagon, label=\"" << cmd << "\"]\n";
93+
94+
// If there is a sync point wait list, then the barrier depends on all
95+
// of the commands in the sync point wait list. Otherwise, the barrier
96+
// depends on all of the outstanding ids.
97+
if( num_sync_points_in_wait_list > 0 )
98+
{
99+
for( cl_uint s = 0; s < num_sync_points_in_wait_list; s++ )
100+
{
101+
trace << " syncpoint" << sync_point_wait_list[s]
102+
<< " -> "
103+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
104+
<< " // explicit dependency\n";
105+
}
106+
}
107+
else
108+
{
109+
for( const auto& dep : outstandingIds )
110+
{
111+
trace << " " << (dep.isInternal ? "internal" : "syncpoint") << dep.id
112+
<< " -> "
113+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
114+
<< " [style=dotted] // barrier dependency\n";
115+
}
116+
outstandingIds.clear();
117+
}
118+
119+
// Add the implicit dependencies.
120+
for( const auto& dep : implicitDeps )
121+
{
122+
trace << " " << (dep.isInternal ? "internal" : "syncpoint") << dep.id
123+
<< " -> "
124+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
125+
<< " [style=dashed] // implicit dependency\n";
126+
}
127+
128+
// Now, the only implicit dependency that remains is this barrier.
129+
implicitDeps.clear();
130+
implicitDeps.push_back(id);
131+
}
132+
133+
void finalize()
134+
{
135+
trace << "}\n";
136+
}
137+
138+
private:
139+
struct SCommandBufferTraceId
140+
{
141+
bool isInternal = false;
142+
uint32_t id = 0;
143+
};
144+
145+
std::atomic<uint32_t> nextInternalId;
146+
147+
bool queueIsInOrder;
148+
149+
std::vector<SCommandBufferTraceId> implicitDeps;
150+
std::vector<SCommandBufferTraceId> outstandingIds;
151+
152+
SCommandBufferTraceId makeInternalId()
153+
{
154+
SCommandBufferTraceId id;
155+
id.isInternal = true;
156+
id.id = nextInternalId.fetch_add(1, std::memory_order_relaxed);
157+
return id;
158+
}
159+
160+
SCommandBufferTraceId makeSyncPointId(
161+
cl_sync_point_khr sync_point)
162+
{
163+
SCommandBufferTraceId id;
164+
id.isInternal = false;
165+
id.id = sync_point;
166+
return id;
167+
}
168+
};

intercept/src/controls.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ CLI_CONTROL( std::string, SPIRVDis, "spirv-dis",
120120
CLI_CONTROL( std::string, DefaultOptions, "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 -D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build a non-OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." )
121121
CLI_CONTROL( std::string, OpenCL2Options, "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 -D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." )
122122

123+
CLI_CONTROL_SEPARATOR( Controls for Dumping Command Buffers: )
124+
CLI_CONTROL( bool, DumpCommandBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump each command buffer as a graph in the Graphviz DOT format, showing the commands recorded into the command buffer and the dependencies between them." )
125+
123126
CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Buffers and Images: )
124127
CLI_CONTROL( bool, DumpBufferHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of a buffer, SVM, or USM allocation rather than the full contents of the buffer. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." )
125128
CLI_CONTROL( bool, DumpImageHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of an image rather than the full contents of the image. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." )

intercept/src/dispatch.cpp

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10469,6 +10469,7 @@ CL_API_ENTRY cl_command_buffer_khr CL_API_CALL clCreateCommandBufferKHR(
1046910469
HOST_PERFORMANCE_TIMING_END();
1047010470
CHECK_ERROR( errcode_ret[0] );
1047110471
ADD_OBJECT_ALLOCATION( retVal );
10472+
TRACE_COMMAND_BUFFER_CREATE( retVal, num_queues, queues );
1047210473
CALL_LOGGING_EXIT( errcode_ret[0], "returned %p", retVal );
1047310474

1047410475
if( retVal != NULL )
@@ -10509,6 +10510,7 @@ CL_API_ENTRY cl_int CL_API_CALL clFinalizeCommandBufferKHR(
1050910510

1051010511
HOST_PERFORMANCE_TIMING_END();
1051110512
CHECK_ERROR( retVal );
10513+
TRACE_COMMAND_BUFFER_FINALIZE( retVal, command_buffer );
1051210514
CALL_LOGGING_EXIT( retVal );
1051310515

1051410516
return retVal;
@@ -10715,6 +10717,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandBarrierWithWaitListKHR(
1071510717

1071610718
HOST_PERFORMANCE_TIMING_END();
1071710719
CHECK_ERROR( retVal );
10720+
TRACE_COMMAND_BUFFER_BARRIER(
10721+
retVal,
10722+
command_buffer,
10723+
num_sync_points_in_wait_list,
10724+
sync_point_wait_list,
10725+
sync_point );
1071810726
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1071910727
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1072010728

@@ -10779,6 +10787,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyBufferKHR(
1077910787

1078010788
HOST_PERFORMANCE_TIMING_END();
1078110789
CHECK_ERROR( retVal );
10790+
TRACE_COMMAND_BUFFER_COMMAND(
10791+
retVal,
10792+
command_buffer,
10793+
num_sync_points_in_wait_list,
10794+
sync_point_wait_list,
10795+
sync_point );
1078210796
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1078310797
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1078410798

@@ -10851,6 +10865,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyBufferRectKHR(
1085110865

1085210866
HOST_PERFORMANCE_TIMING_END();
1085310867
CHECK_ERROR( retVal );
10868+
TRACE_COMMAND_BUFFER_COMMAND(
10869+
retVal,
10870+
command_buffer,
10871+
num_sync_points_in_wait_list,
10872+
sync_point_wait_list,
10873+
sync_point );
1085410874
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1085510875
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1085610876

@@ -10915,6 +10935,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyBufferToImageKHR(
1091510935

1091610936
HOST_PERFORMANCE_TIMING_END();
1091710937
CHECK_ERROR( retVal );
10938+
TRACE_COMMAND_BUFFER_COMMAND(
10939+
retVal,
10940+
command_buffer,
10941+
num_sync_points_in_wait_list,
10942+
sync_point_wait_list,
10943+
sync_point );
1091810944
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1091910945
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1092010946

@@ -10979,6 +11005,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyImageKHR(
1097911005

1098011006
HOST_PERFORMANCE_TIMING_END();
1098111007
CHECK_ERROR( retVal );
11008+
TRACE_COMMAND_BUFFER_COMMAND(
11009+
retVal,
11010+
command_buffer,
11011+
num_sync_points_in_wait_list,
11012+
sync_point_wait_list,
11013+
sync_point );
1098211014
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1098311015
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1098411016

@@ -11045,6 +11077,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyImageToBufferKHR(
1104511077

1104611078
HOST_PERFORMANCE_TIMING_END();
1104711079
CHECK_ERROR( retVal );
11080+
TRACE_COMMAND_BUFFER_COMMAND(
11081+
retVal,
11082+
command_buffer,
11083+
num_sync_points_in_wait_list,
11084+
sync_point_wait_list,
11085+
sync_point );
1104811086
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1104911087
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1105011088

@@ -11110,6 +11148,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandFillBufferKHR(
1111011148

1111111149
HOST_PERFORMANCE_TIMING_END();
1111211150
CHECK_ERROR( retVal );
11151+
TRACE_COMMAND_BUFFER_COMMAND(
11152+
retVal,
11153+
command_buffer,
11154+
num_sync_points_in_wait_list,
11155+
sync_point_wait_list,
11156+
sync_point );
1111311157
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1111411158
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1111511159

@@ -11173,6 +11217,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandFillImageKHR(
1117311217

1117411218
HOST_PERFORMANCE_TIMING_END();
1117511219
CHECK_ERROR( retVal );
11220+
TRACE_COMMAND_BUFFER_COMMAND(
11221+
retVal,
11222+
command_buffer,
11223+
num_sync_points_in_wait_list,
11224+
sync_point_wait_list,
11225+
sync_point );
1117611226
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1117711227
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1117811228

@@ -11236,6 +11286,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandSVMMemcpyKHR(
1123611286

1123711287
HOST_PERFORMANCE_TIMING_END();
1123811288
CHECK_ERROR( retVal );
11289+
TRACE_COMMAND_BUFFER_COMMAND(
11290+
retVal,
11291+
command_buffer,
11292+
num_sync_points_in_wait_list,
11293+
sync_point_wait_list,
11294+
sync_point );
1123911295
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1124011296
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1124111297

@@ -11301,6 +11357,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandSVMMemFillKHR(
1130111357

1130211358
HOST_PERFORMANCE_TIMING_END();
1130311359
CHECK_ERROR( retVal );
11360+
TRACE_COMMAND_BUFFER_COMMAND(
11361+
retVal,
11362+
command_buffer,
11363+
num_sync_points_in_wait_list,
11364+
sync_point_wait_list,
11365+
sync_point );
1130411366
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1130511367
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1130611368

@@ -11362,6 +11424,13 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandNDRangeKernelKHR(
1136211424
command_queue,
1136311425
kernel,
1136411426
argsString.c_str() );
11427+
GET_TIMING_TAGS_COMMAND_BUFFER_KERNEL(
11428+
command_buffer,
11429+
kernel,
11430+
work_dim,
11431+
global_work_offset,
11432+
global_work_size,
11433+
local_work_size );
1136511434
HOST_PERFORMANCE_TIMING_START();
1136611435

1136711436
cl_int retVal = dispatchX.clCommandNDRangeKernelKHR(
@@ -11380,6 +11449,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandNDRangeKernelKHR(
1138011449

1138111450
HOST_PERFORMANCE_TIMING_END();
1138211451
CHECK_ERROR( retVal );
11452+
TRACE_COMMAND_BUFFER_COMMAND_WITH_TAG(
11453+
retVal,
11454+
command_buffer,
11455+
num_sync_points_in_wait_list,
11456+
sync_point_wait_list,
11457+
sync_point );
1138311458
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1138411459
ADD_MUTABLE_COMMAND_NDRANGE( mutable_handle, command_buffer, work_dim );
1138511460

0 commit comments

Comments
 (0)