Skip to content

Commit c9db413

Browse files
authored
add support for dumping command buffers to dot files (intel#404)
* initial support for dumping a command buffer to a dot file * add support for dumping command buffers to a file * cleanup and renaming * formatting fix * add record tags for command buffer kernel records * final cleanup
1 parent 46fc1d4 commit c9db413

File tree

7 files changed

+608
-2
lines changed

7 files changed

+608
-2
lines changed

docs/controls.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,16 @@ This is the list of options that is implicitly passed to CLANG to build a non-Op
525525

526526
This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options.
527527

528+
### Controls for Dumping Command Buffers
529+
530+
##### `OmitCommandBufferNumber` (bool)
531+
532+
If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the command buffer number from dumped file names and hash tracking. This can produce deterministic results even if command buffers are created and finalized in a non-deterministic order (say, by multiple threads).
533+
534+
##### `DumpCommandBuffers` (bool)
535+
536+
If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the commands and dependencies in a command buffer to a file when the command buffer is successfully finalized. The file name will have the form "CLI\_\<Command Buffer Number\>\_\<Unique Command Buffer Hash Code\>\_cmdbuf.dot". The command buffer is described using the DOT graph description language.
537+
528538
### Controls for Dumping and Injecting Buffers and Images
529539

530540
##### `DumpBufferHashes` (bool)

intercept/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ source_group(Resources FILES
6060
set(CLINTERCEPT_SOURCE_FILES
6161
src/chrometracer.h
6262
src/chrometracer.cpp
63+
src/cmdbufrecorder.h
6364
src/clIntercept.def
6465
src/clIntercept.map
6566
src/cli_ext.h

intercept/src/cmdbufrecorder.h

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/*
2+
// Copyright (c) 2025 Intel Corporation
3+
//
4+
// SPDX-License-Identifier: MIT
5+
*/
6+
#pragma once
7+
8+
#include <atomic>
9+
#include <cinttypes>
10+
#include <map>
11+
#include <vector>
12+
#include <sstream>
13+
14+
#include <stdint.h>
15+
16+
#include "common.h"
17+
18+
struct SCommandBufferRecord
19+
{
20+
void recordCreate(
21+
cl_command_buffer_khr cmdbuf,
22+
bool isInOrder)
23+
{
24+
queueIsInOrder = isInOrder;
25+
dotstring << "digraph {\n";
26+
dotstring << " // " << (queueIsInOrder ? "in-order" : "out-of-order") << " command-buffer\n";
27+
}
28+
29+
void recordCommand(
30+
cl_command_queue queue,
31+
const char* cmd,
32+
const std::string& tag,
33+
cl_uint num_sync_points_in_wait_list,
34+
const cl_sync_point_khr* sync_point_wait_list,
35+
cl_sync_point_khr* sync_point)
36+
{
37+
SCommandBufferId id =
38+
sync_point == nullptr ?
39+
makeInternalId() :
40+
makeSyncPointId(*sync_point);
41+
42+
dotstring << " " << (id.isInternal ? "internal" : "syncpoint") << id.id
43+
<< " [shape=oval, label=\"" << cmd;
44+
if( !tag.empty() )
45+
{
46+
dotstring << "( " << tag << " )";
47+
}
48+
dotstring << "\"]\n";
49+
50+
for( cl_uint s = 0; s < num_sync_points_in_wait_list; s++ )
51+
{
52+
dotstring << " syncpoint" << sync_point_wait_list[s]
53+
<< " -> "
54+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
55+
<< " // explicit dependency\n";
56+
}
57+
58+
for( const auto& dep : implicitDeps )
59+
{
60+
dotstring << " " << (dep.isInternal ? "internal" : "syncpoint") << dep.id
61+
<< " -> "
62+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
63+
<< " [style=dashed] // implicit dependency\n";
64+
}
65+
66+
if( queueIsInOrder )
67+
{
68+
implicitDeps.clear();
69+
implicitDeps.push_back(id);
70+
}
71+
else
72+
{
73+
outstandingIds.push_back(id);
74+
}
75+
}
76+
77+
void recordBarrier(
78+
cl_command_queue queue,
79+
const char* cmd,
80+
cl_uint num_sync_points_in_wait_list,
81+
const cl_sync_point_khr* sync_point_wait_list,
82+
cl_sync_point_khr* sync_point)
83+
{
84+
SCommandBufferId id =
85+
sync_point == nullptr ?
86+
makeInternalId() :
87+
makeSyncPointId(*sync_point);
88+
89+
dotstring << " " << (id.isInternal ? "internal" : "syncpoint") << id.id
90+
<< " [shape=octagon, label=\"" << cmd << "\"]\n";
91+
92+
// If there is a sync point wait list, then the barrier depends on all
93+
// of the commands in the sync point wait list. Otherwise, the barrier
94+
// depends on all of the outstanding ids.
95+
if( num_sync_points_in_wait_list > 0 )
96+
{
97+
for( cl_uint s = 0; s < num_sync_points_in_wait_list; s++ )
98+
{
99+
dotstring << " syncpoint" << sync_point_wait_list[s]
100+
<< " -> "
101+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
102+
<< " // explicit dependency\n";
103+
}
104+
}
105+
else
106+
{
107+
for( const auto& dep : outstandingIds )
108+
{
109+
dotstring << " " << (dep.isInternal ? "internal" : "syncpoint") << dep.id
110+
<< " -> "
111+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
112+
<< " [style=dotted] // barrier dependency\n";
113+
}
114+
outstandingIds.clear();
115+
}
116+
117+
// Add the implicit dependencies.
118+
for( const auto& dep : implicitDeps )
119+
{
120+
dotstring << " " << (dep.isInternal ? "internal" : "syncpoint") << dep.id
121+
<< " -> "
122+
<< (id.isInternal ? "internal" : "syncpoint") << id.id
123+
<< " [style=dashed] // implicit dependency\n";
124+
}
125+
126+
// Now, the only implicit dependency that remains is this barrier.
127+
implicitDeps.clear();
128+
implicitDeps.push_back(id);
129+
}
130+
131+
void recordFinalize()
132+
{
133+
dotstring << "}\n";
134+
}
135+
136+
// Note: this cannot return a reference, because the underlying string is a
137+
// temporary object.
138+
const std::string getRecording() const
139+
{
140+
return dotstring.str();
141+
}
142+
143+
private:
144+
struct SCommandBufferId
145+
{
146+
bool isInternal = false;
147+
uint32_t id = 0;
148+
};
149+
150+
std::ostringstream dotstring;
151+
152+
std::atomic<uint32_t> nextInternalId;
153+
154+
bool queueIsInOrder;
155+
156+
std::vector<SCommandBufferId> implicitDeps;
157+
std::vector<SCommandBufferId> outstandingIds;
158+
159+
SCommandBufferId makeInternalId()
160+
{
161+
SCommandBufferId id;
162+
id.isInternal = true;
163+
id.id = nextInternalId.fetch_add(1, std::memory_order_relaxed);
164+
return id;
165+
}
166+
167+
SCommandBufferId makeSyncPointId(
168+
cl_sync_point_khr sync_point)
169+
{
170+
SCommandBufferId id;
171+
id.isInternal = false;
172+
id.id = sync_point;
173+
return id;
174+
}
175+
};

intercept/src/controls.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ CLI_CONTROL( std::string, SPIRVDis, "spirv-dis",
120120
CLI_CONTROL( std::string, DefaultOptions, "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 -D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build a non-OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." )
121121
CLI_CONTROL( std::string, OpenCL2Options, "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 -D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." )
122122

123+
CLI_CONTROL_SEPARATOR( Controls for Dumping Command Buffers: )
124+
CLI_CONTROL( bool, OmitCommandBufferNumber, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the command buffer number from dumped file names and hash tracking. This can produce deterministic results even if command buffers are created and finalized in a non-deterministic order (say, by multiple threads)." )
125+
CLI_CONTROL( bool, DumpCommandBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the commands and dependencies in a command buffer to a file when the command buffer is successfully finalized. The file name will have the form \"CLI_<Command Buffer Number>_<Unique Command Buffer Hash Code>_cmdbuf.dot\". The command buffer is described using the DOT graph description language." )
126+
123127
CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Buffers and Images: )
124128
CLI_CONTROL( bool, DumpBufferHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of a buffer, SVM, or USM allocation rather than the full contents of the buffer. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." )
125129
CLI_CONTROL( bool, DumpImageHashes, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump hashes of an image rather than the full contents of the image. This can be useful to identify which kernel enqueues generate different results without requiring a large amount of disk space." )

intercept/src/dispatch.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10469,6 +10469,7 @@ CL_API_ENTRY cl_command_buffer_khr CL_API_CALL clCreateCommandBufferKHR(
1046910469
HOST_PERFORMANCE_TIMING_END();
1047010470
CHECK_ERROR( errcode_ret[0] );
1047110471
ADD_OBJECT_ALLOCATION( retVal );
10472+
RECORD_COMMAND_BUFFER_CREATE( retVal, num_queues, queues );
1047210473
CALL_LOGGING_EXIT( errcode_ret[0], "returned %p", retVal );
1047310474

1047410475
if( retVal != NULL )
@@ -10509,6 +10510,8 @@ CL_API_ENTRY cl_int CL_API_CALL clFinalizeCommandBufferKHR(
1050910510

1051010511
HOST_PERFORMANCE_TIMING_END();
1051110512
CHECK_ERROR( retVal );
10513+
RECORD_COMMAND_BUFFER_FINALIZE( retVal, command_buffer );
10514+
DUMP_COMMAND_BUFFER_RECORDING( retVal, command_buffer );
1051210515
CALL_LOGGING_EXIT( retVal );
1051310516

1051410517
return retVal;
@@ -10715,6 +10718,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandBarrierWithWaitListKHR(
1071510718

1071610719
HOST_PERFORMANCE_TIMING_END();
1071710720
CHECK_ERROR( retVal );
10721+
RECORD_COMMAND_BUFFER_BARRIER(
10722+
retVal,
10723+
command_buffer,
10724+
num_sync_points_in_wait_list,
10725+
sync_point_wait_list,
10726+
sync_point );
1071810727
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1071910728
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1072010729

@@ -10779,6 +10788,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyBufferKHR(
1077910788

1078010789
HOST_PERFORMANCE_TIMING_END();
1078110790
CHECK_ERROR( retVal );
10791+
RECORD_COMMAND_BUFFER_COMMAND(
10792+
retVal,
10793+
command_buffer,
10794+
num_sync_points_in_wait_list,
10795+
sync_point_wait_list,
10796+
sync_point );
1078210797
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1078310798
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1078410799

@@ -10851,6 +10866,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyBufferRectKHR(
1085110866

1085210867
HOST_PERFORMANCE_TIMING_END();
1085310868
CHECK_ERROR( retVal );
10869+
RECORD_COMMAND_BUFFER_COMMAND(
10870+
retVal,
10871+
command_buffer,
10872+
num_sync_points_in_wait_list,
10873+
sync_point_wait_list,
10874+
sync_point );
1085410875
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1085510876
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1085610877

@@ -10915,6 +10936,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyBufferToImageKHR(
1091510936

1091610937
HOST_PERFORMANCE_TIMING_END();
1091710938
CHECK_ERROR( retVal );
10939+
RECORD_COMMAND_BUFFER_COMMAND(
10940+
retVal,
10941+
command_buffer,
10942+
num_sync_points_in_wait_list,
10943+
sync_point_wait_list,
10944+
sync_point );
1091810945
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1091910946
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1092010947

@@ -10979,6 +11006,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyImageKHR(
1097911006

1098011007
HOST_PERFORMANCE_TIMING_END();
1098111008
CHECK_ERROR( retVal );
11009+
RECORD_COMMAND_BUFFER_COMMAND(
11010+
retVal,
11011+
command_buffer,
11012+
num_sync_points_in_wait_list,
11013+
sync_point_wait_list,
11014+
sync_point );
1098211015
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1098311016
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1098411017

@@ -11045,6 +11078,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandCopyImageToBufferKHR(
1104511078

1104611079
HOST_PERFORMANCE_TIMING_END();
1104711080
CHECK_ERROR( retVal );
11081+
RECORD_COMMAND_BUFFER_COMMAND(
11082+
retVal,
11083+
command_buffer,
11084+
num_sync_points_in_wait_list,
11085+
sync_point_wait_list,
11086+
sync_point );
1104811087
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1104911088
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1105011089

@@ -11110,6 +11149,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandFillBufferKHR(
1111011149

1111111150
HOST_PERFORMANCE_TIMING_END();
1111211151
CHECK_ERROR( retVal );
11152+
RECORD_COMMAND_BUFFER_COMMAND(
11153+
retVal,
11154+
command_buffer,
11155+
num_sync_points_in_wait_list,
11156+
sync_point_wait_list,
11157+
sync_point );
1111311158
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1111411159
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1111511160

@@ -11173,6 +11218,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandFillImageKHR(
1117311218

1117411219
HOST_PERFORMANCE_TIMING_END();
1117511220
CHECK_ERROR( retVal );
11221+
RECORD_COMMAND_BUFFER_COMMAND(
11222+
retVal,
11223+
command_buffer,
11224+
num_sync_points_in_wait_list,
11225+
sync_point_wait_list,
11226+
sync_point );
1117611227
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1117711228
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1117811229

@@ -11236,6 +11287,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandSVMMemcpyKHR(
1123611287

1123711288
HOST_PERFORMANCE_TIMING_END();
1123811289
CHECK_ERROR( retVal );
11290+
RECORD_COMMAND_BUFFER_COMMAND(
11291+
retVal,
11292+
command_buffer,
11293+
num_sync_points_in_wait_list,
11294+
sync_point_wait_list,
11295+
sync_point );
1123911296
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1124011297
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1124111298

@@ -11301,6 +11358,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandSVMMemFillKHR(
1130111358

1130211359
HOST_PERFORMANCE_TIMING_END();
1130311360
CHECK_ERROR( retVal );
11361+
RECORD_COMMAND_BUFFER_COMMAND(
11362+
retVal,
11363+
command_buffer,
11364+
num_sync_points_in_wait_list,
11365+
sync_point_wait_list,
11366+
sync_point );
1130411367
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1130511368
ADD_MUTABLE_COMMAND( mutable_handle, command_buffer );
1130611369

@@ -11362,6 +11425,14 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandNDRangeKernelKHR(
1136211425
command_queue,
1136311426
kernel,
1136411427
argsString.c_str() );
11428+
GET_RECORD_TAG_COMMAND_BUFFER_KERNEL(
11429+
command_buffer,
11430+
kernel,
11431+
work_dim,
11432+
global_work_offset,
11433+
global_work_size,
11434+
local_work_size,
11435+
mutable_handle );
1136511436
HOST_PERFORMANCE_TIMING_START();
1136611437

1136711438
cl_int retVal = dispatchX.clCommandNDRangeKernelKHR(
@@ -11380,6 +11451,12 @@ CL_API_ENTRY cl_int CL_API_CALL clCommandNDRangeKernelKHR(
1138011451

1138111452
HOST_PERFORMANCE_TIMING_END();
1138211453
CHECK_ERROR( retVal );
11454+
RECORD_COMMAND_BUFFER_COMMAND_WITH_TAG(
11455+
retVal,
11456+
command_buffer,
11457+
num_sync_points_in_wait_list,
11458+
sync_point_wait_list,
11459+
sync_point );
1138311460
CALL_LOGGING_EXIT_SYNC_POINT( retVal, sync_point );
1138411461
ADD_MUTABLE_COMMAND_NDRANGE( mutable_handle, command_buffer, work_dim );
1138511462

0 commit comments

Comments
 (0)