Skip to content

Commit 532a4ec

Browse files
authored
Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update
Add support for command-buffer kernel updates
2 parents 2296205 + d944ff3 commit 532a4ec

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1831
-651
lines changed

include/ur_api.h

Lines changed: 66 additions & 17 deletions
Large diffs are not rendered by default.

include/ur_ddi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1932,6 +1932,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)(
19321932
const size_t *,
19331933
const size_t *,
19341934
uint32_t,
1935+
ur_kernel_handle_t *,
1936+
uint32_t,
19351937
const ur_exp_command_buffer_sync_point_t *,
19361938
ur_exp_command_buffer_sync_point_t *,
19371939
ur_exp_command_buffer_command_handle_t *);

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct
970970
/// - `buff_size < out_size`
971971
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size);
972972

973+
///////////////////////////////////////////////////////////////////////////////
974+
/// @brief Print ur_device_command_buffer_update_capability_flag_t enum
975+
/// @returns
976+
/// - ::UR_RESULT_SUCCESS
977+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
978+
/// - `buff_size < out_size`
979+
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);
980+
973981
///////////////////////////////////////////////////////////////////////////////
974982
/// @brief Print ur_exp_command_buffer_info_t enum
975983
/// @returns

include/ur_print.hpp

Lines changed: 131 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
197197
template <>
198198
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);
199199

200+
template <>
201+
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag);
200202
template <>
201203
inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size);
202204

@@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
335337
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params);
336338
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params);
337339
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params);
340+
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value);
338341
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value);
339342
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value);
340343
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params);
@@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25412544
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
25422545
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
25432546
break;
2544-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
2545-
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP";
2547+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP:
2548+
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP";
25462549
break;
25472550
case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP:
25482551
os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP";
@@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
40494052

40504053
os << ")";
40514054
} break;
4052-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
4053-
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4054-
if (sizeof(ur_bool_t) > size) {
4055-
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4055+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
4056+
const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr;
4057+
if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) {
4058+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")";
40564059
return UR_RESULT_ERROR_INVALID_SIZE;
40574060
}
40584061
os << (const void *)(tptr) << " (";
40594062

4060-
os << *tptr;
4063+
ur::details::printFlag<ur_device_command_buffer_update_capability_flag_t>(os,
4064+
*tptr);
40614065

40624066
os << ")";
40634067
} break;
@@ -9701,6 +9705,103 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy
97019705
return os;
97029706
}
97039707
///////////////////////////////////////////////////////////////////////////////
9708+
/// @brief Print operator for the ur_device_command_buffer_update_capability_flag_t type
9709+
/// @returns
9710+
/// std::ostream &
9711+
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) {
9712+
switch (value) {
9713+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS:
9714+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS";
9715+
break;
9716+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE:
9717+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE";
9718+
break;
9719+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE:
9720+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE";
9721+
break;
9722+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET:
9723+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET";
9724+
break;
9725+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE:
9726+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE";
9727+
break;
9728+
default:
9729+
os << "unknown enumerator";
9730+
break;
9731+
}
9732+
return os;
9733+
}
9734+
9735+
namespace ur::details {
9736+
///////////////////////////////////////////////////////////////////////////////
9737+
/// @brief Print ur_device_command_buffer_update_capability_flag_t flag
9738+
template <>
9739+
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag) {
9740+
uint32_t val = flag;
9741+
bool first = true;
9742+
9743+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) {
9744+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS;
9745+
if (!first) {
9746+
os << " | ";
9747+
} else {
9748+
first = false;
9749+
}
9750+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS;
9751+
}
9752+
9753+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) {
9754+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE;
9755+
if (!first) {
9756+
os << " | ";
9757+
} else {
9758+
first = false;
9759+
}
9760+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE;
9761+
}
9762+
9763+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) {
9764+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE;
9765+
if (!first) {
9766+
os << " | ";
9767+
} else {
9768+
first = false;
9769+
}
9770+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE;
9771+
}
9772+
9773+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) {
9774+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
9775+
if (!first) {
9776+
os << " | ";
9777+
} else {
9778+
first = false;
9779+
}
9780+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
9781+
}
9782+
9783+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) {
9784+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
9785+
if (!first) {
9786+
os << " | ";
9787+
} else {
9788+
first = false;
9789+
}
9790+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
9791+
}
9792+
if (val != 0) {
9793+
std::bitset<32> bits(val);
9794+
if (!first) {
9795+
os << " | ";
9796+
}
9797+
os << "unknown bit flags " << bits;
9798+
} else if (first) {
9799+
os << "0";
9800+
}
9801+
return UR_RESULT_SUCCESS;
9802+
}
9803+
} // namespace ur::details
9804+
///////////////////////////////////////////////////////////////////////////////
97049805
/// @brief Print operator for the ur_exp_command_buffer_info_t type
97059806
/// @returns
97069807
/// std::ostream &
@@ -9953,6 +10054,12 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu
995310054
ur::details::printStruct(os,
995410055
(params.pNext));
995510056

10057+
os << ", ";
10058+
os << ".hNewKernel = ";
10059+
10060+
ur::details::printPtr(os,
10061+
(params.hNewKernel));
10062+
995610063
os << ", ";
995710064
os << ".numNewMemObjArgs = ";
995810065

@@ -15951,6 +16058,23 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1595116058
ur::details::printPtr(os,
1595216059
*(params->ppLocalWorkSize));
1595316060

16061+
os << ", ";
16062+
os << ".numKernelAlternatives = ";
16063+
16064+
os << *(params->pnumKernelAlternatives);
16065+
16066+
os << ", ";
16067+
os << ".phKernelAlternatives = {";
16068+
for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) {
16069+
if (i != 0) {
16070+
os << ", ";
16071+
}
16072+
16073+
ur::details::printPtr(os,
16074+
(*(params->pphKernelAlternatives))[i]);
16075+
}
16076+
os << "}";
16077+
1595416078
os << ", ";
1595516079
os << ".numSyncPointsInWaitList = ";
1595616080

scripts/core/EXP-COMMAND-BUFFER.rst

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ were obtained from.
144144
// sync-point
145145
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
146146
pGlobalWorkOffset, pGlobalWorkSize,
147-
pLocalWorkSize, 1, &syncPoint,
148-
nullptr, nullptr);
147+
pLocalWorkSize, 0, nullptr, 1,
148+
&syncPoint, nullptr, nullptr);
149149
150150
Enqueueing Command-Buffers
151151
--------------------------------------------------------------------------------
@@ -167,13 +167,21 @@ Updating Command-Buffer Commands
167167

168168
An adapter implementing the command-buffer experimental feature can optionally
169169
support updating the configuration of kernel commands recorded to a
170-
command-buffer. Support for this is reported by returning true in the
171-
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query.
170+
command-buffer. The attributes of kernel commands that can be updated are
171+
device-specific and can be queried using the
172+
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP query.
172173

173174
Updating kernel commands is done by passing the new kernel configuration
174175
to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of
175176
the kernel command to update. Configurations that can be changed are the
176-
parameters to the kernel and the execution ND-Range.
177+
kernel handle, the parameters to the kernel and the execution ND-Range.
178+
179+
Kernel handles that might be used to update the kernel of a command need
180+
to be registered when the command is created. This can be done
181+
using the ``phKernelAlternatives`` parameter of
182+
${x}CommandBufferAppendKernelLaunchExp. The command can then be updated
183+
to use the new kernel handle by passing it to
184+
${x}CommandBufferUpdateKernelLaunchExp.
177185

178186
.. parsed-literal::
179187
@@ -187,12 +195,14 @@ parameters to the kernel and the execution ND-Range.
187195
${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer);
188196
189197
// Append a kernel command which has two buffer parameters, an input
190-
// and an output.
198+
// and an output. Register hNewKernel as an alternative kernel handle
199+
// which can later be used to change the kernel handle associated
200+
// with this command.
191201
${x}_exp_command_buffer_command_handle_t hCommand;
192202
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
193203
pGlobalWorkOffset, pGlobalWorkSize,
194-
pLocalWorkSize, 0, nullptr,
195-
nullptr, &hCommand);
204+
pLocalWorkSize, 1, &hNewKernel,
205+
0, nullptr, nullptr, &hCommand);
196206
197207
// Close the command-buffer before updating
198208
${x}CommandBufferFinalizeExp(hCommandBuffer);
@@ -220,6 +230,7 @@ parameters to the kernel and the execution ND-Range.
220230
${x}_exp_command_buffer_update_kernel_launch_desc_t update {
221231
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
222232
nullptr, // pNext
233+
hNewKernel, // hNewKernel
223234
2, // numNewMemObjArgs
224235
0, // numNewPointerArgs
225236
0, // numNewValueArgs
@@ -249,7 +260,13 @@ Enums
249260
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
250261
* ${x}_device_info_t
251262
* ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP
252-
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP
263+
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP
264+
* ${x}_device_command_buffer_update_capability_flags_t
265+
* KERNEL_ARGUMENTS
266+
* LOCAL_WORK_SIZE
267+
* GLOBAL_WORK_SIZE
268+
* GLOBAL_WORK_OFFSET
269+
* KERNEL_HANDLE
253270
* ${x}_result_t
254271
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP
255272
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
@@ -340,6 +357,8 @@ Changelog
340357
+-----------+-------------------------------------------------------+
341358
| 1.4 | Add function definitions for kernel command update |
342359
+-----------+-------------------------------------------------------+
360+
| 1.5 | Add support for updating kernel handles. |
361+
+-----------+-------------------------------------------------------+
343362

344363
Contributors
345364
--------------------------------------------------------------------------------
@@ -348,3 +367,4 @@ Contributors
348367
* Ewan Crawford `ewan@codeplay.com <ewan@codeplay.com>`_
349368
* Maxime France-Pillois `maxime.francepillois@codeplay.com <maxime.francepillois@codeplay.com>`_
350369
* Aaron Greig `aaron.greig@codeplay.com <aaron.greig@codeplay.com>`_
370+
* Fábio Mestre `fabio.mestre@codeplay.com <fabio.mestre@codeplay.com>`_

0 commit comments

Comments
 (0)