Skip to content

Commit 2c20169

Browse files
authored
[UR] Make kernel launch global offset param optional. (#18573)
The offset is already deprecated, so this will let SYCL omit it entirely in the future. For now, it simplifies some adapter code by allowing the check for whether we're working with an offset to happen once at the SYCL level, with subsequent checks being simple nullptr comparisons.
1 parent 588243f commit 2c20169

File tree

14 files changed

+42
-54
lines changed

14 files changed

+42
-54
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2475,6 +2475,11 @@ static ur_result_t SetKernelParamsAndLaunch(
24752475
if (EnforcedLocalSize)
24762476
LocalSize = RequiredWGSize;
24772477
}
2478+
2479+
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 ||
2480+
NDRDesc.GlobalOffset[1] != 0 ||
2481+
NDRDesc.GlobalOffset[2] != 0;
2482+
24782483
std::vector<ur_exp_launch_property_t> property_list;
24792484
if (KernelUsesClusterLaunch) {
24802485
ur_exp_launch_property_value_t launch_property_value_cluster_range;
@@ -2505,8 +2510,9 @@ static ur_result_t SetKernelParamsAndLaunch(
25052510
ur_result_t Error =
25062511
Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunchCustomExp>(
25072512
Queue->getHandleRef(), Kernel, NDRDesc.Dims,
2508-
&NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize,
2509-
property_list.size(), property_list.data(), RawEvents.size(),
2513+
HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr,
2514+
&NDRDesc.GlobalSize[0], LocalSize, property_list.size(),
2515+
property_list.data(), RawEvents.size(),
25102516
RawEvents.empty() ? nullptr : &RawEvents[0],
25112517
OutEventImpl ? &UREvent : nullptr);
25122518
if ((Error == UR_RESULT_SUCCESS) && OutEventImpl) {
@@ -2523,8 +2529,9 @@ static ur_result_t SetKernelParamsAndLaunch(
25232529
Args...);
25242530
}
25252531
return Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunch>(Args...);
2526-
}(Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
2527-
&NDRDesc.GlobalSize[0], LocalSize, RawEvents.size(),
2532+
}(Queue->getHandleRef(), Kernel, NDRDesc.Dims,
2533+
HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr, &NDRDesc.GlobalSize[0],
2534+
LocalSize, RawEvents.size(),
25282535
RawEvents.empty() ? nullptr : &RawEvents[0],
25292536
OutEventImpl ? &UREvent : nullptr);
25302537
if (Error == UR_RESULT_SUCCESS && OutEventImpl) {

unified-runtime/include/ur_api.h

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/scripts/core/enqueue.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ params:
3333
desc: "[in] number of dimensions, from 1 to 3, to specify the global and work-group work-items"
3434
- type: "const size_t*"
3535
name: pGlobalWorkOffset
36-
desc: "[in] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item"
36+
desc: "[in][optional] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item"
3737
- type: "const size_t*"
3838
name: pGlobalWorkSize
3939
desc: "[in] pointer to an array of workDim unsigned values that specify the number of global work-items in workDim that will execute the kernel function"

unified-runtime/source/adapters/level_zero/kernel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ ur_result_t urEnqueueKernelLaunch(
6464
/// [in] number of dimensions, from 1 to 3, to specify the global and
6565
/// work-group work-items
6666
uint32_t WorkDim,
67-
/// [in] pointer to an array of workDim unsigned values that specify the
68-
/// offset used to calculate the global ID of a work-item
67+
/// [in][optional] pointer to an array of workDim unsigned values that
68+
/// specify the offset used to calculate the global ID of a work-item
6969
const size_t *GlobalWorkOffset,
7070
/// [in] pointer to an array of workDim unsigned values that specify the
7171
/// number of global work-items in workDim that will execute the kernel

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -220,16 +220,6 @@ ur_result_t ur_command_list_manager::appendKernelLaunch(
220220
waitListView.clear();
221221
};
222222

223-
// If the offset is {0, 0, 0}, pass NULL instead.
224-
// This allows us to skip setting the offset.
225-
bool hasOffset = false;
226-
for (uint32_t i = 0; i < workDim; ++i) {
227-
hasOffset |= pGlobalWorkOffset[i];
228-
}
229-
if (!hasOffset) {
230-
pGlobalWorkOffset = NULL;
231-
}
232-
233223
UR_CALL(hKernel->prepareForSubmission(context, device, pGlobalWorkOffset,
234224
workDim, WG[0], WG[1], WG[2],
235225
memoryMigrate));

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -938,16 +938,6 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp(
938938
waitListView.clear();
939939
};
940940

941-
// If the offset is {0, 0, 0}, pass NULL instead.
942-
// This allows us to skip setting the offset.
943-
bool hasOffset = false;
944-
for (uint32_t i = 0; i < workDim; ++i) {
945-
hasOffset |= pGlobalWorkOffset[i];
946-
}
947-
if (!hasOffset) {
948-
pGlobalWorkOffset = NULL;
949-
}
950-
951941
UR_CALL(hKernel->prepareForSubmission(hContext, hDevice, pGlobalWorkOffset,
952942
workDim, WG[0], WG[1], WG[2],
953943
memoryMigrate));

unified-runtime/source/adapters/mock/ur_mockddi.cpp

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/adapters/native_cpu/enqueue.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ struct NDRDescT {
3030
const size_t *GlobalWorkSize, const size_t *LocalWorkSize)
3131
: WorkDim(WorkDim) {
3232
for (uint32_t I = 0; I < WorkDim; I++) {
33-
GlobalOffset[I] = GlobalWorkOffset[I];
33+
GlobalOffset[I] = GlobalWorkOffset ? GlobalWorkOffset[I] : 0;
3434
GlobalSize[I] = GlobalWorkSize[I];
3535
LocalSize[I] = LocalWorkSize ? LocalWorkSize[I] : 1;
3636
}
@@ -72,7 +72,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
7272
urEventWait(numEventsInWaitList, phEventWaitList);
7373
UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
7474
UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
75-
UR_ASSERT(pGlobalWorkOffset, UR_RESULT_ERROR_INVALID_NULL_POINTER);
7675
UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
7776
UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
7877

unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/layers/validation/ur_valddi.cpp

Lines changed: 2 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/ur_ldrddi.cpp

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/ur_libapi.cpp

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/ur_api.cpp

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,19 @@ TEST_P(urEnqueueKernelLaunchTest, Success) {
7575
&global_offset, &global_size, nullptr, 0,
7676
nullptr, nullptr));
7777
ASSERT_SUCCESS(urQueueFinish(queue));
78+
79+
ValidateBuffer(buffer, sizeof(val) * global_size, val);
80+
}
81+
82+
TEST_P(urEnqueueKernelLaunchTest, SuccessNoOffset) {
83+
ur_mem_handle_t buffer = nullptr;
84+
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
85+
AddPodArg(val);
86+
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, nullptr,
87+
&global_size, nullptr, 0, nullptr,
88+
nullptr));
89+
ASSERT_SUCCESS(urQueueFinish(queue));
90+
7891
ValidateBuffer(buffer, sizeof(val) * global_size, val);
7992
}
8093

@@ -86,11 +99,6 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidNullHandleQueue) {
8699
}
87100

88101
TEST_P(urEnqueueKernelLaunchTest, InvalidNullPointer) {
89-
ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions, nullptr,
90-
&global_size, nullptr, 0, nullptr,
91-
nullptr),
92-
UR_RESULT_ERROR_INVALID_NULL_POINTER);
93-
94102
ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
95103
&global_offset, nullptr, nullptr, 0,
96104
nullptr, nullptr),

0 commit comments

Comments
 (0)