Skip to content

Commit cfd1a26

Browse files
committed
Simplify memcpys and kernel launch
1 parent da92770 commit cfd1a26

File tree

1 file changed

+94
-125
lines changed

1 file changed

+94
-125
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 94 additions & 125 deletions
Original file line number | Diff line number | Diff line change
@@ -318,31 +318,37 @@ static ur_result_t getEventsFromSyncPoints(
318318

319319
// FIXME Refactor Naming?
320320
// FIXME Refactor Why do some events need to be host_visible and others don't
321-
static ur_result_t
322-
createSyncPoint(ur_command_t CommandType,
323-
ur_exp_command_buffer_handle_t CommandBuffer,
324-
uint32_t NumSyncPointsInWaitList,
325-
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
326-
ur_exp_command_buffer_sync_point_t *RetSyncPoint,
327-
bool host_visible, std::vector<ze_event_handle_t> &ZeEventList,
328-
ur_event_handle_t &LaunchEvent) {
329-
// std::vector<ze_event_handle_t> ZeEventList;
330-
// ur_event_handle_t LaunchEvent;
331-
UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList,
332-
SyncPointWaitList, ZeEventList));
333-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, host_visible,
334-
&LaunchEvent, false, !CommandBuffer->IsProfilingEnabled));
335-
LaunchEvent->CommandType = CommandType;
336-
337-
// Get sync point and register the event with it.
338-
// FIXME Refactor GetNextSyncPoint and RegisterSyncPoint seem redudant. Can be
339-
// made into just one function.
340-
ur_exp_command_buffer_sync_point_t SyncPoint =
341-
CommandBuffer->GetNextSyncPoint();
342-
CommandBuffer->RegisterSyncPoint(SyncPoint, LaunchEvent);
343-
344-
if (RetSyncPoint) {
345-
*RetSyncPoint = SyncPoint;
321+
static ur_result_t createSyncPointIfNeeded(
322+
ur_command_t CommandType, ur_exp_command_buffer_handle_t CommandBuffer,
323+
uint32_t NumSyncPointsInWaitList,
324+
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
325+
ur_exp_command_buffer_sync_point_t *RetSyncPoint, bool host_visible,
326+
std::vector<ze_event_handle_t> &ZeEventList,
327+
ze_event_handle_t &ZeLaunchEvent) {
328+
329+
ZeLaunchEvent = nullptr;
330+
if (!CommandBuffer->IsInOrderCmdList) {
331+
// std::vector<ze_event_handle_t> ZeEventList;
332+
// ur_event_handle_t LaunchEvent;
333+
UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList,
334+
SyncPointWaitList, ZeEventList));
335+
ur_event_handle_t LaunchEvent;
336+
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, host_visible,
337+
&LaunchEvent, false,
338+
!CommandBuffer->IsProfilingEnabled));
339+
LaunchEvent->CommandType = CommandType;
340+
ZeLaunchEvent = LaunchEvent->ZeEvent;
341+
342+
// Get sync point and register the event with it.
343+
// FIXME Refactor GetNextSyncPoint and RegisterSyncPoint seem redundant. Can
344+
// be made into just one function.
345+
ur_exp_command_buffer_sync_point_t SyncPoint =
346+
CommandBuffer->GetNextSyncPoint();
347+
CommandBuffer->RegisterSyncPoint(SyncPoint, LaunchEvent);
348+
349+
if (RetSyncPoint) {
350+
*RetSyncPoint = SyncPoint;
351+
}
346352
}
347353

348354
return UR_RESULT_SUCCESS;
@@ -352,7 +358,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::chooseCommandList(
352358
bool PreferCopyEngine, ze_command_list_handle_t *ZeCommandList) {
353359
// If the copy engine available, the command is enqueued in the
354360
// ZeCopyCommandList.
355-
if (PreferCopyEngine && this->UseCopyEngine()) {
361+
if (PreferCopyEngine && this->UseCopyEngine() && !this->IsInOrderCmdList) {
356362
// We indicate that the ZeCopyCommandList contains commands to be
357363
// submitted.
358364
this->MCopyCommandListEmpty = false;
@@ -386,6 +392,11 @@ ur_result_t ur_exp_command_buffer_handle_t_::chooseCommandList(
386392
UR_RESULT_ERROR_INVALID_VALUE);
387393
}
388394
UR_CALL(chooseCommandList(PreferCopyEngine, ZeCommandList));
395+
return UR_RESULT_SUCCESS;
396+
}
397+
398+
/// Returns a pointer to the first element of \p V, or nullptr when \p V holds
/// no elements.
///
/// std::vector::data() is allowed to return a non-null pointer for an empty
/// vector (for example when capacity is retained after clear()), so the result
/// is normalised to nullptr here. Callers in this file pass the returned
/// pointer together with V.size() as the wait-event list of Level Zero append
/// calls.
/// NOTE(review): presumably Level Zero expects a null list when the count is
/// zero - confirm against the Level Zero specification.
///
/// \param V vector whose underlying storage is requested; it is not modified.
/// \return V.data() if V is non-empty, otherwise nullptr.
template <typename T> static T *getPointerFromVector(std::vector<T> &V) {
  return V.empty() ? nullptr : V.data();
}
390401

391402
// Shared by all memory read/write/copy PI interfaces.
@@ -397,31 +408,22 @@ static ur_result_t enqueueCommandBufferMemCopyHelper(
397408
uint32_t NumSyncPointsInWaitList,
398409
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
399410
ur_exp_command_buffer_sync_point_t *RetSyncPoint) {
400-
if (CommandBuffer->IsInOrderCmdList) {
401-
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
402-
(CommandBuffer->ZeComputeCommandList, Dst, Src, Size, nullptr, 0,
403-
nullptr));
404411

405-
logger::debug("calling zeCommandListAppendMemoryCopy()");
406-
} else {
407-
// FIXME Why doesn't the event need to be host visible
408-
std::vector<ze_event_handle_t> ZeEventList;
409-
ur_event_handle_t LaunchEvent = nullptr;
410-
UR_CALL(createSyncPoint(CommandType, CommandBuffer, NumSyncPointsInWaitList,
411-
SyncPointWaitList, RetSyncPoint, false, ZeEventList,
412-
LaunchEvent));
412+
// FIXME Why doesn't the event need to be host visible
413+
std::vector<ze_event_handle_t> ZeEventList;
414+
ze_event_handle_t ZeLaunchEvent = nullptr;
415+
UR_CALL(createSyncPointIfNeeded(
416+
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList,
417+
RetSyncPoint, false, ZeEventList, ZeLaunchEvent));
413418

414-
ze_command_list_handle_t ZeCommandList;
415-
UR_CALL(CommandBuffer->chooseCommandList(PreferCopyEngine, &ZeCommandList));
419+
ze_command_list_handle_t ZeCommandList;
420+
UR_CALL(CommandBuffer->chooseCommandList(PreferCopyEngine, &ZeCommandList));
416421

417-
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
418-
(ZeCommandList, Dst, Src, Size, LaunchEvent->ZeEvent,
419-
ZeEventList.size(), ZeEventList.data()));
422+
logger::debug("calling zeCommandListAppendMemoryCopy()");
423+
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
424+
(ZeCommandList, Dst, Src, Size, ZeLaunchEvent, ZeEventList.size(),
425+
getPointerFromVector(ZeEventList)));
420426

421-
logger::debug("calling zeCommandListAppendMemoryCopy() with"
422-
" ZeEvent {}",
423-
ur_cast<std::uintptr_t>(LaunchEvent->ZeEvent));
424-
}
425427
return UR_RESULT_SUCCESS;
426428
}
427429

@@ -467,33 +469,21 @@ static ur_result_t enqueueCommandBufferMemCopyRectHelper(
467469
const ze_copy_region_t ZeDstRegion = {DstOriginX, DstOriginY, DstOriginZ,
468470
Width, Height, Depth};
469471

470-
if (CommandBuffer->IsInOrderCmdList) {
471-
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion,
472-
(CommandBuffer->ZeComputeCommandList, Dst, &ZeDstRegion,
473-
DstPitch, DstSlicePitch, Src, &ZeSrcRegion, SrcPitch,
474-
SrcSlicePitch, nullptr, 0, nullptr));
475-
476-
logger::debug("calling zeCommandListAppendMemoryCopyRegion()");
477-
} else {
478-
// FIXME Why doesn't the event need to be host visible
479-
std::vector<ze_event_handle_t> ZeEventList;
480-
ur_event_handle_t LaunchEvent;
481-
UR_CALL(createSyncPoint(CommandType, CommandBuffer, NumSyncPointsInWaitList,
482-
SyncPointWaitList, RetSyncPoint, false, ZeEventList,
483-
LaunchEvent));
484-
485-
ze_command_list_handle_t ZeCommandList;
486-
UR_CALL(CommandBuffer->chooseCommandList(PreferCopyEngine, &ZeCommandList));
472+
// FIXME Why doesn't the event need to be host visible
473+
std::vector<ze_event_handle_t> ZeEventList;
474+
ze_event_handle_t ZeLaunchEvent = nullptr;
475+
UR_CALL(createSyncPointIfNeeded(
476+
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList,
477+
RetSyncPoint, false, ZeEventList, ZeLaunchEvent));
487478

488-
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion,
489-
(ZeCommandList, Dst, &ZeDstRegion, DstPitch, DstSlicePitch, Src,
490-
&ZeSrcRegion, SrcPitch, SrcSlicePitch, LaunchEvent->ZeEvent,
491-
ZeEventList.size(), ZeEventList.data()));
479+
ze_command_list_handle_t ZeCommandList;
480+
UR_CALL(CommandBuffer->chooseCommandList(PreferCopyEngine, &ZeCommandList));
492481

493-
logger::debug("calling zeCommandListAppendMemoryCopyRegion() with"
494-
" ZeEvent {}",
495-
ur_cast<std::uintptr_t>(LaunchEvent->ZeEvent));
496-
}
482+
logger::debug("calling zeCommandListAppendMemoryCopyRegion()");
483+
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion,
484+
(ZeCommandList, Dst, &ZeDstRegion, DstPitch, DstSlicePitch, Src,
485+
&ZeSrcRegion, SrcPitch, SrcSlicePitch, ZeLaunchEvent,
486+
ZeEventList.size(), getPointerFromVector(ZeEventList)));
497487

498488
return UR_RESULT_SUCCESS;
499489
}
@@ -509,32 +499,21 @@ static ur_result_t enqueueCommandBufferFillHelper(
509499
UR_ASSERT((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0),
510500
UR_RESULT_ERROR_INVALID_VALUE);
511501

502+
// FIXME Why does the event need to be host visible?
503+
std::vector<ze_event_handle_t> ZeEventList;
504+
ze_event_handle_t ZeLaunchEvent = nullptr;
505+
UR_CALL(createSyncPointIfNeeded(
506+
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList,
507+
RetSyncPoint, true, ZeEventList, ZeLaunchEvent));
508+
512509
ze_command_list_handle_t ZeCommandList;
513510
UR_CALL(CommandBuffer->chooseCommandList(PreferCopyEngine, &ZeCommandList,
514511
PatternSize));
515512

516-
if (CommandBuffer->IsInOrderCmdList) {
517-
ZE2UR_CALL(zeCommandListAppendMemoryFill,
518-
(CommandBuffer->ZeComputeCommandList, Ptr, Pattern, PatternSize,
519-
Size, nullptr, 0, nullptr));
520-
521-
logger::debug("calling zeCommandListAppendMemoryFill()");
522-
} else {
523-
// FIXME Why does the event need to be host visible?
524-
std::vector<ze_event_handle_t> ZeEventList;
525-
ur_event_handle_t LaunchEvent;
526-
UR_CALL(createSyncPoint(CommandType, CommandBuffer, NumSyncPointsInWaitList,
527-
SyncPointWaitList, RetSyncPoint, true, ZeEventList,
528-
LaunchEvent));
529-
530-
ZE2UR_CALL(zeCommandListAppendMemoryFill,
531-
(ZeCommandList, Ptr, Pattern, PatternSize, Size,
532-
LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data()));
533-
534-
logger::debug("calling zeCommandListAppendMemoryFill() with"
535-
" ZeEvent {}",
536-
ur_cast<std::uintptr_t>(LaunchEvent->ZeEvent));
537-
}
513+
logger::debug("calling zeCommandListAppendMemoryFill()");
514+
ZE2UR_CALL(zeCommandListAppendMemoryFill,
515+
(ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeLaunchEvent,
516+
ZeEventList.size(), getPointerFromVector(ZeEventList)));
538517

539518
return UR_RESULT_SUCCESS;
540519
}
@@ -580,6 +559,7 @@ appendPreconditionEvents(ze_command_list_handle_t CommandList,
580559
ZE2UR_CALL(
581560
zeCommandListAppendBarrier,
582561
(CommandList, nullptr, PrecondEvents.size(), PrecondEvents.data()));
562+
return UR_RESULT_SUCCESS;
583563
}
584564

585565
static bool
@@ -838,28 +818,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
838818
*Command));
839819
}
840820

841-
if (CommandBuffer->IsInOrderCmdList) {
842-
ZE2UR_CALL(zeCommandListAppendLaunchKernel,
843-
(CommandBuffer->ZeComputeCommandList, Kernel->ZeKernel,
844-
&ZeThreadGroupDimensions, nullptr, 0, nullptr));
821+
std::vector<ze_event_handle_t> ZeEventList;
822+
ze_event_handle_t ZeLaunchEvent = nullptr;
823+
UR_CALL(createSyncPointIfNeeded(
824+
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer, NumSyncPointsInWaitList,
825+
SyncPointWaitList, RetSyncPoint, false, ZeEventList, ZeLaunchEvent));
845826

846-
logger::debug("calling zeCommandListAppendLaunchKernel()");
847-
} else {
848-
std::vector<ze_event_handle_t> ZeEventList;
849-
ur_event_handle_t LaunchEvent;
850-
UR_CALL(createSyncPoint(UR_COMMAND_KERNEL_LAUNCH, CommandBuffer,
851-
NumSyncPointsInWaitList, SyncPointWaitList,
852-
RetSyncPoint, false, ZeEventList, LaunchEvent));
853-
854-
ZE2UR_CALL(zeCommandListAppendLaunchKernel,
855-
(CommandBuffer->ZeComputeCommandList, Kernel->ZeKernel,
856-
&ZeThreadGroupDimensions, LaunchEvent->ZeEvent,
857-
ZeEventList.size(), ZeEventList.data()));
858-
859-
logger::debug("calling zeCommandListAppendLaunchKernel() with"
860-
" ZeEvent {}",
861-
ur_cast<std::uintptr_t>(LaunchEvent->ZeEvent));
862-
}
827+
logger::debug("calling zeCommandListAppendLaunchKernel()");
828+
ZE2UR_CALL(zeCommandListAppendLaunchKernel,
829+
(CommandBuffer->ZeComputeCommandList, Kernel->ZeKernel,
830+
&ZeThreadGroupDimensions, ZeLaunchEvent, ZeEventList.size(),
831+
getPointerFromVector(ZeEventList)));
863832

864833
return UR_RESULT_SUCCESS;
865834
}
@@ -1048,10 +1017,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
10481017
} else {
10491018
// FIXME Why does the event need to be host visible?
10501019
std::vector<ze_event_handle_t> ZeEventList;
1051-
ur_event_handle_t LaunchEvent;
1052-
UR_CALL(createSyncPoint(UR_COMMAND_USM_PREFETCH, CommandBuffer,
1053-
NumSyncPointsInWaitList, SyncPointWaitList,
1054-
RetSyncPoint, true, ZeEventList, LaunchEvent));
1020+
ze_event_handle_t ZeLaunchEvent = nullptr;
1021+
UR_CALL(createSyncPointIfNeeded(
1022+
UR_COMMAND_USM_PREFETCH, CommandBuffer, NumSyncPointsInWaitList,
1023+
SyncPointWaitList, RetSyncPoint, true, ZeEventList, ZeLaunchEvent));
10551024

10561025
if (NumSyncPointsInWaitList) {
10571026
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
@@ -1067,7 +1036,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
10671036
// Level Zero does not have a completion "event" with the prefetch API,
10681037
// so manually add command to signal our event.
10691038
ZE2UR_CALL(zeCommandListAppendSignalEvent,
1070-
(CommandBuffer->ZeComputeCommandList, LaunchEvent->ZeEvent));
1039+
(CommandBuffer->ZeComputeCommandList, ZeLaunchEvent));
10711040
}
10721041

10731042
return UR_RESULT_SUCCESS;
@@ -1112,10 +1081,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
11121081
} else {
11131082
// FIXME Why does the event need to be host visible?
11141083
std::vector<ze_event_handle_t> ZeEventList;
1115-
ur_event_handle_t LaunchEvent;
1116-
UR_CALL(createSyncPoint(UR_COMMAND_USM_ADVISE, CommandBuffer,
1117-
NumSyncPointsInWaitList, SyncPointWaitList,
1118-
RetSyncPoint, true, ZeEventList, LaunchEvent));
1084+
ze_event_handle_t ZeLaunchEvent = nullptr;
1085+
UR_CALL(createSyncPointIfNeeded(
1086+
UR_COMMAND_USM_ADVISE, CommandBuffer, NumSyncPointsInWaitList,
1087+
SyncPointWaitList, RetSyncPoint, true, ZeEventList, ZeLaunchEvent));
11191088

11201089
if (NumSyncPointsInWaitList) {
11211090
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
@@ -1130,7 +1099,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
11301099
// Level Zero does not have a completion "event" with the advise API,
11311100
// so manually add command to signal our event.
11321101
ZE2UR_CALL(zeCommandListAppendSignalEvent,
1133-
(CommandBuffer->ZeComputeCommandList, LaunchEvent->ZeEvent));
1102+
(CommandBuffer->ZeComputeCommandList, ZeLaunchEvent));
11341103
}
11351104

11361105
return UR_RESULT_SUCCESS;

0 commit comments

Comments
 (0)