Skip to content

Commit 8a4777d

Browse files
authored
[SYCL] Reset signalled command list if there is no available command list (#6314)
Resetting command lists only at synchronization points caused a performance regression: fewer command lists are reused, so new command lists are created more often, which adds overhead. Restore the old behavior: reset signalled command lists in getAvailableCommandList.
1 parent ed92c4c commit 8a4777d

File tree

1 file changed

+53
-16
lines changed

1 file changed

+53
-16
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,7 +1118,28 @@ _pi_queue::_pi_queue(std::vector<ze_command_queue_handle_t> &ComputeQueues,
11181118
CopyCommandBatch.QueueBatchSize = ZeCommandListBatchCopyConfig.startSize();
11191119
}
11201120

1121-
static pi_result CleanupCompletedEvent(pi_event);
1121+
static pi_result CleanupCompletedEvent(pi_event Event,
1122+
bool QueueLocked = false);
1123+
1124+
// Helper function to perform the necessary cleanup of the events from reset cmd
1125+
// list.
1126+
static pi_result
1127+
CleanupEventListFromResetCmdList(std::vector<pi_event> &EventListToCleanup,
1128+
bool QueueLocked = false) {
1129+
for (auto Event : EventListToCleanup) {
1130+
// We don't need to synchronize the events since the fence associated with
1131+
// the command list was synchronized.
1132+
{
1133+
std::scoped_lock EventLock(Event->Mutex);
1134+
Event->Completed = true;
1135+
}
1136+
PI_CALL(CleanupCompletedEvent(Event, QueueLocked));
1137+
// This event was removed from the command list, so decrement ref count
1138+
// (it was incremented when they were added to the command list).
1139+
PI_CALL(piEventRelease(Event));
1140+
}
1141+
return PI_SUCCESS;
1142+
}
11221143

11231144
// Reset signalled command lists in the queue and put them to the cache of
11241145
// command lists. A caller must not lock the queue mutex.
@@ -1150,18 +1171,7 @@ pi_result resetCommandLists(pi_queue Queue) {
11501171
}
11511172
}
11521173
}
1153-
for (auto Event : EventListToCleanup) {
1154-
// We don't need to synchronize the events since the fence
1155-
// synchronized above already does that.
1156-
{
1157-
std::scoped_lock EventLock(Event->Mutex);
1158-
Event->Completed = true;
1159-
}
1160-
PI_CALL(CleanupCompletedEvent(Event));
1161-
// This event was removed from the command list, so decrement ref count
1162-
// (it was incremented when they were added to the command list).
1163-
PI_CALL(piEventRelease(Event));
1164-
}
1174+
CleanupEventListFromResetCmdList(EventListToCleanup);
11651175
return PI_SUCCESS;
11661176
}
11671177

@@ -1245,6 +1255,31 @@ _pi_context::getAvailableCommandList(pi_queue Queue,
12451255
}
12461256
}
12471257

1258+
// If there are no available command lists in the cache, then we check for
1259+
// command lists that have already signalled, but have not been added to the
1260+
// available list yet. Each command list has a fence associated which tracks
1261+
// if a command list has completed dispatch of its commands and is ready for
1262+
// reuse. If a command list is found to have been signalled, then the
1263+
// command list & fence are reset and we return.
1264+
for (auto it = Queue->CommandListMap.begin();
1265+
it != Queue->CommandListMap.end(); ++it) {
1266+
// Make sure this is the command list type needed.
1267+
if (UseCopyEngine != it->second.isCopy(Queue))
1268+
continue;
1269+
1270+
ze_result_t ZeResult =
1271+
ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence));
1272+
if (ZeResult == ZE_RESULT_SUCCESS) {
1273+
std::vector<pi_event> EventListToCleanup;
1274+
Queue->resetCommandList(it, false, EventListToCleanup);
1275+
CleanupEventListFromResetCmdList(EventListToCleanup,
1276+
true /* QueueLocked */);
1277+
CommandList = it;
1278+
CommandList->second.ZeFenceInUse = true;
1279+
return PI_SUCCESS;
1280+
}
1281+
}
1282+
12481283
// If there are no available command lists nor signalled command lists, then
12491284
// we must create another command list.
12501285
// Once created, this command list & fence are added to the command list fence
@@ -5452,8 +5487,8 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
54525487
// This currently makes sure to release any kernel that may have been used by
54535488
// the event, updates the last command event in the queue and cleans up all dep
54545489
// events of the event.
5455-
// The caller must not lock any mutexes.
5456-
static pi_result CleanupCompletedEvent(pi_event Event) {
5490+
// If the caller already holds the queue mutex, it must pass 'true' for QueueLocked.
5491+
static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
54575492
pi_kernel AssociatedKernel = nullptr;
54585493
// List of dependent events.
54595494
std::list<pi_event> EventsToBeReleased;
@@ -5489,7 +5524,9 @@ static pi_result CleanupCompletedEvent(pi_event Event) {
54895524
if (AssociatedQueue) {
54905525
{
54915526
// Lock automatically releases when this goes out of scope.
5492-
std::scoped_lock Lock(AssociatedQueue->Mutex);
5527+
std::unique_lock QueueLock(AssociatedQueue->Mutex, std::defer_lock);
5528+
if (!QueueLocked)
5529+
QueueLock.lock();
54935530

54945531
// If this event was the LastCommandEvent in the queue, being used
54955532
// to make sure that commands were executed in-order, remove this.

0 commit comments

Comments
 (0)