@@ -1118,7 +1118,28 @@ _pi_queue::_pi_queue(std::vector<ze_command_queue_handle_t> &ComputeQueues,
1118
1118
CopyCommandBatch.QueueBatchSize = ZeCommandListBatchCopyConfig.startSize ();
1119
1119
}
1120
1120
1121
- static pi_result CleanupCompletedEvent (pi_event);
1121
+ static pi_result CleanupCompletedEvent (pi_event Event,
1122
+ bool QueueLocked = false );
1123
+
1124
+ // Helper that cleans up the events taken from a reset command list: marks each one
1125
+ // Completed, calls CleanupCompletedEvent (forwarding QueueLocked) and releases it.
1126
+ static pi_result
1127
+ CleanupEventListFromResetCmdList (std::vector<pi_event> &EventListToCleanup,
1128
+ bool QueueLocked = false ) {
1129
+ for (auto Event : EventListToCleanup) {
1130
+ // No per-event synchronization is needed: the fence associated with the
1131
+ // command list has already been synchronized, so only the flag is set.
1132
+ {
1133
+ std::scoped_lock EventLock (Event->Mutex );
1134
+ Event->Completed = true ;
1135
+ }
1136
+ PI_CALL (CleanupCompletedEvent (Event, QueueLocked));
1137
+ // The event no longer belongs to the command list, so drop the reference
1138
+ // that was taken when it was added to the command list.
1139
+ PI_CALL (piEventRelease (Event));
1140
+ }
1141
+ return PI_SUCCESS;
1142
+ }
1122
1143
1123
1144
// Reset signalled command lists in the queue and put them to the cache of
1124
1145
// command lists. A caller must not lock the queue mutex.
@@ -1150,18 +1171,7 @@ pi_result resetCommandLists(pi_queue Queue) {
1150
1171
}
1151
1172
}
1152
1173
}
1153
- for (auto Event : EventListToCleanup) {
1154
- // We don't need to synchronize the events since the fence
1155
- // synchronized above already does that.
1156
- {
1157
- std::scoped_lock EventLock (Event->Mutex );
1158
- Event->Completed = true ;
1159
- }
1160
- PI_CALL (CleanupCompletedEvent (Event));
1161
- // This event was removed from the command list, so decrement ref count
1162
- // (it was incremented when they were added to the command list).
1163
- PI_CALL (piEventRelease (Event));
1164
- }
1174
+ CleanupEventListFromResetCmdList (EventListToCleanup);
1165
1175
return PI_SUCCESS;
1166
1176
}
1167
1177
@@ -1245,6 +1255,31 @@ _pi_context::getAvailableCommandList(pi_queue Queue,
1245
1255
}
1246
1256
}
1247
1257
1258
+ // If there are no available command lists in the cache, then we check for
1259
+ // command lists that have already signalled, but have not been added to the
1260
+ // available list yet. Each command list has a fence associated which tracks
1261
+ // if a command list has completed dispatch of its commands and is ready for
1262
+ // reuse. If a command list is found to have been signalled, then the
1263
+ // command list & fence are reset and we return.
1264
+ for (auto it = Queue->CommandListMap .begin ();
1265
+ it != Queue->CommandListMap .end (); ++it) {
1266
+ // Make sure this is the command list type needed.
1267
+ if (UseCopyEngine != it->second .isCopy (Queue))
1268
+ continue ;
1269
+
1270
+ ze_result_t ZeResult =
1271
+ ZE_CALL_NOCHECK (zeFenceQueryStatus, (it->second .ZeFence ));
1272
+ if (ZeResult == ZE_RESULT_SUCCESS) {
1273
+ std::vector<pi_event> EventListToCleanup;
1274
+ Queue->resetCommandList (it, false , EventListToCleanup);
1275
+ CleanupEventListFromResetCmdList (EventListToCleanup,
1276
+ true /* QueueLocked */ );
1277
+ CommandList = it;
1278
+ CommandList->second .ZeFenceInUse = true ;
1279
+ return PI_SUCCESS;
1280
+ }
1281
+ }
1282
+
1248
1283
// If there are no available command lists nor signalled command lists, then
1249
1284
// we must create another command list.
1250
1285
// Once created, this command list & fence are added to the command list fence
@@ -5452,8 +5487,8 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
5452
5487
// This currently makes sure to release any kernel that may have been used by
5453
5488
// the event, updates the last command event in the queue and cleans up all dep
5454
5489
// events of the event.
5455
- // The caller must not lock any mutexes .
5456
- static pi_result CleanupCompletedEvent (pi_event Event) {
5490
+ // If the caller already holds the queue mutex, it must pass 'true' for QueueLocked.
5491
+ static pi_result CleanupCompletedEvent (pi_event Event, bool QueueLocked ) {
5457
5492
pi_kernel AssociatedKernel = nullptr ;
5458
5493
// List of dependent events.
5459
5494
std::list<pi_event> EventsToBeReleased;
@@ -5489,7 +5524,9 @@ static pi_result CleanupCompletedEvent(pi_event Event) {
5489
5524
if (AssociatedQueue) {
5490
5525
{
5491
5526
// Lock automatically releases when this goes out of scope.
5492
- std::scoped_lock Lock (AssociatedQueue->Mutex );
5527
+ std::unique_lock QueueLock (AssociatedQueue->Mutex , std::defer_lock);
5528
+ if (!QueueLocked)
5529
+ QueueLock.lock ();
5493
5530
5494
5531
// If this event was the LastCommandEvent in the queue, being used
5495
5532
// to make sure that commands were executed in-order, remove this.
0 commit comments