Skip to content

Commit 5683d95

Browse files
committed
[OpenMP][Offload][AMDGPU] Added tuning constraint for the number of threads
This PR adds a constraint to ensure that the tuned number of threads is smaller than ConstWGSize.
1 parent 4a7c389 commit 5683d95

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1360,7 +1360,10 @@ struct KernelRunRecordTy {
13601360

13611361
if (IdxCUMulti >= CUMultiplierCandidate.size()) {
13621362
// No more elements to search.
1363+
// Push the run counter past RunLimiter to stop further tuning runs.
13631364
// Return current optimal launch parameters.
1365+
TuningData[KernelName].RunCounters = RunLimiter + 1;
1366+
13641367
return {TuningData[KernelName].MinEntry.NumTeams,
13651368
TuningData[KernelName].MinEntry.NumThreads};
13661369
}
@@ -1374,7 +1377,11 @@ struct KernelRunRecordTy {
13741377
IdxThread++;
13751378
TuningData[KernelName].IdxThread = IdxThread;
13761379

1377-
if (IdxThread >= ThreadCandidate.size()) {
1380+
uint16_t ConstWGSize = GenericDevice.getDefaultNumThreads();
1381+
1382+
// The candidate thread count must be smaller than ConstWGSize.
1383+
if (IdxThread >= ThreadCandidate.size() ||
1384+
ThreadCandidate[IdxThread] >= ConstWGSize) {
13781385
TuningData[KernelName].IdxThread = 0;
13791386
TuningData[KernelName].IdxCUMultiplier++;
13801387
}

0 commit comments

Comments
 (0)