Skip to content

Commit 6625aa4

Browse files
committed
Added a new API to hide the validation check on an AMD-specific attribute
1 parent 5683d95 commit 6625aa4

File tree

3 files changed

+12
-3
lines changed

3 files changed

+12
-3
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,10 @@ struct AMDGPUKernelTy : public GenericKernelTy {
853853
/// Indicates whether or not we need to set up our own private segment size.
/// Returns the current value of DynamicStack.
854854
bool usesDynamicStack() const { return DynamicStack; }
855855

856+
/// Report whether BlockSize is acceptable for this kernel: it is accepted
/// when it does not exceed ConstWGSize (a size equal to ConstWGSize passes).
/// NOTE(review): the inline check this replaces in getLaunchParamsForKernel
/// rejected sizes equal to ConstWGSize (`>= ConstWGSize`) -- confirm that the
/// inclusive bound here is intended.
bool isValidBlockSize(uint32_t BlockSize) const override {
857+
return BlockSize <= ConstWGSize;
858+
}
859+
856860
/// Envar to enable occupancy-based optimization for SPMD kernel.
857861
BoolEnvar OMPX_SPMDOccupancyBasedOpt;
858862

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,9 @@ struct GenericKernelTy {
367367
return ExecutionMode == OMP_TGT_EXEC_MODE_XTEAM_RED;
368368
}
369369

370+
/// Indicate if the input block size is within the limit. This default
/// implementation accepts every block size; it is virtual so that a derived
/// kernel type can override it with its own limit.
371+
virtual bool isValidBlockSize(uint32_t BlockSize) const { return true; }
372+
370373
protected:
371374
/// Get the execution mode name of the kernel.
372375
const char *getExecutionModeName() const {
@@ -1345,8 +1348,10 @@ struct KernelRunRecordTy {
13451348

13461349
// Get parameters for next kernel launch.
13471350
std::pair<uint32_t, uint32_t>
1348-
getLaunchParamsForKernel(std::string KernelName,
1351+
getLaunchParamsForKernel(const GenericKernelTy &Kernel,
13491352
GenericDeviceTy &GenericDevice) {
1353+
std::string KernelName = Kernel.getName();
1354+
13501355
// If the kernel reaches the run limit,
13511356
// return the current optimal launch parameters.
13521357
if (reachedRunLimitForKernel(KernelName)) {
@@ -1381,7 +1386,7 @@ struct KernelRunRecordTy {
13811386

13821387
// The thread count must be a valid block size for this kernel.
13831388
if (IdxThread >= ThreadCandidate.size() ||
1384-
ThreadCandidate[IdxThread] >= ConstWGSize) {
1389+
!Kernel.isValidBlockSize(ThreadCandidate[IdxThread])) {
13851390
TuningData[KernelName].IdxThread = 0;
13861391
TuningData[KernelName].IdxCUMultiplier++;
13871392
}

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
740740
"Autotuning is enabled, but KernelRunRecord is not initialized!");
741741

742742
auto [Teams, Threads] =
743-
KernelRecord->getLaunchParamsForKernel(KernelName, GenericDevice);
743+
KernelRecord->getLaunchParamsForKernel(*this, GenericDevice);
744744
NumBlocks[0] = Teams;
745745
NumThreads[0] = Threads;
746746
} else {

0 commit comments

Comments
 (0)