Skip to content

Commit 0abbf18

Browse files
committed
Address comments
1 parent c02b0c9 commit 0abbf18

File tree

1 file changed

+13
-8
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+13
-8
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,17 +1097,16 @@ struct AMDGPUKernelTy : public GenericKernelTy {
10971097
int32_t NumTeamsEnvVar = GenericDevice.getOMPNumTeams();
10981098
// CU multiplier from envar.
10991099
uint32_t EnvarCUMultiplier = GenericDevice.getXTeamRedTeamsPerCU();
1100-
// Disabled if the value is 0.
1101-
if (EnvarCUMultiplier == 0) {
1102-
EnvarCUMultiplier = UINT_MAX;
1103-
}
11041100

11051101
if (GenericDevice.isFastReductionEnabled()) {
11061102
// When fast reduction is enabled, the number of teams is capped by
11071103
// the MaxCUMultiplier constant.
1108-
MaxNumGroups =
1109-
DeviceNumCUs * std::min(llvm::omp::xteam_red::MaxCUMultiplier,
1110-
static_cast<int16_t>(EnvarCUMultiplier));
1104+
// When the envar is set to a non-zero value, use it for computing MaxNumGroups.
1105+
if (EnvarCUMultiplier > 0) {
1106+
MaxNumGroups = DeviceNumCUs * EnvarCUMultiplier;
1107+
} else {
1108+
MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
1109+
}
11111110
} else {
11121111
// When fast reduction is not enabled, the number of teams is capped
11131112
// by the metadata that clang CodeGen created. The number of teams
@@ -1118,7 +1117,13 @@ struct AMDGPUKernelTy : public GenericKernelTy {
11181117
// ConstWGSize is the block size that CodeGen used.
11191118
uint32_t CUMultiplier =
11201119
llvm::omp::xteam_red::getXteamRedCUMultiplier(ConstWGSize);
1121-
MaxNumGroups = DeviceNumCUs * std::min(CUMultiplier, EnvarCUMultiplier);
1120+
1121+
if (EnvarCUMultiplier > 0) {
1122+
MaxNumGroups =
1123+
DeviceNumCUs * std::min(CUMultiplier, EnvarCUMultiplier);
1124+
} else {
1125+
MaxNumGroups = DeviceNumCUs * CUMultiplier;
1126+
}
11221127
}
11231128

11241129
// If envar OMPX_XTEAMREDUCTION_OCCUPANCY_BASED_OPT is set and no

0 commit comments

Comments
 (0)