@@ -1097,17 +1097,16 @@ struct AMDGPUKernelTy : public GenericKernelTy {
1097
1097
int32_t NumTeamsEnvVar = GenericDevice.getOMPNumTeams ();
1098
1098
// CU multiplier from envar.
1099
1099
uint32_t EnvarCUMultiplier = GenericDevice.getXTeamRedTeamsPerCU ();
1100
- // Disabled if the value is 0.
1101
- if (EnvarCUMultiplier == 0 ) {
1102
- EnvarCUMultiplier = UINT_MAX;
1103
- }
1104
1100
1105
1101
if (GenericDevice.isFastReductionEnabled ()) {
1106
1102
// When fast reduction is enabled, the number of teams is capped by
1107
1103
// the MaxCUMultiplier constant.
1108
- MaxNumGroups =
1109
- DeviceNumCUs * std::min (llvm::omp::xteam_red::MaxCUMultiplier,
1110
- static_cast <int16_t >(EnvarCUMultiplier));
1104
+ // When the envar is set to a nonzero value, use it for computing MaxNumGroups.
1105
+ if (EnvarCUMultiplier > 0 ) {
1106
+ MaxNumGroups = DeviceNumCUs * EnvarCUMultiplier;
1107
+ } else {
1108
+ MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
1109
+ }
1111
1110
} else {
1112
1111
// When fast reduction is not enabled, the number of teams is capped
1113
1112
// by the metadata that clang CodeGen created. The number of teams
@@ -1118,7 +1117,13 @@ struct AMDGPUKernelTy : public GenericKernelTy {
1118
1117
// ConstWGSize is the block size that CodeGen used.
1119
1118
uint32_t CUMultiplier =
1120
1119
llvm::omp::xteam_red::getXteamRedCUMultiplier (ConstWGSize);
1121
- MaxNumGroups = DeviceNumCUs * std::min (CUMultiplier, EnvarCUMultiplier);
1120
+
1121
+ if (EnvarCUMultiplier > 0 ) {
1122
+ MaxNumGroups =
1123
+ DeviceNumCUs * std::min (CUMultiplier, EnvarCUMultiplier);
1124
+ } else {
1125
+ MaxNumGroups = DeviceNumCUs * CUMultiplier;
1126
+ }
1122
1127
}
1123
1128
1124
1129
// If envar OMPX_XTEAMREDUCTION_OCCUPANCY_BASED_OPT is set and no
0 commit comments