@@ -2319,7 +2319,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2319
2319
return LoopInfo.takeError ();
2320
2320
2321
2321
InsertPointOrErrorTy WsloopIP =
2322
- applyStaticWorkshareLoop (Loc.DL , *LoopInfo, AllocaIP, !IsNowait);
2322
+ applyStaticWorkshareLoop (Loc.DL , *LoopInfo, AllocaIP,
2323
+ WorksharingLoopType::ForStaticLoop, !IsNowait);
2323
2324
if (!WsloopIP)
2324
2325
return WsloopIP.takeError ();
2325
2326
InsertPointTy AfterIP = *WsloopIP;
@@ -4224,6 +4225,23 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
4224
4225
return createCanonicalLoop (LoopLoc, BodyGen, TripCount, Name);
4225
4226
}
4226
4227
4228
+ // Returns an LLVM function to call for initializing loop bounds using OpenMP
4229
+ // static scheduling for composite `distribute parallel for` depending on
4230
+ // `type`. Only i32 and i64 are supported by the runtime. Always interpret
4231
+ // integers as unsigned similarly to CanonicalLoopInfo.
4232
+ static FunctionCallee
4233
+ getKmpcDistForStaticInitForType (Type *Ty, Module &M,
4234
+ OpenMPIRBuilder &OMPBuilder) {
4235
+ unsigned Bitwidth = Ty->getIntegerBitWidth ();
4236
+ if (Bitwidth == 32 )
4237
+ return OMPBuilder.getOrCreateRuntimeFunction (
4238
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4239
+ if (Bitwidth == 64 )
4240
+ return OMPBuilder.getOrCreateRuntimeFunction (
4241
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4242
+ llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
4243
+ }
4244
+
4227
4245
// Returns an LLVM function to call for initializing loop bounds using OpenMP
4228
4246
// static scheduling depending on `type`. Only i32 and i64 are supported by the
4229
4247
// runtime. Always interpret integers as unsigned similarly to
@@ -4240,10 +4258,9 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
4240
4258
llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
4241
4259
}
4242
4260
4243
- OpenMPIRBuilder::InsertPointOrErrorTy
4244
- OpenMPIRBuilder::applyStaticWorkshareLoop (DebugLoc DL, CanonicalLoopInfo *CLI,
4245
- InsertPointTy AllocaIP,
4246
- bool NeedsBarrier) {
4261
+ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop (
4262
+ DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4263
+ WorksharingLoopType LoopType, bool NeedsBarrier) {
4247
4264
assert (CLI->isValid () && " Requires a valid canonical loop" );
4248
4265
assert (!isConflictIP (AllocaIP, CLI->getPreheaderIP ()) &&
4249
4266
" Require dedicated allocate IP" );
@@ -4259,7 +4276,10 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
4259
4276
// Declare useful OpenMP runtime functions.
4260
4277
Value *IV = CLI->getIndVar ();
4261
4278
Type *IVTy = IV->getType ();
4262
- FunctionCallee StaticInit = getKmpcForStaticInitForType (IVTy, M, *this );
4279
+ FunctionCallee StaticInit =
4280
+ LoopType == WorksharingLoopType::DistributeForStaticLoop
4281
+ ? getKmpcDistForStaticInitForType (IVTy, M, *this )
4282
+ : getKmpcForStaticInitForType (IVTy, M, *this );
4263
4283
FunctionCallee StaticFini =
4264
4284
getOrCreateRuntimeFunction (M, omp::OMPRTL___kmpc_for_static_fini);
4265
4285
@@ -4286,14 +4306,24 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
4286
4306
4287
4307
Value *ThreadNum = getOrCreateThreadID (SrcLoc);
4288
4308
4289
- Constant *SchedulingType = ConstantInt::get (
4290
- I32Type, static_cast <int >(OMPScheduleType::UnorderedStatic));
4309
+ OMPScheduleType SchedType =
4310
+ (LoopType == WorksharingLoopType::DistributeStaticLoop)
4311
+ ? OMPScheduleType::OrderedDistribute
4312
+ : OMPScheduleType::UnorderedStatic;
4313
+ Constant *SchedulingType =
4314
+ ConstantInt::get (I32Type, static_cast <int >(SchedType));
4291
4315
4292
4316
// Call the "init" function and update the trip count of the loop with the
4293
4317
// value it produced.
4294
- Builder.CreateCall (StaticInit,
4295
- {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4296
- PUpperBound, PStride, One, Zero});
4318
+ SmallVector<Value *, 10 > Args (
4319
+ {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4320
+ if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4321
+ Value *PDistUpperBound =
4322
+ Builder.CreateAlloca (IVTy, nullptr , " p.distupperbound" );
4323
+ Args.push_back (PDistUpperBound);
4324
+ }
4325
+ Args.append ({PStride, One, Zero});
4326
+ Builder.CreateCall (StaticInit, Args);
4297
4327
Value *LowerBound = Builder.CreateLoad (IVTy, PLowerBound);
4298
4328
Value *InclusiveUpperBound = Builder.CreateLoad (IVTy, PUpperBound);
4299
4329
Value *TripCountMinusOne =
@@ -4755,7 +4785,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
4755
4785
return applyDynamicWorkshareLoop (DL, CLI, AllocaIP, EffectiveScheduleType,
4756
4786
NeedsBarrier, ChunkSize);
4757
4787
// FIXME: Monotonicity ignored?
4758
- return applyStaticWorkshareLoop (DL, CLI, AllocaIP, NeedsBarrier);
4788
+ return applyStaticWorkshareLoop (DL, CLI, AllocaIP, LoopType, NeedsBarrier);
4759
4789
4760
4790
case OMPScheduleType::BaseStaticChunked:
4761
4791
if (IsOrdered)
0 commit comments