Skip to content

Commit 7b91df3

Browse files
authored
[OMPIRBuilder] Don't use invalid debug loc in reduction functions. (#147950)
We have this pattern of code in OMPIRBuilder for many functions that are used in reduction operations:

```
Function *LtGRFunc = Function::Create(...);
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
```

The insertion point is moved to the new function, but the debug location is not updated. This means that the reduction function will use a debug location that points into another function. The problem is usually hidden because these functions get inlined, but the potential for failure exists. This patch resets the debug location when the insertion point is moved to the new function. Some `InsertPointGuard` objects have been added to make sure we restore the debug location correctly when we are done with the reduction function.
1 parent 74e3dfe commit 7b91df3

File tree

2 files changed

+137
-10
lines changed

2 files changed

+137
-10
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
26172617
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
26182618
const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
26192619
AttributeList FuncAttrs) {
2620-
InsertPointTy SavedIP = Builder.saveIP();
2620+
IRBuilder<>::InsertPointGuard IPG(Builder);
26212621
LLVMContext &Ctx = M.getContext();
26222622
FunctionType *FuncTy = FunctionType::get(
26232623
Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,6 +2630,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
26302630
WcFunc->addParamAttr(1, Attribute::NoUndef);
26312631
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
26322632
Builder.SetInsertPoint(EntryBB);
2633+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
26332634

26342635
// ReduceList: thread local Reduce list.
26352636
// At the stage of the computation when this function is called, partially
@@ -2844,7 +2845,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
28442845
}
28452846

28462847
Builder.CreateRetVoid();
2847-
Builder.restoreIP(SavedIP);
28482848

28492849
return WcFunc;
28502850
}
@@ -2853,6 +2853,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
28532853
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
28542854
AttributeList FuncAttrs) {
28552855
LLVMContext &Ctx = M.getContext();
2856+
IRBuilder<>::InsertPointGuard IPG(Builder);
28562857
FunctionType *FuncTy =
28572858
FunctionType::get(Builder.getVoidTy(),
28582859
{Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2871,6 +2872,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
28712872
SarFunc->addParamAttr(3, Attribute::SExt);
28722873
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
28732874
Builder.SetInsertPoint(EntryBB);
2875+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
28742876

28752877
// Thread local Reduce list used to host the values of data to be reduced.
28762878
Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3017,7 +3019,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
30173019
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
30183020
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
30193021
AttributeList FuncAttrs) {
3020-
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3022+
IRBuilder<>::InsertPointGuard IPG(Builder);
30213023
LLVMContext &Ctx = M.getContext();
30223024
FunctionType *FuncTy = FunctionType::get(
30233025
Builder.getVoidTy(),
@@ -3033,6 +3035,7 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
30333035

30343036
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
30353037
Builder.SetInsertPoint(EntryBlock);
3038+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
30363039

30373040
// Buffer: global reduction buffer.
30383041
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3120,14 +3123,13 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
31203123
}
31213124

31223125
Builder.CreateRetVoid();
3123-
Builder.restoreIP(OldIP);
31243126
return LtGCFunc;
31253127
}
31263128

31273129
Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
31283130
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
31293131
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3130-
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3132+
IRBuilder<>::InsertPointGuard IPG(Builder);
31313133
LLVMContext &Ctx = M.getContext();
31323134
FunctionType *FuncTy = FunctionType::get(
31333135
Builder.getVoidTy(),
@@ -3143,6 +3145,7 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
31433145

31443146
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
31453147
Builder.SetInsertPoint(EntryBlock);
3148+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
31463149

31473150
// Buffer: global reduction buffer.
31483151
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3203,14 +3206,13 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
32033206
Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
32043207
->addFnAttr(Attribute::NoUnwind);
32053208
Builder.CreateRetVoid();
3206-
Builder.restoreIP(OldIP);
32073209
return LtGRFunc;
32083210
}
32093211

32103212
Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
32113213
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
32123214
AttributeList FuncAttrs) {
3213-
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3215+
IRBuilder<>::InsertPointGuard IPG(Builder);
32143216
LLVMContext &Ctx = M.getContext();
32153217
FunctionType *FuncTy = FunctionType::get(
32163218
Builder.getVoidTy(),
@@ -3226,6 +3228,7 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
32263228

32273229
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
32283230
Builder.SetInsertPoint(EntryBlock);
3231+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
32293232

32303233
// Buffer: global reduction buffer.
32313234
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3311,14 +3314,13 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
33113314
}
33123315

33133316
Builder.CreateRetVoid();
3314-
Builder.restoreIP(OldIP);
33153317
return LtGCFunc;
33163318
}
33173319

33183320
Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
33193321
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
33203322
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3321-
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3323+
IRBuilder<>::InsertPointGuard IPG(Builder);
33223324
LLVMContext &Ctx = M.getContext();
33233325
auto *FuncTy = FunctionType::get(
33243326
Builder.getVoidTy(),
@@ -3334,6 +3336,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
33343336

33353337
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
33363338
Builder.SetInsertPoint(EntryBlock);
3339+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
33373340

33383341
// Buffer: global reduction buffer.
33393342
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3394,7 +3397,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
33943397
Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
33953398
->addFnAttr(Attribute::NoUnwind);
33963399
Builder.CreateRetVoid();
3397-
Builder.restoreIP(OldIP);
33983400
return LtGRFunc;
33993401
}
34003402

@@ -3407,6 +3409,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
34073409
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
34083410
StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
34093411
ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3412+
IRBuilder<>::InsertPointGuard IPG(Builder);
34103413
auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
34113414
{Builder.getPtrTy(), Builder.getPtrTy()},
34123415
/* IsVarArg */ false);
@@ -3419,6 +3422,7 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
34193422
BasicBlock *EntryBB =
34203423
BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
34213424
Builder.SetInsertPoint(EntryBB);
3425+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
34223426

34233427
// Need to alloca memory here and deal with the pointers before getting
34243428
// LHS/RHS pointers out
@@ -3746,10 +3750,12 @@ static Error populateReductionFunction(
37463750
Function *ReductionFunc,
37473751
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
37483752
IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
3753+
IRBuilder<>::InsertPointGuard IPG(Builder);
37493754
Module *Module = ReductionFunc->getParent();
37503755
BasicBlock *ReductionFuncBlock =
37513756
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
37523757
Builder.SetInsertPoint(ReductionFuncBlock);
3758+
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
37533759
Value *LHSArrayPtr = nullptr;
37543760
Value *RHSArrayPtr = nullptr;
37553761
if (IsGPU) {
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
4+
omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
5+
omp.declare_reduction @add_reduction_i32 : i32 init {
6+
^bb0(%arg0: i32 loc("test.f90":8:7)):
7+
%0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc2)
8+
omp.yield(%0 : i32) loc(#loc2)
9+
} combiner {
10+
^bb0(%arg0: i32 loc("test.f90":8:7), %arg1: i32 loc("test.f90":8:7)):
11+
%0 = llvm.add %arg0, %arg1 : i32 loc(#loc2)
12+
omp.yield(%0 : i32) loc(#loc2)
13+
} loc(#loc2)
14+
llvm.func @_QQmain() {
15+
%0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc4)
16+
%1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5> loc(#loc4)
17+
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
18+
%3 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1)
19+
%4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc1)
20+
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc1)
21+
%6 = llvm.mlir.constant(8191 : index) : i64 loc(#loc5)
22+
%7 = llvm.mlir.constant(0 : index) : i64 loc(#loc5)
23+
%8 = llvm.mlir.constant(1 : index) : i64 loc(#loc5)
24+
%9 = llvm.mlir.constant(0 : i32) : i32 loc(#loc5)
25+
%10 = llvm.mlir.constant(8192 : index) : i64 loc(#loc5)
26+
%11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr<1> loc(#loc6)
27+
%12 = llvm.addrspacecast %11 : !llvm.ptr<1> to !llvm.ptr loc(#loc6)
28+
llvm.store %9, %2 : i32, !llvm.ptr loc(#loc7)
29+
%15 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} loc(#loc4)
30+
%16 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc7)
31+
%17 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) loc(#loc7)
32+
%18 = omp.map.info var_ptr(%12 : !llvm.ptr, !llvm.array<8192 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%17) -> !llvm.ptr {name = "arr"} loc(#loc7)
33+
omp.target map_entries(%15 -> %arg0, %16 -> %arg1, %18 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
34+
%19 = llvm.mlir.constant(8192 : i32) : i32 loc(#loc5)
35+
%20 = llvm.mlir.constant(1 : i32) : i32 loc(#loc5)
36+
%21 = llvm.mlir.constant(8192 : index) : i64 loc(#loc6)
37+
omp.teams reduction(@add_reduction_i32 %arg0 -> %arg3 : !llvm.ptr) {
38+
omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
39+
omp.distribute {
40+
omp.wsloop reduction(@add_reduction_i32 %arg3 -> %arg5 : !llvm.ptr) {
41+
omp.loop_nest (%arg6) : i32 = (%20) to (%19) inclusive step (%20) {
42+
llvm.store %arg6, %arg4 : i32, !llvm.ptr loc(#loc2)
43+
%22 = llvm.load %arg5 : !llvm.ptr -> i32 loc(#loc8)
44+
%23 = llvm.load %arg4 : !llvm.ptr -> i32 loc(#loc8)
45+
%34 = llvm.add %22, %23 : i32 loc(#loc8)
46+
llvm.store %34, %arg5 : i32, !llvm.ptr loc(#loc8)
47+
omp.yield loc(#loc2)
48+
} loc(#loc2)
49+
} {omp.composite} loc(#loc2)
50+
} {omp.composite} loc(#loc2)
51+
omp.terminator loc(#loc2)
52+
} {omp.composite} loc(#loc2)
53+
omp.terminator loc(#loc2)
54+
} loc(#loc2)
55+
omp.terminator loc(#loc2)
56+
} loc(#loc13)
57+
llvm.return loc(#loc9)
58+
} loc(#loc12)
59+
llvm.mlir.global internal @_QFEarr() {addr_space = 1 : i32} : !llvm.array<8192 x i32> {
60+
%0 = llvm.mlir.zero : !llvm.array<8192 x i32> loc(#loc6)
61+
llvm.return %0 : !llvm.array<8192 x i32> loc(#loc6)
62+
} loc(#loc6)
63+
} loc(#loc)
64+
65+
#loc = loc("test.f90":4:18)
66+
#loc1 = loc("test.f90":4:18)
67+
#loc2 = loc("test.f90":8:7)
68+
#loc3 = loc("test.f90":1:7)
69+
#loc4 = loc("test.f90":3:18)
70+
#loc5 = loc(unknown)
71+
#loc6 = loc("test.f90":5:18)
72+
#loc7 = loc("test.f90":6:7)
73+
#loc8 = loc("test.f90":10:7)
74+
#loc9 = loc("test.f90":16:7)
75+
76+
#di_file = #llvm.di_file<"target7.f90" in "">
77+
#di_null_type = #llvm.di_null_type
78+
#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>,
79+
sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang",
80+
isOptimized = false, emissionKind = LineTablesOnly>
81+
#di_subroutine_type = #llvm.di_subroutine_type<
82+
callingConvention = DW_CC_program, types = #di_null_type>
83+
#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>,
84+
compileUnit = #di_compile_unit, scope = #di_file, name = "main",
85+
file = #di_file, subprogramFlags = "Definition|MainSubprogram",
86+
type = #di_subroutine_type>
87+
#di_subprogram1 = #llvm.di_subprogram<compileUnit = #di_compile_unit,
88+
name = "target", file = #di_file, subprogramFlags = "Definition",
89+
type = #di_subroutine_type>
90+
91+
92+
#loc12 = loc(fused<#di_subprogram>[#loc3])
93+
#loc13 = loc(fused<#di_subprogram1>[#loc2])
94+
95+
// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func
96+
// CHECK-NOT: !dbg
97+
// CHECK: }
98+
// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func
99+
// CHECK-NOT: !dbg
100+
// CHECK: }
101+
// CHECK-DAG: define internal void @"__omp_offloading_{{.*}}__QQmain_l8_omp$reduction$reduction_func.1"
102+
// CHECK-NOT: !dbg
103+
// CHECK: }
104+
// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func.2
105+
// CHECK-NOT: !dbg
106+
// CHECK: }
107+
// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func.3
108+
// CHECK-NOT: !dbg
109+
// CHECK: }
110+
// CHECK-DAG: define internal void @_omp_reduction_list_to_global_copy_func
111+
// CHECK-NOT: !dbg
112+
// CHECK: }
113+
// CHECK-DAG: define internal void @_omp_reduction_list_to_global_reduce_func
114+
// CHECK-NOT: !dbg
115+
// CHECK: }
116+
// CHECK-DAG: define internal void @_omp_reduction_global_to_list_copy_func
117+
// CHECK-NOT: !dbg
118+
// CHECK: }
119+
// CHECK-DAG: define internal void @_omp_reduction_global_to_list_reduce_func
120+
// CHECK-NOT: !dbg
121+
// CHECK: }

0 commit comments

Comments
 (0)