
Commit 1ab0e7d

[LICM] Hoisting writeonly calls (#143799)
Adds support for hoisting `writeonly` calls in LICM. This fills in a missing optimization: `writeonly` function calls are now hoisted out of loops when it is safe to do so. Previously, such calls were conservatively kept inside the loop body, and the redundant calls were only reduced through unrolling, relying on target-dependent heuristics.

Closes #143267

Testing:
- Modified previously negative tests for hoisting writeonly calls to be positive instead
- Added test cases for hoisting of two writeonly calls where the pointers do/do not alias
- Added a test case for writeonly calls that are not argmemonly
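To illustrate the new behavior (this snippet is not part of the patch; the function name @example is illustrative, and it mirrors the `@test` case in llvm/test/Transforms/LICM/call-hoisting.ll below), running LICM, e.g. via `opt -passes=licm`, now moves the loop-invariant `writeonly` call into the entry block instead of leaving it in the loop:

declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind

define void @example(ptr %loc) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  ; Loop-invariant writeonly call: nothing in the loop reads or clobbers %loc,
  ; so LICM can now hoist this call to %entry (previously it stayed here).
  call void @store(i32 0, ptr %loc)
  %iv.next = add i32 %iv, 1
  %cmp = icmp slt i32 %iv, 200
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}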
1 parent 650b451 commit 1ab0e7d

File tree

3 files changed: +211, -58 lines

- llvm/lib/Transforms/Scalar/LICM.cpp
- llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
- llvm/test/Transforms/LICM/call-hoisting.ll

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 80 additions & 53 deletions
@@ -186,6 +186,9 @@ static bool isSafeToExecuteUnconditionally(
     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
     AssumptionCache *AC, bool AllowSpeculation);
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+                                    AAResults *AA, Loop *CurLoop,
+                                    SinkAndHoistLICMFlags &Flags);
 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                      Loop *CurLoop, Instruction &I,
                                      SinkAndHoistLICMFlags &Flags,
@@ -1234,8 +1237,11 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                                        /*InvariantGroup=*/false);
     }
 
-    // FIXME: This should use mod/ref information to see if we can hoist or
-    // sink the call.
+    if (Behavior.onlyWritesMemory()) {
+      // can hoist or sink if there are no conflicting read/writes to the
+      // memory location written to by the call.
+      return noConflictingReadWrites(CI, MSSA, AA, CurLoop, Flags);
+    }
 
     return false;
   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
@@ -1253,57 +1259,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
     // arbitrary number of reads in the loop.
     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
      return true;
-    // If there are more accesses than the Promotion cap, then give up as we're
-    // not walking a list that long.
-    if (Flags.tooManyMemoryAccesses())
-      return false;
-
-    auto *SIMD = MSSA->getMemoryAccess(SI);
-    BatchAAResults BAA(*AA);
-    auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
-    // Make sure there are no clobbers inside the loop.
-    if (!MSSA->isLiveOnEntryDef(Source) &&
-        CurLoop->contains(Source->getBlock()))
-      return false;
-
-    // If there are interfering Uses (i.e. their defining access is in the
-    // loop), or ordered loads (stored as Defs!), don't move this store.
-    // Could do better here, but this is conservatively correct.
-    // TODO: Cache set of Uses on the first walk in runOnLoop, update when
-    // moving accesses. Can also extend to dominating uses.
-    for (auto *BB : CurLoop->getBlocks())
-      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
-        for (const auto &MA : *Accesses)
-          if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
-            auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
-                                                 const_cast<MemoryUse *>(MU));
-            if (!MSSA->isLiveOnEntryDef(MD) &&
-                CurLoop->contains(MD->getBlock()))
-              return false;
-            // Disable hoisting past potentially interfering loads. Optimized
-            // Uses may point to an access outside the loop, as getClobbering
-            // checks the previous iteration when walking the backedge.
-            // FIXME: More precise: no Uses that alias SI.
-            if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
-              return false;
-          } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
-            if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
-              (void)LI; // Silence warning.
-              assert(!LI->isUnordered() && "Expected unordered load");
-              return false;
-            }
-            // Any call, while it may not be clobbering SI, it may be a use.
-            if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
-              // Check if the call may read from the memory location written
-              // to by SI. Check CI's attributes and arguments; the number of
-              // such checks performed is limited above by NoOfMemAccTooLarge.
-              ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
-              if (isModOrRefSet(MRI))
-                return false;
-            }
-          }
-      }
-    return true;
+    return noConflictingReadWrites(SI, MSSA, AA, CurLoop, Flags);
   }
 
   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -2330,6 +2286,77 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   return Result;
 }
 
+// For a given store instruction or writeonly call instruction, this function
+// checks that there are no read or writes that conflict with the memory
+// access in the instruction
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+                                    AAResults *AA, Loop *CurLoop,
+                                    SinkAndHoistLICMFlags &Flags) {
+  assert(isa<CallInst>(*I) || isa<StoreInst>(*I));
+  // If there are more accesses than the Promotion cap, then give up as we're
+  // not walking a list that long.
+  if (Flags.tooManyMemoryAccesses())
+    return false;
+
+  auto *IMD = MSSA->getMemoryAccess(I);
+  BatchAAResults BAA(*AA);
+  auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
+  // Make sure there are no clobbers inside the loop.
+  if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
+    return false;
+
+  // If there are interfering Uses (i.e. their defining access is in the
+  // loop), or ordered loads (stored as Defs!), don't move this store.
+  // Could do better here, but this is conservatively correct.
+  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+  // moving accesses. Can also extend to dominating uses.
+  for (auto *BB : CurLoop->getBlocks()) {
+    auto *Accesses = MSSA->getBlockAccesses(BB);
+    if (!Accesses)
+      continue;
+    for (const auto &MA : *Accesses)
+      if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+        auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
+                                             const_cast<MemoryUse *>(MU));
+        if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
+          return false;
+        // Disable hoisting past potentially interfering loads. Optimized
+        // Uses may point to an access outside the loop, as getClobbering
+        // checks the previous iteration when walking the backedge.
+        // FIXME: More precise: no Uses that alias I.
+        if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
+          return false;
+      } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
+        if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+          (void)LI; // Silence warning.
+          assert(!LI->isUnordered() && "Expected unordered load");
+          return false;
+        }
+        // Any call, while it may not be clobbering I, it may be a use.
+        if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+          // Check if the call may read from the memory location written
+          // to by I. Check CI's attributes and arguments; the number of
+          // such checks performed is limited above by NoOfMemAccTooLarge.
+          if (auto *SI = dyn_cast<StoreInst>(I)) {
+            ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
+            if (isModOrRefSet(MRI))
+              return false;
+          } else {
+            auto *SCI = cast<CallInst>(I);
+            // If the instruction we are wanting to hoist is also a call
+            // instruction then we need not check mod/ref info with itself
+            if (SCI == CI)
+              continue;
+            ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
+            if (isModOrRefSet(MRI))
+              return false;
+          }
+        }
+      }
+  }
+  return true;
+}
+
 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                      Loop *CurLoop, Instruction &I,
                                      SinkAndHoistLICMFlags &Flags,

llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll

Lines changed: 1 addition & 1 deletion
@@ -64,14 +64,14 @@ define void @doesnt_need_and(i32 %arg) {
 ; GCN-LABEL: doesnt_need_and:
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
 ; GCN-NEXT: s_mov_b32 s6, 0
 ; GCN-NEXT: s_mov_b64 s[4:5], 0
 ; GCN-NEXT: .LBB1_1: ; %loop
 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT: s_add_i32 s6, s6, 1
 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
 ; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
 ; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
 ; GCN-NEXT: s_cbranch_execnz .LBB1_1
 ; GCN-NEXT: ; %bb.2: ; %loopexit

llvm/test/Transforms/LICM/call-hoisting.ll

Lines changed: 130 additions & 4 deletions
@@ -118,14 +118,16 @@ exit:
 
 declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind
 
+; loop invariant calls to writeonly functions such as the above
+; should be hoisted
 define void @test(ptr %loc) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
 ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -150,10 +152,10 @@ define void @test_multiexit(ptr %loc, i1 %earlycnd) {
 ; CHECK-LABEL: define void @test_multiexit(
 ; CHECK-SAME: ptr [[LOC:%.*]], i1 [[EARLYCND:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
-; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: br i1 [[EARLYCND]], label %[[EXIT1:.*]], label %[[BACKEDGE]]
 ; CHECK: [[BACKEDGE]]:
@@ -183,6 +185,97 @@ exit:
   ret void
 }
 
+; cannot be hoisted because the two pointers can alias one another
+define void @neg_two_pointer(ptr %loc, ptr %otherloc) {
+; CHECK-LABEL: define void @neg_two_pointer(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; hoisted due to pointers not aliasing
+define void @two_pointer_noalias(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @two_pointer_noalias(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; when there's a conflicting read, store call should not be hoisted
+define void @neg_conflicting_read(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @neg_conflicting_read(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @load(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  call void @store(i32 0, ptr %loc)
+  br label %loop
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @load(i32 0, ptr %loc)
+  call void @store(i32 0, ptr %loc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
 define void @neg_lv_value(ptr %loc) {
 ; CHECK-LABEL: define void @neg_lv_value(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
@@ -406,14 +499,47 @@ exit:
   ret void
 }
 
-define void @neg_not_argmemonly(ptr %loc) {
+; when the call is not argmemonly and is not the only memory access we
+; do not hoist
+define void @neg_not_argmemonly(ptr %loc, ptr %loc2) {
 ; CHECK-LABEL: define void @neg_not_argmemonly(
-; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @load(i32 0, ptr [[LOC2]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @not_argmemonly(i32 0, ptr %loc)
+  call void @load(i32 0, ptr %loc2)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; when the call is not argmemonly and is only memory access we hoist it
+define void @not_argmemonly_hoisted(ptr %loc, ptr %loc2) {
+; CHECK-LABEL: define void @not_argmemonly_hoisted(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
 ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
