Skip to content

Commit b8917ac

Browse files
committed
[LICM] Reassociate GEPs to allow hoisting
Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if this would make the inner GEP loop invariant and thus hoistable. This is intended to replace an InstCombine fold that does this (in https://github.com/llvm/llvm-project/blob/04f61fb73dc6a994ab267d431f2fdaedc67430ff/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp#L2006). The problem with the InstCombine fold is that LoopInfo is an optional dependency, so it is not performed reliably. Differential Revision: https://reviews.llvm.org/D146813
1 parent cd91992 commit b8917ac

File tree

3 files changed

+135
-24
lines changed

3 files changed

+135
-24
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
173173
/// Aggregates various functions for hoisting computations out of loop.
174174
static bool hoistArithmetics(Instruction &I, Loop &L,
175175
ICFLoopSafetyInfo &SafetyInfo,
176-
MemorySSAUpdater &MSSAU);
176+
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
177+
DominatorTree *DT);
177178
/// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
178179
/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
179180
/// minimum can be computed outside of loop, and A is not a loop-invariant.
@@ -989,7 +990,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
989990

990991
// Try to reassociate instructions so that part of computations can be
991992
// done out of loop.
992-
if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU)) {
993+
if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
993994
Changed = true;
994995
continue;
995996
}
@@ -2495,16 +2496,71 @@ static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
24952496
return true;
24962497
}
24972498

2499+
/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2500+
/// this allows hoisting the inner GEP.
2501+
static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2502+
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2503+
DominatorTree *DT) {
2504+
auto *GEP = dyn_cast<GetElementPtrInst>(&I);
2505+
if (!GEP)
2506+
return false;
2507+
2508+
auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
2509+
if (!Src || !Src->hasOneUse() || !L.contains(Src))
2510+
return false;
2511+
2512+
Value *SrcPtr = Src->getPointerOperand();
2513+
auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
2514+
if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
2515+
return false;
2516+
2517+
assert(!all_of(Src->indices(), LoopInvariant) &&
2518+
"Would have been hoisted already");
2519+
2520+
// The swapped GEPs are inbounds if both original GEPs are inbounds
2521+
// and the sign of the offsets is the same. For simplicity, only
2522+
// handle both offsets being non-negative.
2523+
const DataLayout &DL = GEP->getModule()->getDataLayout();
2524+
auto NonNegative = [&](Value *V) {
2525+
return isKnownNonNegative(V, DL, 0, AC, GEP, DT);
2526+
};
2527+
bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2528+
all_of(Src->indices(), NonNegative) &&
2529+
all_of(GEP->indices(), NonNegative);
2530+
2531+
BasicBlock *Preheader = L.getLoopPreheader();
2532+
IRBuilder<> Builder(Preheader->getTerminator());
2533+
Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
2534+
SmallVector<Value *>(GEP->indices()),
2535+
"invariant.gep", IsInBounds);
2536+
Builder.SetInsertPoint(GEP);
2537+
Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
2538+
SmallVector<Value *>(Src->indices()), "gep",
2539+
IsInBounds);
2540+
GEP->replaceAllUsesWith(NewGEP);
2541+
eraseInstruction(*GEP, SafetyInfo, MSSAU);
2542+
eraseInstruction(*Src, SafetyInfo, MSSAU);
2543+
return true;
2544+
}
2545+
24982546
/// Tries the reassociation-based hoisting helpers on \p I in order: first
/// hoistMinMax(), then hoistGEP(). As soon as one of them reports success the
/// matching statistic counter is bumped and true is returned; if neither
/// applies, returns false. \p AC and \p DT are only forwarded to hoistGEP().
static bool hoistArithmetics(Instruction &I, Loop &L,
24992547
ICFLoopSafetyInfo &SafetyInfo,
2500-
MemorySSAUpdater &MSSAU) {
2548+
MemorySSAUpdater &MSSAU,
2549+
AssumptionCache *AC, DominatorTree *DT) {
25012550
// Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
25022551
// into (x < min(INV1, INV2)), and hoisting the invariant part of this
25032552
// expression out of the loop.
25042553
if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
25052554
++NumMinMaxHoisted;
25062555
return true;
25072556
}
2557+
2558+
// Try to hoist GEPs by reassociation.
2559+
if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2560+
++NumHoisted;
2561+
return true;
2562+
}
2563+
25082564
return false;
25092565
}
25102566

llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,9 @@ define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA
4141
; CHECK-NEXT: .p2align 4
4242
; CHECK-NEXT: .LBB0_4: # %if.end9
4343
; CHECK-NEXT: #
44-
; CHECK-NEXT: add 9, 3, 9
45-
; CHECK-NEXT: lwz 10, 4(9)
44+
; CHECK-NEXT: lwzx 10, 7, 9
4645
; CHECK-NEXT: addi 10, 10, 1
47-
; CHECK-NEXT: stw 10, 4(9)
46+
; CHECK-NEXT: stwx 10, 7, 9
4847
; CHECK-NEXT: b .LBB0_1
4948
; CHECK-NEXT: .LBB0_5: # %if.then
5049
; CHECK-NEXT: lwax 3, 9, 3

llvm/test/Transforms/LICM/gep-reassociate.ll

Lines changed: 74 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ define void @only_one_inbounds(ptr %ptr, i1 %c, i32 %arg) {
1111
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) {
1212
; CHECK-NEXT: entry:
1313
; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG]] to i64
14+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[ARG_EXT]]
1415
; CHECK-NEXT: br label [[LOOP:%.*]]
1516
; CHECK: loop:
1617
; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32()
1718
; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64
18-
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]]
19-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 [[ARG_EXT]]
20-
; CHECK-NEXT: call void @use(ptr [[PTR3]])
19+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]]
20+
; CHECK-NEXT: call void @use(ptr [[GEP]])
2121
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
2222
; CHECK: exit:
2323
; CHECK-NEXT: ret void
@@ -42,13 +42,13 @@ define void @both_inbounds_one_neg(ptr %ptr, i1 %c) {
4242
; CHECK-LABEL: define void @both_inbounds_one_neg
4343
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) {
4444
; CHECK-NEXT: entry:
45+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1
4546
; CHECK-NEXT: br label [[LOOP:%.*]]
4647
; CHECK: loop:
4748
; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32()
4849
; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64
49-
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]]
50-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 -1
51-
; CHECK-NEXT: call void @use(ptr [[PTR3]])
50+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]]
51+
; CHECK-NEXT: call void @use(ptr [[GEP]])
5252
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
5353
; CHECK: exit:
5454
; CHECK-NEXT: ret void
@@ -72,13 +72,13 @@ define void @both_inbounds_pos(ptr %ptr, i1 %c) {
7272
; CHECK-LABEL: define void @both_inbounds_pos
7373
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) {
7474
; CHECK-NEXT: entry:
75+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
7576
; CHECK-NEXT: br label [[LOOP:%.*]]
7677
; CHECK: loop:
7778
; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32()
7879
; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64
79-
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]]
80-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR2]], i64 1
81-
; CHECK-NEXT: call void @use(ptr [[PTR3]])
80+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]]
81+
; CHECK-NEXT: call void @use(ptr [[GEP]])
8282
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
8383
; CHECK: exit:
8484
; CHECK-NEXT: ret void
@@ -102,12 +102,12 @@ define void @different_elem_types(ptr %ptr, i1 %c, i64 %arg) {
102102
; CHECK-LABEL: define void @different_elem_types
103103
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) {
104104
; CHECK-NEXT: entry:
105+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i64, ptr [[PTR]], i64 [[ARG]]
105106
; CHECK-NEXT: br label [[LOOP:%.*]]
106107
; CHECK: loop:
107108
; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64()
108-
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[VAL]]
109-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i64, ptr [[PTR2]], i64 [[ARG]]
110-
; CHECK-NEXT: call void @use(ptr [[PTR3]])
109+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[VAL]]
110+
; CHECK-NEXT: call void @use(ptr [[GEP]])
111111
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
112112
; CHECK: exit:
113113
; CHECK-NEXT: ret void
@@ -126,6 +126,62 @@ exit:
126126
ret void
127127
}
128128

129+
; The loop-variant inner GEP is indexed with an i64 while the loop-invariant
; outer GEP is indexed with an i32. The expected output has the i32-indexed
; GEP in the entry block and the i64-indexed GEP applied on top of it inside
; the loop.
define void @different_index_types(ptr %ptr, i1 %c, i32 %arg) {
130+
; CHECK-LABEL: define void @different_index_types
131+
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) {
132+
; CHECK-NEXT: entry:
133+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[ARG]]
134+
; CHECK-NEXT: br label [[LOOP:%.*]]
135+
; CHECK: loop:
136+
; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64()
137+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL]]
138+
; CHECK-NEXT: call void @use(ptr [[GEP]])
139+
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
140+
; CHECK: exit:
141+
; CHECK-NEXT: ret void
142+
;
143+
entry:
144+
br label %loop
145+
146+
loop:
147+
%val = call i64 @get.i64()
148+
%ptr2 = getelementptr i8, ptr %ptr, i64 %val
149+
%ptr3 = getelementptr i8, ptr %ptr2, i32 %arg
150+
call void @use(ptr %ptr3)
151+
br i1 %c, label %loop, label %exit
152+
153+
exit:
154+
ret void
155+
}
156+
157+
; The loop-variant inner GEP has a single index (source element type i8) while
; the loop-invariant outer GEP has two indices (source element type [0 x i8]).
; The expected output has the two-index GEP in the entry block and the
; single-index GEP applied on top of it inside the loop.
define void @different_index_count(ptr %ptr, i1 %c, i64 %arg1, i64 %arg2) {
158+
; CHECK-LABEL: define void @different_index_count
159+
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
160+
; CHECK-NEXT: entry:
161+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]]
162+
; CHECK-NEXT: br label [[LOOP:%.*]]
163+
; CHECK: loop:
164+
; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64()
165+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL]]
166+
; CHECK-NEXT: call void @use(ptr [[GEP]])
167+
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
168+
; CHECK: exit:
169+
; CHECK-NEXT: ret void
170+
;
171+
entry:
172+
br label %loop
173+
174+
loop:
175+
%val = call i64 @get.i64()
176+
%ptr2 = getelementptr i8, ptr %ptr, i64 %val
177+
%ptr3 = getelementptr [0 x i8], ptr %ptr2, i64 %arg1, i64 %arg2
178+
call void @use(ptr %ptr3)
179+
br i1 %c, label %loop, label %exit
180+
181+
exit:
182+
ret void
183+
}
184+
129185
define void @src_has_extra_use(ptr %ptr, i1 %c, i64 %arg) {
130186
; CHECK-LABEL: define void @src_has_extra_use
131187
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) {
@@ -246,13 +302,13 @@ define void @multiple_indices(ptr %ptr, i1 %c, i64 %arg1, i64 %arg2) {
246302
; CHECK-LABEL: define void @multiple_indices
247303
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
248304
; CHECK-NEXT: entry:
305+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]]
249306
; CHECK-NEXT: br label [[LOOP:%.*]]
250307
; CHECK: loop:
251308
; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64()
252309
; CHECK-NEXT: [[VAL2:%.*]] = call i64 @get.i64()
253-
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]]
254-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]]
255-
; CHECK-NEXT: call void @use(ptr [[PTR3]])
310+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 [[VAL1]], i64 [[VAL2]]
311+
; CHECK-NEXT: call void @use(ptr [[GEP]])
256312
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
257313
; CHECK: exit:
258314
; CHECK-NEXT: ret void
@@ -308,12 +364,12 @@ define void @multiple_indices_very_invariant(ptr %ptr, i1 %c, i64 %arg1, i64 %ar
308364
; CHECK-LABEL: define void @multiple_indices_very_invariant
309365
; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]], i64 [[ARG3:%.*]]) {
310366
; CHECK-NEXT: entry:
367+
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]]
311368
; CHECK-NEXT: br label [[LOOP:%.*]]
312369
; CHECK: loop:
313370
; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64()
314-
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG3]], i64 [[VAL1]]
315-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]]
316-
; CHECK-NEXT: call void @use(ptr [[PTR3]])
371+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 [[ARG3]], i64 [[VAL1]]
372+
; CHECK-NEXT: call void @use(ptr [[GEP]])
317373
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
318374
; CHECK: exit:
319375
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)