Skip to content

Commit ffcac69

Browse files
committed
[InstCombine] Add limit for expansion of gep chains
When converting a gep subtraction / comparison to an offset subtraction / comparison, avoid expanding very long multi-use gep chains. Another improvement we could make is to expand a one-use gep followed by a multi-use gep differently, by rewriting the multi-use gep to include the one-use offsets. But I think we want to have some kind of complexity cut-off in any case.
1 parent cc6a864 commit ffcac69

File tree

5 files changed

+167
-2
lines changed

5 files changed

+167
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2144,13 +2144,35 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
21442144
return Base;
21452145
}
21462146

2147+
/// Decide whether expanding the GEP chains in LHSGEPs and RHSGEPs into
/// explicit offset arithmetic is considered too costly.
///
/// Within each chain, only GEPs visited after the first multi-use GEP are
/// counted. Every counted GEP with a non-constant index adds one to the cost;
/// all counted constant-index GEPs (across both chains) together add one.
///
/// \returns true when the resulting cost is greater than 2.
bool CommonPointerBase::isExpensive() const {
2148+
// Becomes true once any counted GEP has all-constant indices. Declared
// outside the lambda, so it is shared between the LHS and RHS chains.
bool SeenConst = false;
2149+
// Running count of counted GEPs that have a non-constant index.
unsigned NumGEPs = 0;
2150+
// Accumulates the cost contribution of a single chain into the two
// variables above.
auto ProcessGEPs = [&SeenConst, &NumGEPs](ArrayRef<GEPOperator *> GEPs) {
2151+
// Per-chain state: reset on each invocation of the lambda.
bool SeenMultiUse = false;
2152+
for (GEPOperator *GEP : GEPs) {
2153+
// Only count GEPs after the first multi-use GEP. For the first one,
2154+
// we will directly reuse the offset.
2155+
if (SeenMultiUse) {
2156+
bool IsConst = GEP->hasAllConstantIndices();
2157+
SeenConst |= IsConst;
2158+
NumGEPs += !IsConst;
2159+
}
2160+
// Updated after the check above, so the first multi-use GEP itself is
// never counted, only the GEPs visited after it.
SeenMultiUse |= !GEP->hasOneUse();
2161+
}
2162+
};
2163+
ProcessGEPs(LHSGEPs);
2164+
ProcessGEPs(RHSGEPs);
2165+
// All counted constant-index GEPs are charged as a single unit.
// NOTE(review): presumably because their offsets fold into one constant --
// confirm against the offset expansion code.
NumGEPs += SeenConst;
2166+
// Complexity cut-off: more than two counted units is treated as expensive.
return NumGEPs > 2;
2167+
}
2168+
21472169
/// Optimize pointer differences into the same array into a size. Consider:
21482170
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
21492171
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
21502172
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
21512173
Type *Ty, bool IsNUW) {
21522174
CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
2153-
if (!Base.Ptr)
2175+
if (!Base.Ptr || Base.isExpensive())
21542176
return nullptr;
21552177

21562178
// To avoid duplicating the offset arithmetic, rewrite the GEP to use the

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
709709
};
710710

711711
CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
712-
if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
712+
if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
713713
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
714714
Type *IdxTy = DL.getIndexType(GEPLHS->getType());
715715
Value *Offset =

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,9 @@ struct CommonPointerBase {
896896
GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
897897

898898
static CommonPointerBase compute(Value *LHS, Value *RHS);
899+
900+
/// Whether expanding the GEP chains is expensive.
901+
bool isExpensive() const;
899902
};
900903

901904
} // end namespace llvm

llvm/test/Transforms/InstCombine/icmp-gep.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,3 +849,91 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
849849
%cmp = icmp ugt ptr %gep2, %base
850850
ret i1 %cmp
851851
}
852+
853+
; The compared pointer %gep3 has an extra use (@use) and is built on two
; further variable-index GEPs. The CHECK lines expect the pointer compare to
; be rewritten as an i64 compare on the offsets.
define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
854+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
855+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
856+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
857+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
858+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
859+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
860+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
861+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
862+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
863+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX]]
864+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
865+
; CHECK-NEXT: ret i1 [[CMP]]
866+
;
867+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
868+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
869+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
870+
call void @use(ptr %gep3)
871+
%cmp = icmp eq ptr %gep3, %base
872+
ret i1 %cmp
873+
}
874+
875+
; The compared pointer %gep4 is a one-use GEP on top of %gep3, which has an
; extra use (@use) and is itself built on two further variable-index GEPs.
; The CHECK lines expect the compare to be rewritten on i64 offsets.
define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
876+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep(
877+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
878+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
879+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
880+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
881+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
882+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
883+
; CHECK-NEXT: [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
884+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
885+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
886+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
887+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
888+
; CHECK-NEXT: ret i1 [[CMP]]
889+
;
890+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
891+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
892+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
893+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
894+
call void @use(ptr %gep3)
895+
%cmp = icmp eq ptr %gep4, %base
896+
ret i1 %cmp
897+
}
898+
899+
; Chain of four GEPs alternating constant (4) and variable indices; the
; compared pointer %gep4 has an extra use (@use). The CHECK lines expect an
; i64 offset compare in which both constant offsets appear as a single 32.
define i1 @gep_multiple_multi_use_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
900+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit_consts(
901+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
902+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
903+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
904+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
905+
; CHECK-NEXT: [[GEP4_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
906+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
907+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
908+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP2_IDX]], 32
909+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP4_IDX]]
910+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
911+
; CHECK-NEXT: ret i1 [[CMP]]
912+
;
913+
%gep1 = getelementptr i32, ptr %base, i64 4
914+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx1
915+
%gep3 = getelementptr i32, ptr %gep2, i64 4
916+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx2
917+
call void @use(ptr %gep4)
918+
%cmp = icmp eq ptr %gep4, %base
919+
ret i1 %cmp
920+
}
921+
922+
; The compared pointer %gep4 has an extra use (@use) and is built on three
; further variable-index GEPs. The CHECK lines expect the GEPs and the
; pointer compare to be left unchanged.
define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
923+
; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
924+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
925+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
926+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[GEP2]], i64 [[IDX3:%.*]]
927+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX4:%.*]]
928+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
929+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
930+
; CHECK-NEXT: ret i1 [[CMP]]
931+
;
932+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
933+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
934+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
935+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
936+
call void @use(ptr %gep4)
937+
%cmp = icmp eq ptr %gep4, %base
938+
ret i1 %cmp
939+
}

llvm/test/Transforms/InstCombine/sub-gep.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,3 +1158,55 @@ define i64 @nuw_ptrdiff_mul_nsw_nneg_scale_multiuse(ptr %base, i64 %idx) {
11581158
%diff = sub nuw i64 %lhs, %rhs
11591159
ret i64 %diff
11601160
}
1161+
1162+
; Pointer difference between two chains of two GEPs each, both rooted at
; %base; the final GEP of each chain (%p2, %p4) has an extra use (@use). The
; CHECK lines expect the sub to become a difference of the summed indices.
define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
1163+
; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
1164+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
1165+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
1166+
; CHECK-NEXT: call void @use(ptr [[P2]])
1167+
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
1168+
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
1169+
; CHECK-NEXT: call void @use(ptr [[P4]])
1170+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IDX1]], [[IDX2]]
1171+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
1172+
; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
1173+
; CHECK-NEXT: ret i64 [[GEPDIFF]]
1174+
;
1175+
%p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
1176+
%p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
1177+
call void @use(ptr %p2)
1178+
%p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
1179+
%p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
1180+
call void @use(ptr %p4)
1181+
%i1 = ptrtoint ptr %p4 to i64
1182+
%i2 = ptrtoint ptr %p2 to i64
1183+
%d = sub i64 %i2, %i1
1184+
ret i64 %d
1185+
}
1186+
1187+
; Pointer difference between a two-GEP chain ending in %p2 and a three-GEP
; chain ending in %p5, both rooted at %base; %p2 and %p5 each have an extra
; use (@use). The CHECK lines expect the ptrtoint/sub sequence to be kept.
define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
1188+
; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
1189+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
1190+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
1191+
; CHECK-NEXT: call void @use(ptr [[P2]])
1192+
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
1193+
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
1194+
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P4]], i64 [[IDX5:%.*]]
1195+
; CHECK-NEXT: call void @use(ptr [[P5]])
1196+
; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[P5]] to i64
1197+
; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[P2]] to i64
1198+
; CHECK-NEXT: [[D:%.*]] = sub i64 [[I2]], [[I1]]
1199+
; CHECK-NEXT: ret i64 [[D]]
1200+
;
1201+
%p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
1202+
%p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
1203+
call void @use(ptr %p2)
1204+
%p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
1205+
%p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
1206+
%p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
1207+
call void @use(ptr %p5)
1208+
%i1 = ptrtoint ptr %p5 to i64
1209+
%i2 = ptrtoint ptr %p2 to i64
1210+
%d = sub i64 %i2, %i1
1211+
ret i64 %d
1212+
}

0 commit comments

Comments
 (0)