diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 1ba548b6ff062..a6bbbf371a4db 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2144,13 +2144,35 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
   return Base;
 }
 
+bool CommonPointerBase::isExpensive() const {
+  bool SeenConst = false;
+  unsigned NumGEPs = 0;
+  auto ProcessGEPs = [&SeenConst, &NumGEPs](ArrayRef<GEPOperator *> GEPs) {
+    bool SeenMultiUse = false;
+    for (GEPOperator *GEP : GEPs) {
+      // Only count GEPs after the first multi-use GEP. For the first one,
+      // we will directly reuse the offset.
+      if (SeenMultiUse) {
+        bool IsConst = GEP->hasAllConstantIndices();
+        SeenConst |= IsConst;
+        NumGEPs += !IsConst;
+      }
+      SeenMultiUse |= !GEP->hasOneUse();
+    }
+  };
+  ProcessGEPs(LHSGEPs);
+  ProcessGEPs(RHSGEPs);
+  NumGEPs += SeenConst;
+  return NumGEPs > 2;
+}
+
 /// Optimize pointer differences into the same array into a size. Consider:
 ///  &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
 Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
                                                    Type *Ty, bool IsNUW) {
   CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
-  if (!Base.Ptr)
+  if (!Base.Ptr || Base.isExpensive())
     return nullptr;
 
   // To avoid duplicating the offset arithmetic, rewrite the GEP to use the
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6737b50405ee2..cbe4e3718a75c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -709,7 +709,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
   };
 
   CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
-  if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
+  if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
     // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
     Type *IdxTy = DL.getIndexType(GEPLHS->getType());
     Value *Offset =
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9adde8094d44d..4ffc11db6856f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -896,6 +896,9 @@ struct CommonPointerBase {
   GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
 
   static CommonPointerBase compute(Value *LHS, Value *RHS);
+
+  /// Whether expanding the GEP chains is expensive.
+  bool isExpensive() const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 3f104056fb1f2..6c7572089532a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -849,3 +849,91 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
   %cmp = icmp ugt ptr %gep2, %base
   ret i1 %cmp
 }
+
+define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  call void @use(ptr %gep3)
+  %cmp = icmp eq ptr %gep3, %base
+  ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT:    [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  call void @use(ptr %gep3)
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_consts(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
+; CHECK-NEXT:    [[GEP4_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP4]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[GEP2_IDX]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[GEP4_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 4
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx1
+  %gep3 = getelementptr i32, ptr %gep2, i64 4
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx2
+  call void @use(ptr %gep4)
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i32, ptr [[GEP2]], i64 [[IDX3:%.*]]
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP4]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  call void @use(ptr %gep4)
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 11af6b4a0197f..ddede0996d35c 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -1158,3 +1158,55 @@ define i64 @nuw_ptrdiff_mul_nsw_nneg_scale_multiuse(ptr %base, i64 %idx) {
   %diff = sub nuw i64 %lhs, %rhs
   ret i64 %diff
 }
+
+define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P2]])
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT:    [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P4]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[IDX1]], [[IDX2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
+;
+  %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+  %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+  call void @use(ptr %p2)
+  %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+  %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+  call void @use(ptr %p4)
+  %i1 = ptrtoint ptr %p4 to i64
+  %i2 = ptrtoint ptr %p2 to i64
+  %d = sub i64 %i2, %i1
+  ret i64 %d
+}
+
+define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
+; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P2]])
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT:    [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT:    [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P4]], i64 [[IDX5:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P5]])
+; CHECK-NEXT:    [[I1:%.*]] = ptrtoint ptr [[P5]] to i64
+; CHECK-NEXT:    [[I2:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-NEXT:    [[D:%.*]] = sub i64 [[I2]], [[I1]]
+; CHECK-NEXT:    ret i64 [[D]]
+;
+  %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+  %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+  call void @use(ptr %p2)
+  %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+  %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+  %p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
+  call void @use(ptr %p5)
+  %i1 = ptrtoint ptr %p5 to i64
+  %i2 = ptrtoint ptr %p2 to i64
+  %d = sub i64 %i2, %i1
+  ret i64 %d
+}
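As a rough worked count for the two sub-gep.ll tests above (assuming the GEP lists in CommonPointerBase are walked from the compared pointer towards the common base, which is what the test output suggests): in @multiple_geps_multi_use_below_limit only %p1 and %p3 lie past a multi-use GEP (%p2 and %p4 respectively), so two variable offsets would need to be re-expanded and the fold still fires; in @multiple_geps_multi_use_above_limit the %p5 side contributes %p4 and %p3 on top of %p1 from the %p2 side, giving three, which exceeds the limit of two in isExpensive(), so the ptrtoint/sub sequence is left untouched.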