Skip to content

Commit d3d77f7

Browse files
authored
[EarlyCSE,TTI] Don't create new, unused instructions. (#134534)
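getOrCreateResultFromMemIntrinsic can modify the current function by inserting new instructions without EarlyCSE keeping track of the changes. Introduce a new CanCreate argument, and update the function to create new instructions only when CanCreate is true. In EarlyCSE::getMatchingValue, pass CanCreate = false for the early store-result comparison, and CanCreate = true only when computing the result that is actually returned. Fixes #145183.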
1 parent d3270ad commit d3d77f7

File tree

7 files changed

+46
-23
lines changed

7 files changed

+46
-23
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,12 +1702,13 @@ class TargetTransformInfo {
17021702
/// unordered-atomic memory intrinsic.
17031703
LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
17041704

1705-
/// \returns A value which is the result of the given memory intrinsic. New
1706-
/// instructions may be created to extract the result from the given intrinsic
1707-
/// memory operation. Returns nullptr if the target cannot create a result
1708-
/// from the given intrinsic.
1709-
LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1710-
Type *ExpectedType) const;
1705+
/// \returns A value which is the result of the given memory intrinsic. If \p
1706+
/// CanCreate is true, new instructions may be created to extract the result
1707+
/// from the given intrinsic memory operation. Returns nullptr if the target
1708+
/// cannot create a result from the given intrinsic.
1709+
LLVM_ABI Value *
1710+
getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
1711+
bool CanCreate = true) const;
17111712

17121713
/// \returns The type to use in a loop expansion of a memcpy call.
17131714
LLVM_ABI Type *getMemcpyLoopLoweringType(

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -983,8 +983,9 @@ class TargetTransformInfoImplBase {
983983
return 0;
984984
}
985985

986-
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
987-
Type *ExpectedType) const {
986+
virtual Value *
987+
getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
988+
bool CanCreate = true) const {
988989
return nullptr;
989990
}
990991

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,8 +1294,9 @@ unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
12941294
}
12951295

12961296
Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
1297-
IntrinsicInst *Inst, Type *ExpectedType) const {
1298-
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1297+
IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate) const {
1298+
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType,
1299+
CanCreate);
12991300
}
13001301

13011302
Type *TargetTransformInfo::getMemcpyLoopLoweringType(

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4967,9 +4967,9 @@ void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
49674967
BaseT::getPeelingPreferences(L, SE, PP);
49684968
}
49694969

4970-
Value *
4971-
AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
4972-
Type *ExpectedType) const {
4970+
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
4971+
Type *ExpectedType,
4972+
bool CanCreate) const {
49734973
switch (Inst->getIntrinsicID()) {
49744974
default:
49754975
return nullptr;
@@ -4978,7 +4978,7 @@ AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
49784978
case Intrinsic::aarch64_neon_st4: {
49794979
// Create a struct type
49804980
StructType *ST = dyn_cast<StructType>(ExpectedType);
4981-
if (!ST)
4981+
if (!CanCreate || !ST)
49824982
return nullptr;
49834983
unsigned NumElts = Inst->arg_size() - 1;
49844984
if (ST->getNumElements() != NumElts)

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,9 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
270270
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
271271
TTI::PeelingPreferences &PP) const override;
272272

273-
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
274-
Type *ExpectedType) const override;
273+
Value *
274+
getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
275+
bool CanCreate = true) const override;
275276

276277
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
277278
MemIntrinsicInfo &Info) const override;

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,8 @@ class EarlyCSE {
958958
bool overridingStores(const ParseMemoryInst &Earlier,
959959
const ParseMemoryInst &Later);
960960

961-
Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
961+
Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType,
962+
bool CanCreate) const {
962963
// TODO: We could insert relevant casts on type mismatch.
963964
// The load or the store's first operand.
964965
Value *V;
@@ -971,7 +972,8 @@ class EarlyCSE {
971972
V = II->getOperand(0);
972973
break;
973974
default:
974-
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
975+
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType,
976+
CanCreate);
975977
}
976978
} else {
977979
V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
@@ -1255,9 +1257,10 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
12551257

12561258
// For stores check the result values before checking memory generation
12571259
// (otherwise isSameMemGeneration may crash).
1258-
Value *Result = MemInst.isStore()
1259-
? getOrCreateResult(Matching, Other->getType())
1260-
: nullptr;
1260+
Value *Result =
1261+
MemInst.isStore()
1262+
? getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/false)
1263+
: nullptr;
12611264
if (MemInst.isStore() && InVal.DefInst != Result)
12621265
return nullptr;
12631266

@@ -1278,7 +1281,7 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
12781281
return nullptr;
12791282

12801283
if (!Result)
1281-
Result = getOrCreateResult(Matching, Other->getType());
1284+
Result = getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/true);
12821285
return Result;
12831286
}
12841287

llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse -earlycse-debug-hash | FileCheck %s
3-
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes='early-cse<memssa>' | FileCheck %s
3+
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes='early-cse<memssa>' -verify-analysis-invalidation | FileCheck %s
44

55
define <4 x i32> @test_cse(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
66
; CHECK-LABEL: define <4 x i32> @test_cse(
@@ -324,6 +324,22 @@ for.end: ; preds = %for.cond
324324
ret <4 x i32> %res.0
325325
}
326326

327+
define void @test_ld4_st4_no_cse(ptr %p, <16 x i8> %A, <16 x i8> %B) {
328+
; CHECK-LABEL: define void @test_ld4_st4_no_cse(
329+
; CHECK-SAME: ptr [[P:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
330+
; CHECK-NEXT: [[ENTRY:.*:]]
331+
; CHECK-NEXT: [[LD:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[P]])
332+
; CHECK-NEXT: [[EXT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[LD]], 0
333+
; CHECK-NEXT: tail call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[EXT]], <16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> zeroinitializer, ptr [[P]])
334+
; CHECK-NEXT: ret void
335+
;
336+
entry:
337+
%ld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %p)
338+
%ext = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld, 0
339+
tail call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %ext, <16 x i8> %A, <16 x i8> %B, <16 x i8> zeroinitializer, ptr %p)
340+
ret void
341+
}
342+
327343
; Function Attrs: nounwind
328344
declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr nocapture)
329345

0 commit comments

Comments
 (0)