Skip to content

Commit 3877039

Browse files
authored
[LoopIdiom] Select llvm.experimental.memset.pattern intrinsic rather than memset_pattern16 libcall (#126736)
In order to keep the change as incremental as possible, this only introduces the memset.pattern intrinsic in cases where memset_pattern16 would have been used. Future patches can enable it on targets that don't have the memset_pattern16 libcall, and select it in cases where the libcall isn't directly usable.

As the memset.pattern intrinsic takes the number of times to store the pattern as an argument, unlike memset_pattern16 which takes the number of bytes to write, we no longer try to form an i128 pattern.

Special care is taken for cases where multiple stores in the same loop iteration were combined to form a single pattern. For such cases, we inherit the existing limitation that loops such as the following are supported:

```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 2;
}
```

But the following doesn't result in a memset.pattern (even though it could, by forming an appropriate pattern):

```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 3;
}
```

Addressing this existing deficiency is left for a follow-up due to a desire not to change too much at once (i.e. to target equivalence to the current codegen).

A command line option is introduced to force the selection of the intrinsic even in cases where it otherwise wouldn't be selected (i.e. in cases where the libcall wouldn't have been selected). This is intended as a transitionary option for testing and experimentation, to be removed at a later point.

The only platforms this should impact are those that have the memset_pattern16 libcall (Apple platforms). Testing performed to check for no unexpected codegen changes is described in #126736 (comment).
1 parent 376b3f7 commit 3877039

File tree

7 files changed

+156
-85
lines changed

7 files changed

+156
-85
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 87 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
150150
"with -Os/-Oz"),
151151
cl::init(true), cl::Hidden);
152152

153+
static cl::opt<bool> ForceMemsetPatternIntrinsic(
154+
"loop-idiom-force-memset-pattern-intrinsic",
155+
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
156+
cl::Hidden);
157+
153158
namespace {
154159

155160
class LoopIdiomRecognize {
@@ -323,10 +328,15 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
323328
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
324329

325330
HasMemset = TLI->has(LibFunc_memset);
331+
// TODO: Unconditionally enable use of the memset pattern intrinsic (or at
332+
// least, opt-in via target hook) once we are confident it will never result
333+
// in worse codegen than without. For now, use it only when the target
334+
// supports memset_pattern16 libcall (or unless this is overridden by
335+
// command line option).
326336
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
327337
HasMemcpy = TLI->has(LibFunc_memcpy);
328338

329-
if (HasMemset || HasMemsetPattern || HasMemcpy)
339+
if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy)
330340
if (SE->hasLoopInvariantBackedgeTakenCount(L))
331341
return runOnCountableLoop();
332342

@@ -378,11 +388,13 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
378388
}
379389

380390
/// getMemSetPatternValue - If a strided store of the specified value is safe to
381-
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
382-
/// be passed in. Otherwise, return null.
391+
/// turn into a memset.pattern intrinsic, return the Constant that should
392+
/// be passed in. Otherwise, return null.
383393
///
384-
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
385-
/// just replicate their input array and then pass on to memset_pattern16.
394+
/// TODO this function could allow more constants than it does today (e.g.
395+
/// those over 16 bytes) now it has transitioned to being used for the
396+
/// memset.pattern intrinsic rather than directly the memset_pattern16
397+
/// libcall.
386398
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
387399
// FIXME: This could check for UndefValue because it can be merged into any
388400
// other valid pattern.
@@ -411,14 +423,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
411423
if (Size > 16)
412424
return nullptr;
413425

414-
// If the constant is exactly 16 bytes, just use it.
415-
if (Size == 16)
416-
return C;
426+
// For now, don't handle types that aren't ints, floats, or pointers.
427+
Type *CTy = C->getType();
428+
if (!CTy->isIntOrPtrTy() && !CTy->isFloatingPointTy())
429+
return nullptr;
417430

418-
// Otherwise, we'll use an array of the constants.
419-
unsigned ArraySize = 16 / Size;
420-
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
421-
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
431+
return C;
422432
}
423433

424434
LoopIdiomRecognize::LegalStoreKind
@@ -479,7 +489,8 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
479489
// It looks like we can use SplatValue.
480490
return LegalStoreKind::Memset;
481491
}
482-
if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
492+
if (!UnorderedAtomic && (HasMemsetPattern || ForceMemsetPatternIntrinsic) &&
493+
!DisableLIRP::Memset &&
483494
// Don't create memset_pattern16s with address spaces.
484495
StorePtr->getType()->getPointerAddressSpace() == 0 &&
485496
getMemSetPatternValue(StoredVal, DL)) {
@@ -1061,50 +1072,81 @@ bool LoopIdiomRecognize::processLoopStridedStore(
10611072
return Changed;
10621073

10631074
// Okay, everything looks good, insert the memset.
1075+
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
1076+
Constant *PatternValue = nullptr;
1077+
if (!SplatValue)
1078+
PatternValue = getMemSetPatternValue(StoredVal, DL);
1079+
1080+
// MemsetArg is the number of bytes for the memset libcall, and the number
1081+
// of pattern repetitions if the memset.pattern intrinsic is being used.
1082+
Value *MemsetArg;
1083+
std::optional<int64_t> BytesWritten;
1084+
1085+
if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) {
1086+
const SCEV *TripCountS =
1087+
SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop);
1088+
if (!Expander.isSafeToExpand(TripCountS))
1089+
return Changed;
1090+
const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1091+
if (!ConstStoreSize)
1092+
return Changed;
1093+
Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy,
1094+
Preheader->getTerminator());
1095+
uint64_t PatternRepsPerTrip =
1096+
(ConstStoreSize->getValue()->getZExtValue() * 8) /
1097+
DL->getTypeSizeInBits(PatternValue->getType());
1098+
// If ConstStoreSize is not equal to the width of PatternValue, then
1099+
// MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Else
1100+
// MemsetArg is just TripCount.
1101+
MemsetArg =
1102+
PatternRepsPerTrip == 1
1103+
? TripCount
1104+
: Builder.CreateMul(TripCount,
1105+
Builder.getIntN(IntIdxTy->getIntegerBitWidth(),
1106+
PatternRepsPerTrip));
1107+
if (auto *CI = dyn_cast<ConstantInt>(TripCount))
1108+
BytesWritten =
1109+
CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue();
10641110

1065-
const SCEV *NumBytesS =
1066-
getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1067-
1068-
// TODO: ideally we should still be able to generate memset if SCEV expander
1069-
// is taught to generate the dependencies at the latest point.
1070-
if (!Expander.isSafeToExpand(NumBytesS))
1071-
return Changed;
1111+
} else {
1112+
const SCEV *NumBytesS =
1113+
getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
10721114

1073-
Value *NumBytes =
1074-
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1115+
// TODO: ideally we should still be able to generate memset if SCEV expander
1116+
// is taught to generate the dependencies at the latest point.
1117+
if (!Expander.isSafeToExpand(NumBytesS))
1118+
return Changed;
1119+
MemsetArg =
1120+
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1121+
if (auto *CI = dyn_cast<ConstantInt>(MemsetArg))
1122+
BytesWritten = CI->getZExtValue();
1123+
}
1124+
assert(MemsetArg && "MemsetArg should have been set");
10751125

10761126
AAMDNodes AATags = TheStore->getAAMetadata();
10771127
for (Instruction *Store : Stores)
10781128
AATags = AATags.merge(Store->getAAMetadata());
1079-
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1080-
AATags = AATags.extendTo(CI->getZExtValue());
1129+
if (BytesWritten)
1130+
AATags = AATags.extendTo(BytesWritten.value());
10811131
else
10821132
AATags = AATags.extendTo(-1);
10831133

10841134
CallInst *NewCall;
1085-
if (Value *SplatValue = isBytewiseValue(StoredVal, *DL)) {
1086-
NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
1135+
if (SplatValue) {
1136+
NewCall = Builder.CreateMemSet(BasePtr, SplatValue, MemsetArg,
10871137
MaybeAlign(StoreAlignment),
10881138
/*isVolatile=*/false, AATags);
1089-
} else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
1090-
// Everything is emitted in default address space
1091-
Type *Int8PtrTy = DestInt8PtrTy;
1092-
1093-
StringRef FuncName = "memset_pattern16";
1094-
FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
1095-
Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
1096-
inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
1097-
1098-
// Otherwise we should form a memset_pattern16. PatternValue is known to be
1099-
// an constant array of 16-bytes. Plop the value into a mergable global.
1100-
Constant *PatternValue = getMemSetPatternValue(StoredVal, DL);
1101-
assert(PatternValue && "Expected pattern value.");
1102-
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
1103-
GlobalValue::PrivateLinkage,
1104-
PatternValue, ".memset_pattern");
1105-
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
1106-
GV->setAlignment(Align(16));
1107-
NewCall = Builder.CreateCall(MSP, {BasePtr, GV, NumBytes});
1139+
} else if (ForceMemsetPatternIntrinsic ||
1140+
isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
1141+
assert(isa<SCEVConstant>(StoreSizeSCEV) && "Expected constant store size");
1142+
1143+
NewCall = Builder.CreateIntrinsic(
1144+
Intrinsic::experimental_memset_pattern,
1145+
{DestInt8PtrTy, PatternValue->getType(), IntIdxTy},
1146+
{BasePtr, PatternValue, MemsetArg,
1147+
ConstantInt::getFalse(M->getContext())});
1148+
if (StoreAlignment)
1149+
cast<MemSetPatternInst>(NewCall)->setDestAlignment(*StoreAlignment);
11081150
NewCall->setAAMetadata(AATags);
11091151
} else {
11101152
// Neither a memset, nor memset_pattern16
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
2+
; RUN: opt -passes=loop-idiom -mtriple=riscv64 < %s -S | FileCheck %s
3+
; RUN: opt -passes=loop-idiom -mtriple=riscv64 -loop-idiom-force-memset-pattern-intrinsic < %s -S \
4+
; RUN: | FileCheck -check-prefix=CHECK-INTRIN %s
5+
6+
define dso_local void @double_memset(ptr nocapture %p) {
7+
; CHECK-LABEL: @double_memset(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
10+
; CHECK: for.cond.cleanup:
11+
; CHECK-NEXT: ret void
12+
; CHECK: for.body:
13+
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
14+
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 [[I_07]]
15+
; CHECK-NEXT: store double 3.141590e+00, ptr [[PTR1]], align 1
16+
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
17+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
18+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
19+
;
20+
; CHECK-INTRIN-LABEL: @double_memset(
21+
; CHECK-INTRIN-NEXT: entry:
22+
; CHECK-INTRIN-NEXT: call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 16, i1 false)
23+
; CHECK-INTRIN-NEXT: br label [[FOR_BODY:%.*]]
24+
; CHECK-INTRIN: for.cond.cleanup:
25+
; CHECK-INTRIN-NEXT: ret void
26+
; CHECK-INTRIN: for.body:
27+
; CHECK-INTRIN-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
28+
; CHECK-INTRIN-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
29+
; CHECK-INTRIN-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
30+
; CHECK-INTRIN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
31+
; CHECK-INTRIN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
32+
;
33+
entry:
34+
br label %for.body
35+
36+
for.cond.cleanup:
37+
ret void
38+
39+
for.body:
40+
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
41+
%ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07
42+
store double 3.14159e+00, ptr %ptr1, align 1
43+
%inc = add nuw nsw i64 %i.07, 1
44+
%exitcond.not = icmp eq i64 %inc, 16
45+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
46+
}
47+
;.
48+
; CHECK-INTRIN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
49+
;.

llvm/test/Transforms/LoopIdiom/basic.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ target triple = "x86_64-apple-darwin10.0.0"
77
;.
88
; CHECK: @G = global i32 5
99
; CHECK: @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
10-
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1], align 16
11-
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
1210
;.
1311
define void @test1(ptr %Base, i64 %Size) nounwind ssp {
1412
; CHECK-LABEL: @test1(
@@ -533,7 +531,7 @@ for.end13: ; preds = %for.inc10
533531
define void @test11_pattern(ptr nocapture %P) nounwind ssp {
534532
; CHECK-LABEL: @test11_pattern(
535533
; CHECK-NEXT: entry:
536-
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 40000)
534+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[P:%.*]], i32 1, i64 10000, i1 false)
537535
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
538536
; CHECK: for.body:
539537
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
@@ -596,7 +594,7 @@ for.end: ; preds = %for.body
596594
define void @test13_pattern(ptr nocapture %P) nounwind ssp {
597595
; CHECK-LABEL: @test13_pattern(
598596
; CHECK-NEXT: entry:
599-
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 80000)
597+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.p0.i64(ptr align 4 [[P:%.*]], ptr @G, i64 10000, i1 false)
600598
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
601599
; CHECK: for.body:
602600
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]

llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
66
target triple = "x86_64-apple-darwin10.0.0"
77

88

9-
;.
10-
; CHECK: @.memset_pattern = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
11-
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
12-
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
13-
;.
149
define dso_local void @double_memset(ptr nocapture %p) {
1510
; CHECK-LABEL: @double_memset(
1611
; CHECK-NEXT: entry:
17-
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 128), !tbaa [[TBAA0:![0-9]+]]
12+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 16, i1 false), !tbaa [[TBAA0:![0-9]+]]
1813
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1914
; CHECK: for.cond.cleanup:
2015
; CHECK-NEXT: ret void
@@ -44,7 +39,7 @@ for.body:
4439
define dso_local void @struct_memset(ptr nocapture %p) {
4540
; CHECK-LABEL: @struct_memset(
4641
; CHECK-NEXT: entry:
47-
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 128), !tbaa [[TBAA4:![0-9]+]]
42+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 16, i1 false), !tbaa [[TBAA4:![0-9]+]]
4843
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4944
; CHECK: for.cond.cleanup:
5045
; CHECK-NEXT: ret void
@@ -73,16 +68,15 @@ for.body:
7368
define dso_local void @var_memset(ptr nocapture %p, i64 %len) {
7469
; CHECK-LABEL: @var_memset(
7570
; CHECK-NEXT: entry:
76-
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[LEN:%.*]], 3
77-
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.2, i64 [[TMP0]])
71+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 [[TMP0:%.*]], i1 false)
7872
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
7973
; CHECK: for.cond.cleanup:
8074
; CHECK-NEXT: ret void
8175
; CHECK: for.body:
8276
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
8377
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
8478
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
85-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[LEN]]
79+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
8680
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
8781
;
8882
entry:
@@ -116,7 +110,7 @@ for.body:
116110
!21 = !{!22, !20, i64 0}
117111
!22 = !{!"B", !20, i64 0}
118112
;.
119-
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
113+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
120114
;.
121115
; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
122116
; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}

llvm/test/Transforms/LoopIdiom/struct_pattern.ll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,15 @@ target triple = "x86_64-apple-darwin10.0.0"
1616
;}
1717

1818

19-
;.
20-
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
21-
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
22-
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
23-
;.
2419
define void @bar1(ptr %f, i32 %n) nounwind ssp {
2520
; CHECK-LABEL: @bar1(
2621
; CHECK-NEXT: entry:
2722
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
2823
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
2924
; CHECK: for.body.preheader:
3025
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
31-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
32-
; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i64 [[TMP1]])
26+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
27+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
3328
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
3429
; CHECK: for.body:
3530
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -82,8 +77,8 @@ define void @bar2(ptr %f, i32 %n) nounwind ssp {
8277
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
8378
; CHECK: for.body.preheader:
8479
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
85-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
86-
; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern.1, i64 [[TMP1]])
80+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
81+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
8782
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
8883
; CHECK: for.body:
8984
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -142,7 +137,8 @@ define void @bar3(ptr nocapture %f, i32 %n) nounwind ssp {
142137
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
143138
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP1]], [[TMP4]]
144139
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[F:%.*]], i64 [[TMP5]]
145-
; CHECK-NEXT: call void @memset_pattern16(ptr [[UGLYGEP]], ptr @.memset_pattern.2, i64 [[TMP1]])
140+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP0]], 2
141+
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[UGLYGEP]], i32 2, i64 [[TMP7]], i1 false)
146142
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
147143
; CHECK: for.body:
148144
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -292,5 +288,5 @@ for.end: ; preds = %for.end.loopexit, %
292288
}
293289
;.
294290
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
295-
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
291+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
296292
;.

0 commit comments

Comments
 (0)