Skip to content

Commit 1f7cbd9

Browse files
committed
[RISCV] Add optimization for memset inline
1 parent c72f8ba commit 1f7cbd9

File tree

5 files changed

+91
-130
lines changed

5 files changed

+91
-130
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1664,7 +1664,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
16641664
PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
16651665

16661666
MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1667-
MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1667+
MaxStoresPerMemset = Subtarget.hasVInstructions()
1668+
? (Subtarget.getRealMinVLen() / 8 *
1669+
Subtarget.getMaxLMULForFixedLengthVectors() /
1670+
(Subtarget.is64Bit() ? 8 : 4))
1671+
: Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
16681672

16691673
MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
16701674
MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
@@ -23808,8 +23812,23 @@ EVT RISCVTargetLowering::getOptimalMemOpType(
2380823812
// a large scalar constant and instead use vmv.v.x/i to do the
2380923813
// broadcast. For everything else, prefer ELenVT to minimize VL and thus
2381023814
// maximize the chance we can encode the size in the vsetvli.
23811-
MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
23812-
MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
23815+
// If the Op size is greater than an LMUL8 memory operation, inlining the
23816+
// memset is not supported. Otherwise, return an EVT based on the Op size to
23817+
// avoid redundant splitting and merging operations.
23818+
if (Op.isMemset()) {
23819+
if (!Op.isZeroMemset())
23820+
return EVT::getVectorVT(Context, MVT::i8, Op.size());
23821+
if (Op.size() >
23822+
Subtarget.getMaxLMULForFixedLengthVectors() * MinVLenInBytes)
23823+
return MVT::Other;
23824+
if (Subtarget.hasVInstructionsI64() && Op.size() % 8 == 0)
23825+
return EVT::getVectorVT(Context, MVT::i64, Op.size() / 8);
23826+
if (Op.size() % 4 == 0)
23827+
return EVT::getVectorVT(Context, MVT::i32, Op.size() / 4);
23828+
return EVT::getVectorVT(Context, MVT::i8, Op.size());
23829+
}
23830+
23831+
MVT PreferredVT = MVT::getIntegerVT(Subtarget.getELen());
2381323832

2381423833
// Do we have sufficient alignment for our preferred VT? If not, revert
2381523834
// to largest size allowed by our alignment criteria.

llvm/test/CodeGen/RISCV/pr135206.ll

Lines changed: 14 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,6 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
1313
; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
1414
; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
1515
; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill
16-
; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill
17-
; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill
1816
; CHECK-NEXT: lui a0, 7
1917
; CHECK-NEXT: sub t1, sp, a0
2018
; CHECK-NEXT: lui t2, 1
@@ -24,8 +22,9 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
2422
; CHECK-NEXT: bne sp, t1, .LBB0_1
2523
; CHECK-NEXT: # %bb.2:
2624
; CHECK-NEXT: addi sp, sp, -2048
27-
; CHECK-NEXT: addi sp, sp, -96
25+
; CHECK-NEXT: addi sp, sp, -80
2826
; CHECK-NEXT: csrr t1, vlenb
27+
; CHECK-NEXT: slli t1, t1, 2
2928
; CHECK-NEXT: lui t2, 1
3029
; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1
3130
; CHECK-NEXT: sub sp, sp, t2
@@ -34,45 +33,35 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
3433
; CHECK-NEXT: bge t1, t2, .LBB0_3
3534
; CHECK-NEXT: # %bb.4:
3635
; CHECK-NEXT: sub sp, sp, t1
37-
; CHECK-NEXT: li a0, 86
38-
; CHECK-NEXT: addi s0, sp, 48
39-
; CHECK-NEXT: addi s1, sp, 32
40-
; CHECK-NEXT: addi s2, sp, 16
41-
; CHECK-NEXT: lui a1, 353637
42-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
43-
; CHECK-NEXT: vmv.v.x v8, a0
36+
; CHECK-NEXT: li a0, 64
37+
; CHECK-NEXT: li a1, 86
38+
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
39+
; CHECK-NEXT: vmv.v.x v8, a1
4440
; CHECK-NEXT: lui a0, 8
4541
; CHECK-NEXT: addi a0, a0, 32
4642
; CHECK-NEXT: add a0, sp, a0
47-
; CHECK-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
48-
; CHECK-NEXT: addi a0, a1, 1622
49-
; CHECK-NEXT: vse8.v v8, (s0)
43+
; CHECK-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
44+
; CHECK-NEXT: li s0, 56
45+
; CHECK-NEXT: addi s1, sp, 16
46+
; CHECK-NEXT: vsetvli zero, s0, e8, m4, ta, ma
5047
; CHECK-NEXT: vse8.v v8, (s1)
51-
; CHECK-NEXT: vse8.v v8, (s2)
52-
; CHECK-NEXT: slli a1, a0, 32
53-
; CHECK-NEXT: add s3, a0, a1
54-
; CHECK-NEXT: sd s3, 64(sp)
5548
; CHECK-NEXT: call bar
5649
; CHECK-NEXT: lui a0, 8
5750
; CHECK-NEXT: addi a0, a0, 32
5851
; CHECK-NEXT: add a0, sp, a0
59-
; CHECK-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
60-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
61-
; CHECK-NEXT: vse8.v v8, (s0)
52+
; CHECK-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
53+
; CHECK-NEXT: vsetvli zero, s0, e8, m4, ta, ma
6254
; CHECK-NEXT: vse8.v v8, (s1)
63-
; CHECK-NEXT: vse8.v v8, (s2)
64-
; CHECK-NEXT: sd s3, 64(sp)
6555
; CHECK-NEXT: li a0, 0
6656
; CHECK-NEXT: csrr a1, vlenb
57+
; CHECK-NEXT: slli a1, a1, 2
6758
; CHECK-NEXT: add sp, sp, a1
6859
; CHECK-NEXT: lui a1, 8
69-
; CHECK-NEXT: addi a1, a1, -1952
60+
; CHECK-NEXT: addi a1, a1, -1968
7061
; CHECK-NEXT: add sp, sp, a1
7162
; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
7263
; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
7364
; CHECK-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
74-
; CHECK-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload
75-
; CHECK-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload
7665
; CHECK-NEXT: addi sp, sp, 2032
7766
; CHECK-NEXT: ret
7867
%1 = alloca %"buff", align 8

0 commit comments

Comments
 (0)