Skip to content

Commit be19a27

Browse files
authored
[RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. (#147598)
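When the element type is a pointer, the element size in bytes must be obtained from DataLayout (via getTypeStoreSize) rather than computed from getScalarSizeInBits(), which does not report a size for pointer types.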
1 parent aa1829d commit be19a27

File tree

2 files changed

+50
-10
lines changed

2 files changed

+50
-10
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24111,10 +24111,11 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
2411124111

2411224112
IRBuilder<> Builder(LI);
2411324113

24114+
const DataLayout &DL = LI->getDataLayout();
24115+
2411424116
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
2411524117
if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
24116-
LI->getPointerAddressSpace(),
24117-
LI->getDataLayout()))
24118+
LI->getPointerAddressSpace(), DL))
2411824119
return false;
2411924120

2412024121
auto *PtrTy = LI->getPointerOperandType();
@@ -24124,7 +24125,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
2412424125
// and there's only one element used, use a strided load instead. This
2412524126
// will be equally fast, and create less vector register pressure.
2412624127
if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
24127-
unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
24128+
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
2412824129
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
2412924130
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
2413024131
Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
@@ -24187,14 +24188,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
2418724188
ShuffleVectorInst *SVI,
2418824189
unsigned Factor) const {
2418924190
IRBuilder<> Builder(SI);
24191+
const DataLayout &DL = SI->getDataLayout();
2419024192
auto Mask = SVI->getShuffleMask();
2419124193
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
2419224194
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
2419324195
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
2419424196
ShuffleVTy->getNumElements() / Factor);
2419524197
if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
24196-
SI->getPointerAddressSpace(),
24197-
SI->getDataLayout()))
24198+
SI->getPointerAddressSpace(), DL))
2419824199
return false;
2419924200

2420024201
auto *PtrTy = SI->getPointerOperandType();
@@ -24206,7 +24207,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
2420624207
// be equally fast, and create less vector register pressure.
2420724208
if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
2420824209
isSpreadMask(Mask, Factor, Index)) {
24209-
unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
24210+
unsigned ScalarSizeInBytes =
24211+
DL.getTypeStoreSize(ShuffleVTy->getElementType());
2421024212
Value *Data = SVI->getOperand(0);
2421124213
auto *DataVTy = cast<FixedVectorType>(Data->getType());
2421224214
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 42 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1662,6 +1662,25 @@ define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
16621662
ret <4 x i8> %v0
16631663
}
16641664

1665+
define <4 x ptr> @load_factor3_one_active_ptr(ptr %ptr) {
1666+
; RV32-LABEL: load_factor3_one_active_ptr:
1667+
; RV32: # %bb.0:
1668+
; RV32-NEXT: li a1, 12
1669+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1670+
; RV32-NEXT: vlse32.v v8, (a0), a1
1671+
; RV32-NEXT: ret
1672+
;
1673+
; RV64-LABEL: load_factor3_one_active_ptr:
1674+
; RV64: # %bb.0:
1675+
; RV64-NEXT: li a1, 24
1676+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1677+
; RV64-NEXT: vlse64.v v8, (a0), a1
1678+
; RV64-NEXT: ret
1679+
%interleaved.vec = load <12 x ptr>, ptr %ptr
1680+
%v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
1681+
ret <4 x ptr> %v0
1682+
}
1683+
16651684
define void @load_factor4_one_active_storeback(ptr %ptr) {
16661685
; CHECK-LABEL: load_factor4_one_active_storeback:
16671686
; CHECK: # %bb.0:
@@ -1748,6 +1767,25 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
17481767
ret void
17491768
}
17501769

1770+
define void @store_factor4_one_active_ptr(ptr %ptr, <4 x ptr> %v) {
1771+
; RV32-LABEL: store_factor4_one_active_ptr:
1772+
; RV32: # %bb.0:
1773+
; RV32-NEXT: li a1, 16
1774+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1775+
; RV32-NEXT: vsse32.v v8, (a0), a1
1776+
; RV32-NEXT: ret
1777+
;
1778+
; RV64-LABEL: store_factor4_one_active_ptr:
1779+
; RV64: # %bb.0:
1780+
; RV64-NEXT: li a1, 32
1781+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1782+
; RV64-NEXT: vsse64.v v8, (a0), a1
1783+
; RV64-NEXT: ret
1784+
%v0 = shufflevector <4 x ptr> %v, <4 x ptr> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
1785+
store <16 x ptr> %v0, ptr %ptr
1786+
ret void
1787+
}
1788+
17511789
; Negative tests
17521790

17531791
define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
@@ -1766,8 +1804,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
17661804
; RV32-NEXT: vle32.v v12, (a0), v0.t
17671805
; RV32-NEXT: li a0, 36
17681806
; RV32-NEXT: vmv.s.x v20, a1
1769-
; RV32-NEXT: lui a1, %hi(.LCPI49_0)
1770-
; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
1807+
; RV32-NEXT: lui a1, %hi(.LCPI51_0)
1808+
; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
17711809
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
17721810
; RV32-NEXT: vle16.v v21, (a1)
17731811
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1842,8 +1880,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
18421880
; RV32-NEXT: vmv.s.x v10, a0
18431881
; RV32-NEXT: li a0, 146
18441882
; RV32-NEXT: vmv.s.x v11, a0
1845-
; RV32-NEXT: lui a0, %hi(.LCPI50_0)
1846-
; RV32-NEXT: addi a0, a0, %lo(.LCPI50_0)
1883+
; RV32-NEXT: lui a0, %hi(.LCPI52_0)
1884+
; RV32-NEXT: addi a0, a0, %lo(.LCPI52_0)
18471885
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
18481886
; RV32-NEXT: vle16.v v20, (a0)
18491887
; RV32-NEXT: li a0, 36

0 commit comments

Comments
 (0)