Skip to content

Commit 143c33c

Browse files
authored
[RISCV] Consider only legally typed splats to be legal shuffles (#123415)
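Given the existing comment ("Support splats for any type. These should type legalize well."), I'd expected test coverage for splats of illegal types. There was none, so let's do the simple thing, which benefits the one case we do have tests for.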
1 parent 07d4965 commit 143c33c

File tree

2 files changed

+37
-83
lines changed

2 files changed

+37
-83
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5733,14 +5733,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
57335733
}
57345734

57355735
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5736-
// Support splats for any type. These should type legalize well.
5737-
if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5738-
return true;
5739-
57405736
// Only support legal VTs for now.
57415737
if (!isTypeLegal(VT))
57425738
return false;
57435739

5740+
// Support splats for any legal type.
5741+
if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5742+
return true;
5743+
57445744
MVT SVT = VT.getSimpleVT();
57455745

57465746
// Not for i1 vectors.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 33 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,37 +1287,17 @@ define void @shuffle_i64_splat(ptr %p) nounwind {
12871287
}
12881288

12891289
define void @shuffle_i128_splat(ptr %p) nounwind {
1290-
; RV32-LABEL: shuffle_i128_splat:
1291-
; RV32: # %bb.0:
1292-
; RV32-NEXT: lw a1, 0(a0)
1293-
; RV32-NEXT: lw a2, 4(a0)
1294-
; RV32-NEXT: lw a3, 8(a0)
1295-
; RV32-NEXT: lw a4, 12(a0)
1296-
; RV32-NEXT: sw a1, 48(a0)
1297-
; RV32-NEXT: sw a2, 52(a0)
1298-
; RV32-NEXT: sw a3, 56(a0)
1299-
; RV32-NEXT: sw a4, 60(a0)
1300-
; RV32-NEXT: sw a1, 16(a0)
1301-
; RV32-NEXT: sw a2, 20(a0)
1302-
; RV32-NEXT: sw a3, 24(a0)
1303-
; RV32-NEXT: sw a4, 28(a0)
1304-
; RV32-NEXT: sw a1, 32(a0)
1305-
; RV32-NEXT: sw a2, 36(a0)
1306-
; RV32-NEXT: sw a3, 40(a0)
1307-
; RV32-NEXT: sw a4, 44(a0)
1308-
; RV32-NEXT: ret
1309-
;
1310-
; RV64-LABEL: shuffle_i128_splat:
1311-
; RV64: # %bb.0:
1312-
; RV64-NEXT: ld a1, 0(a0)
1313-
; RV64-NEXT: ld a2, 8(a0)
1314-
; RV64-NEXT: sd a1, 48(a0)
1315-
; RV64-NEXT: sd a2, 56(a0)
1316-
; RV64-NEXT: sd a1, 16(a0)
1317-
; RV64-NEXT: sd a2, 24(a0)
1318-
; RV64-NEXT: sd a1, 32(a0)
1319-
; RV64-NEXT: sd a2, 40(a0)
1320-
; RV64-NEXT: ret
1290+
; CHECK-LABEL: shuffle_i128_splat:
1291+
; CHECK: # %bb.0:
1292+
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1293+
; CHECK-NEXT: vle64.v v8, (a0)
1294+
; CHECK-NEXT: lui a1, 16
1295+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1296+
; CHECK-NEXT: vmv.v.x v12, a1
1297+
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1298+
; CHECK-NEXT: vrgatherei16.vv v16, v8, v12
1299+
; CHECK-NEXT: vse64.v v16, (a0)
1300+
; CHECK-NEXT: ret
13211301
%a = load <4 x i128>, ptr %p
13221302
%res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13231303
store <4 x i128> %res, ptr %p
@@ -1327,58 +1307,32 @@ define void @shuffle_i128_splat(ptr %p) nounwind {
13271307
define void @shuffle_i256_splat(ptr %p) nounwind {
13281308
; RV32-LABEL: shuffle_i256_splat:
13291309
; RV32: # %bb.0:
1330-
; RV32-NEXT: lw a1, 0(a0)
1331-
; RV32-NEXT: lw a2, 4(a0)
1332-
; RV32-NEXT: lw a3, 8(a0)
1333-
; RV32-NEXT: lw a4, 12(a0)
1334-
; RV32-NEXT: lw a5, 16(a0)
1335-
; RV32-NEXT: lw a6, 20(a0)
1336-
; RV32-NEXT: lw a7, 24(a0)
1337-
; RV32-NEXT: lw t0, 28(a0)
1338-
; RV32-NEXT: sw a5, 112(a0)
1339-
; RV32-NEXT: sw a6, 116(a0)
1340-
; RV32-NEXT: sw a7, 120(a0)
1341-
; RV32-NEXT: sw t0, 124(a0)
1342-
; RV32-NEXT: sw a1, 96(a0)
1343-
; RV32-NEXT: sw a2, 100(a0)
1344-
; RV32-NEXT: sw a3, 104(a0)
1345-
; RV32-NEXT: sw a4, 108(a0)
1346-
; RV32-NEXT: sw a5, 80(a0)
1347-
; RV32-NEXT: sw a6, 84(a0)
1348-
; RV32-NEXT: sw a7, 88(a0)
1349-
; RV32-NEXT: sw t0, 92(a0)
1350-
; RV32-NEXT: sw a1, 64(a0)
1351-
; RV32-NEXT: sw a2, 68(a0)
1352-
; RV32-NEXT: sw a3, 72(a0)
1353-
; RV32-NEXT: sw a4, 76(a0)
1354-
; RV32-NEXT: sw a5, 48(a0)
1355-
; RV32-NEXT: sw a6, 52(a0)
1356-
; RV32-NEXT: sw a7, 56(a0)
1357-
; RV32-NEXT: sw t0, 60(a0)
1358-
; RV32-NEXT: sw a1, 32(a0)
1359-
; RV32-NEXT: sw a2, 36(a0)
1360-
; RV32-NEXT: sw a3, 40(a0)
1361-
; RV32-NEXT: sw a4, 44(a0)
1310+
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1311+
; RV32-NEXT: vle64.v v8, (a0)
1312+
; RV32-NEXT: lui a1, 12320
1313+
; RV32-NEXT: addi a1, a1, 256
1314+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1315+
; RV32-NEXT: vmv.v.x v16, a1
1316+
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1317+
; RV32-NEXT: vsext.vf2 v18, v16
1318+
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1319+
; RV32-NEXT: vrgatherei16.vv v24, v8, v18
1320+
; RV32-NEXT: vse64.v v24, (a0)
13621321
; RV32-NEXT: ret
13631322
;
13641323
; RV64-LABEL: shuffle_i256_splat:
13651324
; RV64: # %bb.0:
1366-
; RV64-NEXT: ld a1, 0(a0)
1367-
; RV64-NEXT: ld a2, 8(a0)
1368-
; RV64-NEXT: ld a3, 16(a0)
1369-
; RV64-NEXT: ld a4, 24(a0)
1370-
; RV64-NEXT: sd a1, 96(a0)
1371-
; RV64-NEXT: sd a2, 104(a0)
1372-
; RV64-NEXT: sd a3, 112(a0)
1373-
; RV64-NEXT: sd a4, 120(a0)
1374-
; RV64-NEXT: sd a1, 32(a0)
1375-
; RV64-NEXT: sd a2, 40(a0)
1376-
; RV64-NEXT: sd a3, 48(a0)
1377-
; RV64-NEXT: sd a4, 56(a0)
1378-
; RV64-NEXT: sd a1, 64(a0)
1379-
; RV64-NEXT: sd a2, 72(a0)
1380-
; RV64-NEXT: sd a3, 80(a0)
1381-
; RV64-NEXT: sd a4, 88(a0)
1325+
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1326+
; RV64-NEXT: vle64.v v8, (a0)
1327+
; RV64-NEXT: lui a1, 98305
1328+
; RV64-NEXT: slli a1, a1, 5
1329+
; RV64-NEXT: addi a1, a1, 1
1330+
; RV64-NEXT: slli a1, a1, 16
1331+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1332+
; RV64-NEXT: vmv.v.x v16, a1
1333+
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1334+
; RV64-NEXT: vrgatherei16.vv v24, v8, v16
1335+
; RV64-NEXT: vse64.v v24, (a0)
13821336
; RV64-NEXT: ret
13831337
%a = load <4 x i256>, ptr %p
13841338
%res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>

0 commit comments

Comments
 (0)