@@ -1287,37 +1287,17 @@ define void @shuffle_i64_splat(ptr %p) nounwind {
1287
1287
}
1288
1288
1289
1289
define void @shuffle_i128_splat (ptr %p ) nounwind {
1290
- ; RV32-LABEL: shuffle_i128_splat:
1291
- ; RV32: # %bb.0:
1292
- ; RV32-NEXT: lw a1, 0(a0)
1293
- ; RV32-NEXT: lw a2, 4(a0)
1294
- ; RV32-NEXT: lw a3, 8(a0)
1295
- ; RV32-NEXT: lw a4, 12(a0)
1296
- ; RV32-NEXT: sw a1, 48(a0)
1297
- ; RV32-NEXT: sw a2, 52(a0)
1298
- ; RV32-NEXT: sw a3, 56(a0)
1299
- ; RV32-NEXT: sw a4, 60(a0)
1300
- ; RV32-NEXT: sw a1, 16(a0)
1301
- ; RV32-NEXT: sw a2, 20(a0)
1302
- ; RV32-NEXT: sw a3, 24(a0)
1303
- ; RV32-NEXT: sw a4, 28(a0)
1304
- ; RV32-NEXT: sw a1, 32(a0)
1305
- ; RV32-NEXT: sw a2, 36(a0)
1306
- ; RV32-NEXT: sw a3, 40(a0)
1307
- ; RV32-NEXT: sw a4, 44(a0)
1308
- ; RV32-NEXT: ret
1309
- ;
1310
- ; RV64-LABEL: shuffle_i128_splat:
1311
- ; RV64: # %bb.0:
1312
- ; RV64-NEXT: ld a1, 0(a0)
1313
- ; RV64-NEXT: ld a2, 8(a0)
1314
- ; RV64-NEXT: sd a1, 48(a0)
1315
- ; RV64-NEXT: sd a2, 56(a0)
1316
- ; RV64-NEXT: sd a1, 16(a0)
1317
- ; RV64-NEXT: sd a2, 24(a0)
1318
- ; RV64-NEXT: sd a1, 32(a0)
1319
- ; RV64-NEXT: sd a2, 40(a0)
1320
- ; RV64-NEXT: ret
1290
+ ; CHECK-LABEL: shuffle_i128_splat:
1291
+ ; CHECK: # %bb.0:
1292
+ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1293
+ ; CHECK-NEXT: vle64.v v8, (a0)
1294
+ ; CHECK-NEXT: lui a1, 16
1295
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1296
+ ; CHECK-NEXT: vmv.v.x v12, a1
1297
+ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1298
+ ; CHECK-NEXT: vrgatherei16.vv v16, v8, v12
1299
+ ; CHECK-NEXT: vse64.v v16, (a0)
1300
+ ; CHECK-NEXT: ret
1321
1301
%a = load <4 x i128 >, ptr %p
1322
1302
%res = shufflevector <4 x i128 > %a , <4 x i128 > poison, <4 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 >
1323
1303
store <4 x i128 > %res , ptr %p
@@ -1327,58 +1307,32 @@ define void @shuffle_i128_splat(ptr %p) nounwind {
1327
1307
define void @shuffle_i256_splat (ptr %p ) nounwind {
1328
1308
; RV32-LABEL: shuffle_i256_splat:
1329
1309
; RV32: # %bb.0:
1330
- ; RV32-NEXT: lw a1, 0(a0)
1331
- ; RV32-NEXT: lw a2, 4(a0)
1332
- ; RV32-NEXT: lw a3, 8(a0)
1333
- ; RV32-NEXT: lw a4, 12(a0)
1334
- ; RV32-NEXT: lw a5, 16(a0)
1335
- ; RV32-NEXT: lw a6, 20(a0)
1336
- ; RV32-NEXT: lw a7, 24(a0)
1337
- ; RV32-NEXT: lw t0, 28(a0)
1338
- ; RV32-NEXT: sw a5, 112(a0)
1339
- ; RV32-NEXT: sw a6, 116(a0)
1340
- ; RV32-NEXT: sw a7, 120(a0)
1341
- ; RV32-NEXT: sw t0, 124(a0)
1342
- ; RV32-NEXT: sw a1, 96(a0)
1343
- ; RV32-NEXT: sw a2, 100(a0)
1344
- ; RV32-NEXT: sw a3, 104(a0)
1345
- ; RV32-NEXT: sw a4, 108(a0)
1346
- ; RV32-NEXT: sw a5, 80(a0)
1347
- ; RV32-NEXT: sw a6, 84(a0)
1348
- ; RV32-NEXT: sw a7, 88(a0)
1349
- ; RV32-NEXT: sw t0, 92(a0)
1350
- ; RV32-NEXT: sw a1, 64(a0)
1351
- ; RV32-NEXT: sw a2, 68(a0)
1352
- ; RV32-NEXT: sw a3, 72(a0)
1353
- ; RV32-NEXT: sw a4, 76(a0)
1354
- ; RV32-NEXT: sw a5, 48(a0)
1355
- ; RV32-NEXT: sw a6, 52(a0)
1356
- ; RV32-NEXT: sw a7, 56(a0)
1357
- ; RV32-NEXT: sw t0, 60(a0)
1358
- ; RV32-NEXT: sw a1, 32(a0)
1359
- ; RV32-NEXT: sw a2, 36(a0)
1360
- ; RV32-NEXT: sw a3, 40(a0)
1361
- ; RV32-NEXT: sw a4, 44(a0)
1310
+ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1311
+ ; RV32-NEXT: vle64.v v8, (a0)
1312
+ ; RV32-NEXT: lui a1, 12320
1313
+ ; RV32-NEXT: addi a1, a1, 256
1314
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1315
+ ; RV32-NEXT: vmv.v.x v16, a1
1316
+ ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1317
+ ; RV32-NEXT: vsext.vf2 v18, v16
1318
+ ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1319
+ ; RV32-NEXT: vrgatherei16.vv v24, v8, v18
1320
+ ; RV32-NEXT: vse64.v v24, (a0)
1362
1321
; RV32-NEXT: ret
1363
1322
;
1364
1323
; RV64-LABEL: shuffle_i256_splat:
1365
1324
; RV64: # %bb.0:
1366
- ; RV64-NEXT: ld a1, 0(a0)
1367
- ; RV64-NEXT: ld a2, 8(a0)
1368
- ; RV64-NEXT: ld a3, 16(a0)
1369
- ; RV64-NEXT: ld a4, 24(a0)
1370
- ; RV64-NEXT: sd a1, 96(a0)
1371
- ; RV64-NEXT: sd a2, 104(a0)
1372
- ; RV64-NEXT: sd a3, 112(a0)
1373
- ; RV64-NEXT: sd a4, 120(a0)
1374
- ; RV64-NEXT: sd a1, 32(a0)
1375
- ; RV64-NEXT: sd a2, 40(a0)
1376
- ; RV64-NEXT: sd a3, 48(a0)
1377
- ; RV64-NEXT: sd a4, 56(a0)
1378
- ; RV64-NEXT: sd a1, 64(a0)
1379
- ; RV64-NEXT: sd a2, 72(a0)
1380
- ; RV64-NEXT: sd a3, 80(a0)
1381
- ; RV64-NEXT: sd a4, 88(a0)
1325
+ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1326
+ ; RV64-NEXT: vle64.v v8, (a0)
1327
+ ; RV64-NEXT: lui a1, 98305
1328
+ ; RV64-NEXT: slli a1, a1, 5
1329
+ ; RV64-NEXT: addi a1, a1, 1
1330
+ ; RV64-NEXT: slli a1, a1, 16
1331
+ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1332
+ ; RV64-NEXT: vmv.v.x v16, a1
1333
+ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1334
+ ; RV64-NEXT: vrgatherei16.vv v24, v8, v16
1335
+ ; RV64-NEXT: vse64.v v24, (a0)
1382
1336
; RV64-NEXT: ret
1383
1337
%a = load <4 x i256 >, ptr %p
1384
1338
%res = shufflevector <4 x i256 > %a , <4 x i256 > poison, <4 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 >
0 commit comments