@@ -1227,118 +1227,6 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
1227
1227
(inst $vaddr, $saddr, $offset, 0, $in)
1228
1228
>;
1229
1229
1230
- let OtherPredicates = [HasFlatAddressSpace] in { // Anonymous pattern records for FLAT_* instructions, all declared under this OtherPredicates setting.
1231
- // Loads: 8/16-bit atomic and extending loads (i32 and i16 results), the plain i16 load, and the v3i32 load.
1232
- def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1233
- def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1234
- def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1235
- def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1236
- def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1237
- def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1238
- def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1239
- def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1240
- def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1241
- def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1242
- def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1243
- def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1244
- def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1245
- def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1246
- def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1247
- // 32- and 64-bit atomic loads are paired with the ordinary DWORD / DWORDX2 load instructions.
1248
- def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1249
- def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1250
- // Truncating 8- and 16-bit stores of i32 values.
1251
- def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1252
- def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1253
- // One DWORD load pattern and one DWORD store pattern per type in Reg32Types.types.
1254
- foreach vt = Reg32Types.types in {
1255
- def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1256
- def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1257
- }
1258
- // One DWORDX2 store pattern and one DWORDX2 load pattern per type in VReg_64.RegTypes.
1259
- foreach vt = VReg_64.RegTypes in {
1260
- def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1261
- def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1262
- }
1263
- // v3i32 store; the matching DWORDX3 load is declared with the first group of loads in this block.
1264
- def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1265
- // One DWORDX4 load pattern and one DWORDX4 store pattern per type in VReg_128.RegTypes.
1266
- foreach vt = VReg_128.RegTypes in {
1267
- def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1268
- def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1269
- }
1270
- // Atomic stores: 32- and 64-bit, plus 8- and 16-bit from either i32 or i16 values.
1271
- def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1272
- def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1273
- def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1274
- def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1275
- def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1276
- def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1277
- // Atomic RMW patterns, instantiated twice: the node name gets "flat" and then "global" appended; the FLAT_ATOMIC_* name string is the same for both.
1278
- foreach as = [ "flat", "global" ] in {
1279
- defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1280
- defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1281
- defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1282
- defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1283
- defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1284
- defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1285
- defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1286
- defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1287
- defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1288
- defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1289
- defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1290
- defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1291
- defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1292
- // i64 counterparts of the i32 patterns above, using the *_X2 instruction names.
1293
- defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1294
- defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1295
- defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1296
- defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1297
- defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1298
- defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1299
- defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1300
- defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1301
- defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1302
- defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1303
- defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1304
- defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1305
- defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1306
- } // end foreach as
1307
- // int_amdgcn_atomic_cond_sub_u32 patterns for the "flat_addrspace" address space, under SubtargetPredicate = isGFX12Plus.
1308
- let SubtargetPredicate = isGFX12Plus in {
1309
- defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1310
- // NOTE(review): the let below assigns OtherPredicates again; presumably this replaces rather than extends the enclosing [HasFlatAddressSpace] - confirm.
1311
- let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1312
- defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1313
- }
1314
- // i16-typed stores: truncating byte store and plain short store.
1315
- def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1316
- def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1317
- // D16_HI store patterns. NOTE(review): this inner let reassigns OtherPredicates, presumably replacing the enclosing [HasFlatAddressSpace] - confirm.
1318
- let OtherPredicates = [HasD16LoadStore] in {
1319
- def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1320
- def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1321
- }
1322
- // D16 load patterns for v2i16 and v2f16; D16_HI variants first. NOTE(review): OtherPredicates is reassigned here as well - confirm the intended gating.
1323
- let OtherPredicates = [D16PreservesUnusedBits] in {
1324
- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1325
- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1326
- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1327
- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1328
- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1329
- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1330
- // Non-HI D16 variants, paired with the *_d16_lo_flat nodes.
1331
- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1332
- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1333
- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1334
- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1335
- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1336
- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1337
- }
1338
-
1339
- } // End OtherPredicates = [HasFlatAddressSpace]
1340
-
1341
-
1342
1230
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1343
1231
def : FlatLoadSignedPat <inst, node, vt> {
1344
1232
let AddedComplexity = 10;
@@ -1467,6 +1355,117 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
1467
1355
}
1468
1356
}
1469
1357
1358
+ let OtherPredicates = [HasFlatAddressSpace] in { // Anonymous pattern records for FLAT_* instructions, all declared under this OtherPredicates setting.
1359
+ // Loads: 8/16-bit atomic and extending loads (i32 and i16 results), the plain i16 load, and the v3i32 load.
1360
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1361
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1362
+ def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1363
+ def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1364
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1365
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1366
+ def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1367
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1368
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1369
+ def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1370
+ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1371
+ def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1372
+ def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1373
+ def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1374
+ def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1375
+ // 32- and 64-bit atomic loads are paired with the ordinary DWORD / DWORDX2 load instructions.
1376
+ def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1377
+ def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1378
+ // Truncating 8- and 16-bit stores of i32 values.
1379
+ def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1380
+ def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1381
+ // One DWORD load pattern and one DWORD store pattern per type in Reg32Types.types.
1382
+ foreach vt = Reg32Types.types in {
1383
+ def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1384
+ def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1385
+ }
1386
+ // One DWORDX2 store pattern and one DWORDX2 load pattern per type in VReg_64.RegTypes.
1387
+ foreach vt = VReg_64.RegTypes in {
1388
+ def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1389
+ def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1390
+ }
1391
+ // v3i32 store; the matching DWORDX3 load is declared with the first group of loads in this block.
1392
+ def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1393
+ // One DWORDX4 load pattern and one DWORDX4 store pattern per type in VReg_128.RegTypes.
1394
+ foreach vt = VReg_128.RegTypes in {
1395
+ def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1396
+ def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1397
+ }
1398
+ // Atomic stores: 32- and 64-bit, plus 8- and 16-bit from either i32 or i16 values.
1399
+ def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1400
+ def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1401
+ def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1402
+ def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1403
+ def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1404
+ def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1405
+ // Atomic RMW patterns, instantiated twice: the node name gets "flat" and then "global" appended; the FLAT_ATOMIC_* name string is the same for both.
1406
+ foreach as = [ "flat", "global" ] in {
1407
+ defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1408
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1409
+ defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1410
+ defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1411
+ defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1412
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1413
+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1414
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1415
+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1416
+ defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1417
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1418
+ defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1419
+ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1420
+ // i64 counterparts of the i32 patterns above, using the *_X2 instruction names.
1421
+ defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1422
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1423
+ defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1424
+ defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1425
+ defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1426
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1427
+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1428
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1429
+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1430
+ defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1431
+ defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1432
+ defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1433
+ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1434
+ } // end foreach as
1435
+ // int_amdgcn_atomic_cond_sub_u32 patterns for the "flat_addrspace" address space, under SubtargetPredicate = isGFX12Plus.
1436
+ let SubtargetPredicate = isGFX12Plus in {
1437
+ defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1438
+ // NOTE(review): the let below assigns OtherPredicates again; presumably this replaces rather than extends the enclosing [HasFlatAddressSpace] - confirm.
1439
+ let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1440
+ defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1441
+ }
1442
+ // i16-typed stores: truncating byte store and plain short store.
1443
+ def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1444
+ def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1445
+ // D16_HI store patterns. NOTE(review): this inner let reassigns OtherPredicates, presumably replacing the enclosing [HasFlatAddressSpace] - confirm.
1446
+ let OtherPredicates = [HasD16LoadStore] in {
1447
+ def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1448
+ def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1449
+ }
1450
+ // D16 load patterns for v2i16 and v2f16; D16_HI variants first. NOTE(review): OtherPredicates is reassigned here as well - confirm the intended gating.
1451
+ let OtherPredicates = [D16PreservesUnusedBits] in {
1452
+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1453
+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1454
+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1455
+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1456
+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1457
+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1458
+ // Non-HI D16 variants, paired with the *_d16_lo_flat nodes.
1459
+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1460
+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1461
+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1462
+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1463
+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1464
+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1465
+ }
1466
+
1467
+ } // End OtherPredicates = [HasFlatAddressSpace]
1468
+
1470
1469
let OtherPredicates = [HasFlatGlobalInsts] in {
1471
1470
1472
1471
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
0 commit comments