Skip to content

Commit c8c3b8b

Browse files
authored
[AMDGPU] Move flat patterns instantiation down. NFC. (#94409)
I want to reuse some of the global patterns for the flat, so move instantiation past the declarations.
1 parent 37cf047 commit c8c3b8b

File tree

1 file changed

+111
-112
lines changed

1 file changed

+111
-112
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 111 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -1227,118 +1227,6 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
12271227
(inst $vaddr, $saddr, $offset, 0, $in)
12281228
>;
12291229

1230-
let OtherPredicates = [HasFlatAddressSpace] in {
1231-
1232-
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1233-
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1234-
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1235-
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1236-
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1237-
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1238-
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1239-
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1240-
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1241-
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1242-
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1243-
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1244-
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1245-
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1246-
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1247-
1248-
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1249-
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1250-
1251-
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1252-
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1253-
1254-
foreach vt = Reg32Types.types in {
1255-
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1256-
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1257-
}
1258-
1259-
foreach vt = VReg_64.RegTypes in {
1260-
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1261-
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1262-
}
1263-
1264-
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1265-
1266-
foreach vt = VReg_128.RegTypes in {
1267-
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1268-
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1269-
}
1270-
1271-
def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1272-
def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1273-
def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1274-
def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1275-
def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1276-
def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1277-
1278-
foreach as = [ "flat", "global" ] in {
1279-
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1280-
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1281-
defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1282-
defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1283-
defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1284-
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1285-
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1286-
defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1287-
defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1288-
defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1289-
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1290-
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1291-
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1292-
1293-
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1294-
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1295-
defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1296-
defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1297-
defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1298-
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1299-
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1300-
defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1301-
defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1302-
defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1303-
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1304-
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1305-
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1306-
} // end foreach as
1307-
1308-
let SubtargetPredicate = isGFX12Plus in {
1309-
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1310-
1311-
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1312-
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1313-
}
1314-
1315-
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1316-
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1317-
1318-
let OtherPredicates = [HasD16LoadStore] in {
1319-
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1320-
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1321-
}
1322-
1323-
let OtherPredicates = [D16PreservesUnusedBits] in {
1324-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1325-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1326-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1327-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1328-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1329-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1330-
1331-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1332-
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1333-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1334-
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1335-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1336-
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1337-
}
1338-
1339-
} // End OtherPredicates = [HasFlatAddressSpace]
1340-
1341-
13421230
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
13431231
def : FlatLoadSignedPat <inst, node, vt> {
13441232
let AddedComplexity = 10;
@@ -1467,6 +1355,117 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
14671355
}
14681356
}
14691357

1358+
let OtherPredicates = [HasFlatAddressSpace] in {
1359+
1360+
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1361+
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1362+
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1363+
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1364+
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1365+
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1366+
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1367+
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1368+
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1369+
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1370+
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1371+
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1372+
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1373+
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1374+
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1375+
1376+
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1377+
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1378+
1379+
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1380+
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1381+
1382+
foreach vt = Reg32Types.types in {
1383+
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1384+
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1385+
}
1386+
1387+
foreach vt = VReg_64.RegTypes in {
1388+
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1389+
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1390+
}
1391+
1392+
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1393+
1394+
foreach vt = VReg_128.RegTypes in {
1395+
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1396+
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1397+
}
1398+
1399+
def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1400+
def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1401+
def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1402+
def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1403+
def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1404+
def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1405+
1406+
foreach as = [ "flat", "global" ] in {
1407+
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1408+
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1409+
defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1410+
defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1411+
defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1412+
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1413+
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1414+
defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1415+
defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1416+
defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1417+
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1418+
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1419+
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1420+
1421+
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1422+
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1423+
defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1424+
defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1425+
defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1426+
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1427+
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1428+
defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1429+
defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1430+
defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1431+
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1432+
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1433+
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1434+
} // end foreach as
1435+
1436+
let SubtargetPredicate = isGFX12Plus in {
1437+
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1438+
1439+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1440+
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1441+
}
1442+
1443+
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1444+
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1445+
1446+
let OtherPredicates = [HasD16LoadStore] in {
1447+
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1448+
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1449+
}
1450+
1451+
let OtherPredicates = [D16PreservesUnusedBits] in {
1452+
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1453+
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1454+
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1455+
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1456+
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1457+
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1458+
1459+
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1460+
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1461+
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1462+
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1463+
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1464+
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1465+
}
1466+
1467+
} // End OtherPredicates = [HasFlatAddressSpace]
1468+
14701469
let OtherPredicates = [HasFlatGlobalInsts] in {
14711470

14721471
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;

0 commit comments

Comments
 (0)