@@ -711,17 +711,17 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
711
711
} // End SubtargetPredicate = isGFX7GFX10
712
712
713
713
// f64 atomic add/min/max pseudos for the FLAT and GLOBAL variants, all defined
// under SubtargetPredicate = isGFX90APlus and all using VReg_64 data.
// In this diff the removed (-) lines passed an int_amdgcn_* intrinsic as the
// fourth template argument; the added (+) lines omit that argument. The same
// intrinsic names reappear as string arguments to FlatSignedIntrPat and
// GlobalFLATAtomicIntrPats in the pattern section further down.
let SubtargetPredicate = isGFX90APlus in {
714
- defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64, int_amdgcn_flat_atomic_fadd >;
715
- defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmin >;
716
- defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmax >;
717
- defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd >;
718
- defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin >;
719
- defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax >;
714
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
715
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
716
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
717
+ defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
718
+ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
719
+ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
720
720
} // End SubtargetPredicate = isGFX90APlus
721
721
722
722
// 32-bit-register (VGPR_32) atomic add pseudos defined under
// SubtargetPredicate = isGFX940Plus: f32 add, packed f16 add, and packed bf16
// add (FLAT and GLOBAL). The diff drops the int_amdgcn_flat_atomic_fadd
// template argument from the two FLAT f32/f16 definitions; the two *_BF16
// definitions are unchanged context lines and never passed a fourth argument.
let SubtargetPredicate = isGFX940Plus in {
723
- defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32, int_amdgcn_flat_atomic_fadd >;
724
- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16, int_amdgcn_flat_atomic_fadd >;
723
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
724
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
725
725
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
726
726
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
727
727
} // End SubtargetPredicate = isGFX940Plus
@@ -897,15 +897,15 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
897
897
// Global floating-point atomic pseudos (fcmpswap / fmin / fmax, in 32-bit and
// 64-bit "_X2" forms). Per the hunk header and the closing comment below this
// run, they sit inside `let SubtargetPredicate = isGFX10Plus, is_flat_global = 1`.
// The FCMPSWAP definitions are unchanged and pass null_frag plus a wider data
// type/register class; the FMIN/FMAX definitions lose their
// int_amdgcn_global_atomic_fmin/fmax template argument in this diff.
defm GLOBAL_ATOMIC_FCMPSWAP :
898
898
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
899
899
defm GLOBAL_ATOMIC_FMIN :
900
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin >;
900
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
901
901
defm GLOBAL_ATOMIC_FMAX :
902
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax >;
902
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
903
903
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
904
904
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
905
905
defm GLOBAL_ATOMIC_FMIN_X2 :
906
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin >;
906
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
907
907
defm GLOBAL_ATOMIC_FMAX_X2 :
908
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64, int_amdgcn_global_atomic_fmax >;
908
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
909
909
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
910
910
911
911
let is_flat_global = 1 in {
@@ -920,10 +920,10 @@ let OtherPredicates = [HasAtomicFaddInsts] in {
920
920
921
921
// GLOBAL_ATOMIC_ADD_F32 and GLOBAL_ATOMIC_PK_ADD_F16 instantiated through
// FLAT_Global_Atomic_Pseudo_RTN under OtherPredicates = [isGFX90APlus].
// Presumably these are the value-returning forms only (inferred from the
// "_RTN" multiclass name; its body is not visible here) -- TODO confirm.
// The diff removes the int_amdgcn_global_atomic_fadd template argument from both.
let OtherPredicates = [isGFX90APlus] in {
922
922
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
923
- "global_atomic_add_f32", VGPR_32, f32, int_amdgcn_global_atomic_fadd
923
+ "global_atomic_add_f32", VGPR_32, f32
924
924
>;
925
925
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
926
- "global_atomic_pk_add_f16", VGPR_32, v2f16, int_amdgcn_global_atomic_fadd
926
+ "global_atomic_pk_add_f16", VGPR_32, v2f16
927
927
>;
928
928
} // End OtherPredicates = [isGFX90APlus]
929
929
} // End is_flat_global = 1
@@ -1029,13 +1029,30 @@ multiclass FlatAtomicPat <string inst, string node, ValueType vt,
1029
1029
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1030
1030
}
1031
1031
1032
// FlatSignedAtomicPat (multiclass, added by this diff): emits two anonymous
// GCNPat records for one atomic instruction.
//   inst    - instruction name as a string; the first pattern selects
//             inst#"_RTN", the second selects inst itself.
//   node    - base name of the PatFrags to match; "_ret" / "_noret" is
//             appended, followed by "_" # vt.Size unless isIntr is set.
//   vt      - result value type of the matched node.
//   data_vt - type of the $data operand (defaults to vt).
//   isIntr  - when 1, no size suffix is appended to the fragment name.
// Both patterns match the address through GlobalOffset, which supplies
// $vaddr and $offset to the selected instruction.
+ multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
1033
+ ValueType data_vt = vt, bit isIntr = 0> {
1034
+ defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size));
1035
+ defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));
1036
+
1037
+ def : GCNPat <(vt (rtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
1038
+ (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1039
+
1040
+ def : GCNPat <(vt (noRtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
1041
+ (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1042
+ }
1043
+
1044
// FlatSignedIntrPat (multiclass, added by this diff): thin wrapper that
// instantiates FlatSignedAtomicPat with isIntr = 1, so the "_ret" / "_noret"
// fragments are looked up without the "_" # vt.Size suffix.
+ multiclass FlatSignedIntrPat <string inst, string node, ValueType vt,
1045
+ ValueType data_vt = vt> {
1046
+ defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1047
+ }
1048
+
1032
1049
// FlatSignedAtomicPatNoRtn: a single GCNPat mapping `node` applied to a
// GlobalOffset address and a vt-typed $data operand onto `inst`. Unlike the
// Rtn class below it, the source pattern is not wrapped in a result type.
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1033
1050
(node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data),
1034
1051
(inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
1035
1052
>;
1036
1053
1037
// This diff renames the class FlatSignedAtomicPat to FlatSignedAtomicPatRtn;
// the old name is reused by the string-based multiclass added earlier in the
// diff. The body is unchanged: one GCNPat whose source pattern yields a value
// of type vt and whose $data operand has type data_vt (defaults to vt).
- class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
1038
- ValueType data_vt = vt> : GCNPat <
1054
+ class FlatSignedAtomicPatRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
1055
+ ValueType data_vt = vt> : GCNPat <
1039
1056
(vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
1040
1057
(inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
1041
1058
>;
@@ -1237,7 +1254,7 @@ multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, V
1237
1254
1238
1255
multiclass GlobalFLATAtomicPatsRtn<string nortn_inst_name, SDPatternOperator node,
1239
1256
ValueType vt, ValueType data_vt = vt> {
1240
- def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
1257
+ def : FlatSignedAtomicPatRtn <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
1241
1258
let AddedComplexity = 10;
1242
1259
}
1243
1260
@@ -1247,13 +1264,12 @@ multiclass GlobalFLATAtomicPatsRtn<string nortn_inst_name, SDPatternOperator nod
1247
1264
}
1248
1265
1249
1266
multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
1250
- ValueType data_vt = vt> {
1251
- defvar rtnNode = !cast<PatFrags>(node#"_ret_"# vt.Size);
1252
- defvar noRtnNode = !cast<PatFrags>(node#"_noret_"# vt.Size);
1267
+ ValueType data_vt = vt, bit isIntr = 0 > {
1268
+ defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size) );
1269
+ defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size) );
1253
1270
1254
1271
let AddedComplexity = 10 in {
1255
- def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
1256
- def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
1272
+ defm : FlatSignedAtomicPat <inst, node, vt, data_vt, isIntr>;
1257
1273
}
1258
1274
1259
1275
let AddedComplexity = 11 in {
@@ -1262,6 +1278,11 @@ multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
1262
1278
}
1263
1279
}
1264
1280
1281
// GlobalFLATAtomicIntrPats (multiclass, added by this diff): thin wrapper that
// instantiates GlobalFLATAtomicPats with isIntr = 1, so the "_ret" / "_noret"
// fragment names are built without the "_" # vt.Size suffix.
+ multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
1282
+ ValueType data_vt = vt> {
1283
+ defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
1284
+ }
1285
+
1265
1286
multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
1266
1287
ValueType vt> {
1267
1288
def : FlatSignedAtomicPatNoRtn <inst, node, vt> {
@@ -1427,6 +1448,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f3
1427
1448
// Selection patterns for GLOBAL_ATOMIC_FMIN/FMAX (f32) and their _X2 (f64)
// forms. The unchanged context lines match the atomic_load_fmin/fmax_global
// fragments; the added (+) lines add patterns keyed on the
// int_amdgcn_global_atomic_fmin/fmax names through GlobalFLATAtomicIntrPats.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1428
1449
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
1429
1450
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
1451
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
1452
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
1453
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
1454
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
1430
1455
}
1431
1456
1432
1457
let OtherPredicates = [HasAtomicFaddInsts] in {
@@ -1440,19 +1465,26 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16
1440
1465
// Selection patterns for the *_F64 add/min/max atomics plus the global
// f32 / packed-f16 fadd intrinsics.
// - Context lines: GLOBAL_ATOMIC_{ADD,MIN,MAX}_F64 matched from the
//   atomic_load_*_global fragments.
// - Removed (-) lines: one `def` of the FlatSignedAtomicPat class per
//   FLAT_ATOMIC_*_F64 instruction and per ret/noret fragment.
// - Added (+) lines: one `defm` of the FlatSignedAtomicPat multiclass per
//   FLAT_ATOMIC_*_F64 instruction, plus intrinsic-keyed patterns through
//   GlobalFLATAtomicIntrPats and FlatSignedIntrPat.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1441
1466
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
1442
1467
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1443
- def : FlatSignedAtomicPat <FLAT_ATOMIC_ADD_F64_RTN, atomic_load_fadd_flat_ret_64, f64>;
1444
- def : FlatSignedAtomicPat <FLAT_ATOMIC_ADD_F64, atomic_load_fadd_flat_noret_64, f64>;
1445
- def : FlatSignedAtomicPat <FLAT_ATOMIC_MIN_F64_RTN, atomic_load_fmin_flat_ret_64, f64>;
1446
- def : FlatSignedAtomicPat <FLAT_ATOMIC_MIN_F64, atomic_load_fmin_flat_noret_64, f64>;
1447
- def : FlatSignedAtomicPat <FLAT_ATOMIC_MAX_F64_RTN, atomic_load_fmax_flat_ret_64, f64>;
1448
- def : FlatSignedAtomicPat <FLAT_ATOMIC_MAX_F64, atomic_load_fmax_flat_noret_64, f64>;
1468
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", f32>;
1469
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", f64>;
1470
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", v2f16>;
1471
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1472
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1473
+ defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1474
+ defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1475
+ defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1476
+ defm : FlatSignedIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
1477
+ defm : FlatSignedIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1478
+ defm : FlatSignedIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1449
1479
}
1450
1480
1451
1481
// Selection patterns gated on OtherPredicates = [isGFX940Plus].
// The removed (-) lines named only the *_RTN instructions; the added (+)
// lines go through the FlatSignedAtomicPat / FlatSignedIntrPat /
// GlobalFLATAtomicIntrPats multiclasses, which take the base instruction name
// and derive the "_RTN" form themselves.
// NOTE(review): the bf16 patterns use v2i16 as the value type, while the
// FLAT/GLOBAL_ATOMIC_PK_ADD_BF16 pseudos earlier in this diff are declared
// with v2f16 -- confirm this difference is intended.
let OtherPredicates = [isGFX940Plus] in {
1452
- def : FlatSignedAtomicPat <FLAT_ATOMIC_ADD_F32_RTN, atomic_load_fadd_flat_32, f32>;
1453
- def : FlatSignedAtomicPat <FLAT_ATOMIC_PK_ADD_F16_RTN, atomic_load_fadd_v2f16_flat_32, v2f16>;
1454
- def : FlatSignedAtomicPat <FLAT_ATOMIC_PK_ADD_BF16_RTN, int_amdgcn_flat_atomic_fadd_v2bf16, v2i16>;
1455
- defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_PK_ADD_BF16", int_amdgcn_global_atomic_fadd_v2bf16, v2i16>;
1482
+ defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
1483
+ defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_flat", v2f16>;
1484
+ defm : FlatSignedIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;
1485
+ defm : FlatSignedIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>;
1486
+ defm : FlatSignedIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
1487
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
1456
1488
}
1457
1489
1458
1490
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
0 commit comments