Skip to content

Commit 165ae72

Browse files
committed
[AMDGPU] Remove atomic pattern args in FLAT_[Global_]Atomic_Pseudo defs
We already have explicit patterns for these.

Differential Revision: https://reviews.llvm.org/D124084
1 parent f935908 commit 165ae72

File tree

1 file changed

+64
-75
lines changed

1 file changed

+64
-75
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 64 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -464,7 +464,6 @@ multiclass FLAT_Atomic_Pseudo<
464464
string opName,
465465
RegisterClass vdst_rc,
466466
ValueType vt,
467-
SDPatternOperator atomic = null_frag,
468467
ValueType data_vt = vt,
469468
RegisterClass data_rc = vdst_rc,
470469
bit isFP = isFloatType<data_vt>.ret,
@@ -483,11 +482,9 @@ multiclass FLAT_Atomic_Pseudo<
483482
def _RTN : FLAT_AtomicRet_Pseudo <opName,
484483
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
485484
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
486-
" $vdst, $vaddr, $vdata$offset$cpol",
487-
[(set vt:$vdst,
488-
(atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
489-
GlobalSaddrTable<0, opName#"_rtn">,
490-
AtomicNoRet <opName, 1>{
485+
" $vdst, $vaddr, $vdata$offset$cpol">,
486+
GlobalSaddrTable<0, opName#"_rtn">,
487+
AtomicNoRet <opName, 1> {
491488
let FPAtomic = isFP;
492489
let AddedComplexity = -1; // Prefer global atomics if available
493490
}
@@ -530,7 +527,6 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
530527
string opName,
531528
RegisterClass vdst_rc,
532529
ValueType vt,
533-
SDPatternOperator atomic = null_frag,
534530
ValueType data_vt = vt,
535531
RegisterClass data_rc = vdst_rc,
536532
bit isFP = isFloatType<data_vt>.ret,
@@ -540,11 +536,9 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
540536
def _RTN : FLAT_AtomicRet_Pseudo <opName,
541537
(outs vdst_op:$vdst),
542538
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
543-
" $vdst, $vaddr, $vdata, off$offset$cpol",
544-
[(set vt:$vdst,
545-
(atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
546-
GlobalSaddrTable<0, opName#"_rtn">,
547-
AtomicNoRet <opName, 1> {
539+
" $vdst, $vaddr, $vdata, off$offset$cpol">,
540+
GlobalSaddrTable<0, opName#"_rtn">,
541+
AtomicNoRet <opName, 1> {
548542
let has_saddr = 1;
549543
let FPAtomic = isFP;
550544
}
@@ -566,12 +560,11 @@ multiclass FLAT_Global_Atomic_Pseudo<
566560
string opName,
567561
RegisterClass vdst_rc,
568562
ValueType vt,
569-
SDPatternOperator atomic_rtn = null_frag,
570563
ValueType data_vt = vt,
571564
RegisterClass data_rc = vdst_rc> {
572565
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
573566
defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
574-
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
567+
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
575568
}
576569
}
577570

@@ -608,93 +601,91 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
608601
}
609602

610603
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
611-
VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
612-
v2i32, VReg_64>;
604+
VGPR_32, i32, v2i32, VReg_64>;
613605

614606
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
615-
VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
616-
v2i64, VReg_128>;
607+
VReg_64, i64, v2i64, VReg_128>;
617608

618609
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
619-
VGPR_32, i32, atomic_swap_flat_32>;
610+
VGPR_32, i32>;
620611

621612
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
622-
VReg_64, i64, atomic_swap_flat_64>;
613+
VReg_64, i64>;
623614

624615
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
625-
VGPR_32, i32, atomic_load_add_flat_32>;
616+
VGPR_32, i32>;
626617

627618
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
628-
VGPR_32, i32, atomic_load_sub_flat_32>;
619+
VGPR_32, i32>;
629620

630621
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
631-
VGPR_32, i32, atomic_load_min_flat_32>;
622+
VGPR_32, i32>;
632623

633624
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
634-
VGPR_32, i32, atomic_load_umin_flat_32>;
625+
VGPR_32, i32>;
635626

636627
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
637-
VGPR_32, i32, atomic_load_max_flat_32>;
628+
VGPR_32, i32>;
638629

639630
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
640-
VGPR_32, i32, atomic_load_umax_flat_32>;
631+
VGPR_32, i32>;
641632

642633
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
643-
VGPR_32, i32, atomic_load_and_flat_32>;
634+
VGPR_32, i32>;
644635

645636
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
646-
VGPR_32, i32, atomic_load_or_flat_32>;
637+
VGPR_32, i32>;
647638

648639
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
649-
VGPR_32, i32, atomic_load_xor_flat_32>;
640+
VGPR_32, i32>;
650641

651642
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
652-
VGPR_32, i32, atomic_inc_flat_32>;
643+
VGPR_32, i32>;
653644

654645
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
655-
VGPR_32, i32, atomic_dec_flat_32>;
646+
VGPR_32, i32>;
656647

657648
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
658-
VReg_64, i64, atomic_load_add_flat_64>;
649+
VReg_64, i64>;
659650

660651
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
661-
VReg_64, i64, atomic_load_sub_flat_64>;
652+
VReg_64, i64>;
662653

663654
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
664-
VReg_64, i64, atomic_load_min_flat_64>;
655+
VReg_64, i64>;
665656

666657
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
667-
VReg_64, i64, atomic_load_umin_flat_64>;
658+
VReg_64, i64>;
668659

669660
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
670-
VReg_64, i64, atomic_load_max_flat_64>;
661+
VReg_64, i64>;
671662

672663
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
673-
VReg_64, i64, atomic_load_umax_flat_64>;
664+
VReg_64, i64>;
674665

675666
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
676-
VReg_64, i64, atomic_load_and_flat_64>;
667+
VReg_64, i64>;
677668

678669
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
679-
VReg_64, i64, atomic_load_or_flat_64>;
670+
VReg_64, i64>;
680671

681672
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
682-
VReg_64, i64, atomic_load_xor_flat_64>;
673+
VReg_64, i64>;
683674

684675
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
685-
VReg_64, i64, atomic_inc_flat_64>;
676+
VReg_64, i64>;
686677

687678
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
688-
VReg_64, i64, atomic_dec_flat_64>;
679+
VReg_64, i64>;
689680

690681
// GFX7-, GFX10-only flat instructions.
691682
let SubtargetPredicate = isGFX7GFX10 in {
692683

693684
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
694-
VGPR_32, f32, null_frag, v2f32, VReg_64>;
685+
VGPR_32, f32, v2f32, VReg_64>;
695686

696687
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
697-
VReg_64, f64, null_frag, v2f64, VReg_128>;
688+
VReg_64, f64, v2f64, VReg_128>;
698689

699690
defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
700691
VGPR_32, f32>;
@@ -758,88 +749,86 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
758749

759750
let is_flat_global = 1 in {
760751
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
761-
VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
762-
v2i32, VReg_64>;
752+
VGPR_32, i32, v2i32, VReg_64>;
763753

764754
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
765-
VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
766-
v2i64, VReg_128>;
755+
VReg_64, i64, v2i64, VReg_128>;
767756

768757
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
769-
VGPR_32, i32, atomic_swap_global_32>;
758+
VGPR_32, i32>;
770759

771760
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
772-
VReg_64, i64, atomic_swap_global_64>;
761+
VReg_64, i64>;
773762

774763
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
775-
VGPR_32, i32, atomic_load_add_global_32>;
764+
VGPR_32, i32>;
776765

777766
defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
778-
VGPR_32, i32, atomic_load_sub_global_32>;
767+
VGPR_32, i32>;
779768

780769
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
781-
VGPR_32, i32, atomic_load_min_global_32>;
770+
VGPR_32, i32>;
782771

783772
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
784-
VGPR_32, i32, atomic_load_umin_global_32>;
773+
VGPR_32, i32>;
785774

786775
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
787-
VGPR_32, i32, atomic_load_max_global_32>;
776+
VGPR_32, i32>;
788777

789778
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
790-
VGPR_32, i32, atomic_load_umax_global_32>;
779+
VGPR_32, i32>;
791780

792781
defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
793-
VGPR_32, i32, atomic_load_and_global_32>;
782+
VGPR_32, i32>;
794783

795784
defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
796-
VGPR_32, i32, atomic_load_or_global_32>;
785+
VGPR_32, i32>;
797786

798787
defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
799-
VGPR_32, i32, atomic_load_xor_global_32>;
788+
VGPR_32, i32>;
800789

801790
defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
802-
VGPR_32, i32, atomic_inc_global_32>;
791+
VGPR_32, i32>;
803792

804793
defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
805-
VGPR_32, i32, atomic_dec_global_32>;
794+
VGPR_32, i32>;
806795

807796
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
808-
VReg_64, i64, atomic_load_add_global_64>;
797+
VReg_64, i64>;
809798

810799
defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
811-
VReg_64, i64, atomic_load_sub_global_64>;
800+
VReg_64, i64>;
812801

813802
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
814-
VReg_64, i64, atomic_load_min_global_64>;
803+
VReg_64, i64>;
815804

816805
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
817-
VReg_64, i64, atomic_load_umin_global_64>;
806+
VReg_64, i64>;
818807

819808
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
820-
VReg_64, i64, atomic_load_max_global_64>;
809+
VReg_64, i64>;
821810

822811
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
823-
VReg_64, i64, atomic_load_umax_global_64>;
812+
VReg_64, i64>;
824813

825814
defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
826-
VReg_64, i64, atomic_load_and_global_64>;
815+
VReg_64, i64>;
827816

828817
defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
829-
VReg_64, i64, atomic_load_or_global_64>;
818+
VReg_64, i64>;
830819

831820
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
832-
VReg_64, i64, atomic_load_xor_global_64>;
821+
VReg_64, i64>;
833822

834823
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
835-
VReg_64, i64, atomic_inc_global_64>;
824+
VReg_64, i64>;
836825

837826
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
838-
VReg_64, i64, atomic_dec_global_64>;
827+
VReg_64, i64>;
839828

840829
let SubtargetPredicate = HasGFX10_BEncoding in
841830
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
842-
VGPR_32, i32, int_amdgcn_global_atomic_csub>;
831+
VGPR_32, i32>;
843832

844833
let SubtargetPredicate = isGFX940Plus in {
845834

@@ -895,13 +884,13 @@ defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_d
895884

896885
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
897886
defm GLOBAL_ATOMIC_FCMPSWAP :
898-
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
887+
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
899888
defm GLOBAL_ATOMIC_FMIN :
900889
FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
901890
defm GLOBAL_ATOMIC_FMAX :
902891
FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
903892
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
904-
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
893+
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
905894
defm GLOBAL_ATOMIC_FMIN_X2 :
906895
FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
907896
defm GLOBAL_ATOMIC_FMAX_X2 :

0 commit comments

Comments
 (0)