@@ -464,7 +464,6 @@ multiclass FLAT_Atomic_Pseudo<
464
464
string opName,
465
465
RegisterClass vdst_rc,
466
466
ValueType vt,
467
- SDPatternOperator atomic = null_frag,
468
467
ValueType data_vt = vt,
469
468
RegisterClass data_rc = vdst_rc,
470
469
bit isFP = isFloatType<data_vt>.ret,
@@ -483,11 +482,9 @@ multiclass FLAT_Atomic_Pseudo<
483
482
def _RTN : FLAT_AtomicRet_Pseudo <opName,
484
483
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
485
484
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
486
- " $vdst, $vaddr, $vdata$offset$cpol",
487
- [(set vt:$vdst,
488
- (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
489
- GlobalSaddrTable<0, opName#"_rtn">,
490
- AtomicNoRet <opName, 1>{
485
+ " $vdst, $vaddr, $vdata$offset$cpol">,
486
+ GlobalSaddrTable<0, opName#"_rtn">,
487
+ AtomicNoRet <opName, 1> {
491
488
let FPAtomic = isFP;
492
489
let AddedComplexity = -1; // Prefer global atomics if available
493
490
}
@@ -530,7 +527,6 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
530
527
string opName,
531
528
RegisterClass vdst_rc,
532
529
ValueType vt,
533
- SDPatternOperator atomic = null_frag,
534
530
ValueType data_vt = vt,
535
531
RegisterClass data_rc = vdst_rc,
536
532
bit isFP = isFloatType<data_vt>.ret,
@@ -540,11 +536,9 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
540
536
def _RTN : FLAT_AtomicRet_Pseudo <opName,
541
537
(outs vdst_op:$vdst),
542
538
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
543
- " $vdst, $vaddr, $vdata, off$offset$cpol",
544
- [(set vt:$vdst,
545
- (atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
546
- GlobalSaddrTable<0, opName#"_rtn">,
547
- AtomicNoRet <opName, 1> {
539
+ " $vdst, $vaddr, $vdata, off$offset$cpol">,
540
+ GlobalSaddrTable<0, opName#"_rtn">,
541
+ AtomicNoRet <opName, 1> {
548
542
let has_saddr = 1;
549
543
let FPAtomic = isFP;
550
544
}
@@ -566,12 +560,11 @@ multiclass FLAT_Global_Atomic_Pseudo<
566
560
string opName,
567
561
RegisterClass vdst_rc,
568
562
ValueType vt,
569
- SDPatternOperator atomic_rtn = null_frag,
570
563
ValueType data_vt = vt,
571
564
RegisterClass data_rc = vdst_rc> {
572
565
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
573
566
defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
574
- defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
567
+ defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
575
568
}
576
569
}
577
570
@@ -608,93 +601,91 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
608
601
}
609
602
610
603
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
611
- VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
612
- v2i32, VReg_64>;
604
+ VGPR_32, i32, v2i32, VReg_64>;
613
605
614
606
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
615
- VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
616
- v2i64, VReg_128>;
607
+ VReg_64, i64, v2i64, VReg_128>;
617
608
618
609
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
619
- VGPR_32, i32, atomic_swap_flat_32 >;
610
+ VGPR_32, i32>;
620
611
621
612
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
622
- VReg_64, i64, atomic_swap_flat_64 >;
613
+ VReg_64, i64>;
623
614
624
615
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
625
- VGPR_32, i32, atomic_load_add_flat_32 >;
616
+ VGPR_32, i32>;
626
617
627
618
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
628
- VGPR_32, i32, atomic_load_sub_flat_32 >;
619
+ VGPR_32, i32>;
629
620
630
621
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
631
- VGPR_32, i32, atomic_load_min_flat_32 >;
622
+ VGPR_32, i32>;
632
623
633
624
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
634
- VGPR_32, i32, atomic_load_umin_flat_32 >;
625
+ VGPR_32, i32>;
635
626
636
627
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
637
- VGPR_32, i32, atomic_load_max_flat_32 >;
628
+ VGPR_32, i32>;
638
629
639
630
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
640
- VGPR_32, i32, atomic_load_umax_flat_32 >;
631
+ VGPR_32, i32>;
641
632
642
633
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
643
- VGPR_32, i32, atomic_load_and_flat_32 >;
634
+ VGPR_32, i32>;
644
635
645
636
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
646
- VGPR_32, i32, atomic_load_or_flat_32 >;
637
+ VGPR_32, i32>;
647
638
648
639
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
649
- VGPR_32, i32, atomic_load_xor_flat_32 >;
640
+ VGPR_32, i32>;
650
641
651
642
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
652
- VGPR_32, i32, atomic_inc_flat_32 >;
643
+ VGPR_32, i32>;
653
644
654
645
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
655
- VGPR_32, i32, atomic_dec_flat_32 >;
646
+ VGPR_32, i32>;
656
647
657
648
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
658
- VReg_64, i64, atomic_load_add_flat_64 >;
649
+ VReg_64, i64>;
659
650
660
651
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
661
- VReg_64, i64, atomic_load_sub_flat_64 >;
652
+ VReg_64, i64>;
662
653
663
654
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
664
- VReg_64, i64, atomic_load_min_flat_64 >;
655
+ VReg_64, i64>;
665
656
666
657
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
667
- VReg_64, i64, atomic_load_umin_flat_64 >;
658
+ VReg_64, i64>;
668
659
669
660
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
670
- VReg_64, i64, atomic_load_max_flat_64 >;
661
+ VReg_64, i64>;
671
662
672
663
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
673
- VReg_64, i64, atomic_load_umax_flat_64 >;
664
+ VReg_64, i64>;
674
665
675
666
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
676
- VReg_64, i64, atomic_load_and_flat_64 >;
667
+ VReg_64, i64>;
677
668
678
669
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
679
- VReg_64, i64, atomic_load_or_flat_64 >;
670
+ VReg_64, i64>;
680
671
681
672
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
682
- VReg_64, i64, atomic_load_xor_flat_64 >;
673
+ VReg_64, i64>;
683
674
684
675
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
685
- VReg_64, i64, atomic_inc_flat_64 >;
676
+ VReg_64, i64>;
686
677
687
678
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
688
- VReg_64, i64, atomic_dec_flat_64 >;
679
+ VReg_64, i64>;
689
680
690
681
// GFX7-, GFX10-only flat instructions.
691
682
let SubtargetPredicate = isGFX7GFX10 in {
692
683
693
684
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
694
- VGPR_32, f32, null_frag, v2f32, VReg_64>;
685
+ VGPR_32, f32, v2f32, VReg_64>;
695
686
696
687
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
697
- VReg_64, f64, null_frag, v2f64, VReg_128>;
688
+ VReg_64, f64, v2f64, VReg_128>;
698
689
699
690
defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
700
691
VGPR_32, f32>;
@@ -758,88 +749,86 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
758
749
759
750
let is_flat_global = 1 in {
760
751
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
761
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
762
- v2i32, VReg_64>;
752
+ VGPR_32, i32, v2i32, VReg_64>;
763
753
764
754
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
765
- VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
766
- v2i64, VReg_128>;
755
+ VReg_64, i64, v2i64, VReg_128>;
767
756
768
757
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
769
- VGPR_32, i32, atomic_swap_global_32 >;
758
+ VGPR_32, i32>;
770
759
771
760
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
772
- VReg_64, i64, atomic_swap_global_64 >;
761
+ VReg_64, i64>;
773
762
774
763
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
775
- VGPR_32, i32, atomic_load_add_global_32 >;
764
+ VGPR_32, i32>;
776
765
777
766
defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
778
- VGPR_32, i32, atomic_load_sub_global_32 >;
767
+ VGPR_32, i32>;
779
768
780
769
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
781
- VGPR_32, i32, atomic_load_min_global_32 >;
770
+ VGPR_32, i32>;
782
771
783
772
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
784
- VGPR_32, i32, atomic_load_umin_global_32 >;
773
+ VGPR_32, i32>;
785
774
786
775
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
787
- VGPR_32, i32, atomic_load_max_global_32 >;
776
+ VGPR_32, i32>;
788
777
789
778
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
790
- VGPR_32, i32, atomic_load_umax_global_32 >;
779
+ VGPR_32, i32>;
791
780
792
781
defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
793
- VGPR_32, i32, atomic_load_and_global_32 >;
782
+ VGPR_32, i32>;
794
783
795
784
defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
796
- VGPR_32, i32, atomic_load_or_global_32 >;
785
+ VGPR_32, i32>;
797
786
798
787
defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
799
- VGPR_32, i32, atomic_load_xor_global_32 >;
788
+ VGPR_32, i32>;
800
789
801
790
defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
802
- VGPR_32, i32, atomic_inc_global_32 >;
791
+ VGPR_32, i32>;
803
792
804
793
defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
805
- VGPR_32, i32, atomic_dec_global_32 >;
794
+ VGPR_32, i32>;
806
795
807
796
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
808
- VReg_64, i64, atomic_load_add_global_64 >;
797
+ VReg_64, i64>;
809
798
810
799
defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
811
- VReg_64, i64, atomic_load_sub_global_64 >;
800
+ VReg_64, i64>;
812
801
813
802
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
814
- VReg_64, i64, atomic_load_min_global_64 >;
803
+ VReg_64, i64>;
815
804
816
805
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
817
- VReg_64, i64, atomic_load_umin_global_64 >;
806
+ VReg_64, i64>;
818
807
819
808
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
820
- VReg_64, i64, atomic_load_max_global_64 >;
809
+ VReg_64, i64>;
821
810
822
811
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
823
- VReg_64, i64, atomic_load_umax_global_64 >;
812
+ VReg_64, i64>;
824
813
825
814
defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
826
- VReg_64, i64, atomic_load_and_global_64 >;
815
+ VReg_64, i64>;
827
816
828
817
defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
829
- VReg_64, i64, atomic_load_or_global_64 >;
818
+ VReg_64, i64>;
830
819
831
820
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
832
- VReg_64, i64, atomic_load_xor_global_64 >;
821
+ VReg_64, i64>;
833
822
834
823
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
835
- VReg_64, i64, atomic_inc_global_64 >;
824
+ VReg_64, i64>;
836
825
837
826
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
838
- VReg_64, i64, atomic_dec_global_64 >;
827
+ VReg_64, i64>;
839
828
840
829
let SubtargetPredicate = HasGFX10_BEncoding in
841
830
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
842
- VGPR_32, i32, int_amdgcn_global_atomic_csub >;
831
+ VGPR_32, i32>;
843
832
844
833
let SubtargetPredicate = isGFX940Plus in {
845
834
@@ -895,13 +884,13 @@ defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_d
895
884
896
885
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
897
886
defm GLOBAL_ATOMIC_FCMPSWAP :
898
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
887
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
899
888
defm GLOBAL_ATOMIC_FMIN :
900
889
FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
901
890
defm GLOBAL_ATOMIC_FMAX :
902
891
FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
903
892
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
904
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
893
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
905
894
defm GLOBAL_ATOMIC_FMIN_X2 :
906
895
FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
907
896
defm GLOBAL_ATOMIC_FMAX_X2 :
0 commit comments