Skip to content

Commit eeac0ff

Browse files
committed
Revert "[MachineLICM] Use RegisterClassInfo::getRegPressureSetLimit (llvm#119826)"
This reverts commit b4e17d4, which causes a large compile-time regression.
1 parent e0f14e1 commit eeac0ff

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+32037
-32470
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,6 @@ namespace {
124124
const TargetRegisterInfo *TRI = nullptr;
125125
const MachineFrameInfo *MFI = nullptr;
126126
MachineRegisterInfo *MRI = nullptr;
127-
RegisterClassInfo RegClassInfo;
128127
TargetSchedModel SchedModel;
129128
bool PreRegAlloc = false;
130129
bool HasProfileData = false;
@@ -393,7 +392,6 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
393392
MFI = &MF.getFrameInfo();
394393
MRI = &MF.getRegInfo();
395394
SchedModel.init(&ST);
396-
RegClassInfo.runOnMachineFunction(MF);
397395

398396
HasProfileData = MF.getFunction().hasProfileData();
399397

@@ -410,7 +408,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
410408
std::fill(RegPressure.begin(), RegPressure.end(), 0);
411409
RegLimit.resize(NumRPS);
412410
for (unsigned i = 0, e = NumRPS; i != e; ++i)
413-
RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i);
411+
RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
414412
}
415413

416414
if (HoistConstLoads)

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Lines changed: 325 additions & 345 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

Lines changed: 325 additions & 345 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -557,11 +557,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
557557
; GFX908-NEXT: s_mul_hi_u32 s9, s0, s7
558558
; GFX908-NEXT: s_mul_i32 s0, s0, s7
559559
; GFX908-NEXT: s_add_i32 s1, s9, s1
560-
; GFX908-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
560+
; GFX908-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
561561
; GFX908-NEXT: s_branch .LBB3_2
562562
; GFX908-NEXT: .LBB3_1: ; %Flow20
563563
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
564-
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[14:15]
564+
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[0:1]
565565
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
566566
; GFX908-NEXT: .LBB3_2: ; %bb9
567567
; GFX908-NEXT: ; =>This Loop Header: Depth=1
@@ -571,15 +571,17 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
571571
; GFX908-NEXT: ; %bb.3: ; %bb14
572572
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
573573
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
574+
; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
574575
; GFX908-NEXT: s_mov_b32 s7, s6
576+
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
575577
; GFX908-NEXT: v_mov_b32_e32 v4, s6
578+
; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6
576579
; GFX908-NEXT: v_mov_b32_e32 v6, s6
577580
; GFX908-NEXT: v_mov_b32_e32 v9, s7
578581
; GFX908-NEXT: v_mov_b32_e32 v5, s7
579582
; GFX908-NEXT: v_mov_b32_e32 v7, s7
580583
; GFX908-NEXT: v_mov_b32_e32 v8, s6
581-
; GFX908-NEXT: v_cmp_lt_i64_e64 s[14:15], s[4:5], 0
582-
; GFX908-NEXT: v_cmp_gt_i64_e64 s[16:17], s[4:5], -1
584+
; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
583585
; GFX908-NEXT: v_mov_b32_e32 v11, v5
584586
; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11]
585587
; GFX908-NEXT: v_mov_b32_e32 v10, v4
@@ -599,9 +601,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
599601
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
600602
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
601603
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
602-
; GFX908-NEXT: s_add_u32 s18, s18, s0
604+
; GFX908-NEXT: s_add_u32 s18, s18, s14
603605
; GFX908-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[2:3]
604-
; GFX908-NEXT: s_addc_u32 s19, s19, s1
606+
; GFX908-NEXT: s_addc_u32 s19, s19, s15
605607
; GFX908-NEXT: s_mov_b64 s[20:21], 0
606608
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
607609
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
@@ -620,7 +622,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
620622
; GFX908-NEXT: s_waitcnt vmcnt(0)
621623
; GFX908-NEXT: ds_read_b64 v[12:13], v19
622624
; GFX908-NEXT: ds_read_b64 v[14:15], v0
623-
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[16:17]
625+
; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
624626
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
625627
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
626628
; GFX908-NEXT: ; %bb.6: ; %bb51
@@ -648,7 +650,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
648650
; GFX908-NEXT: s_mov_b64 s[20:21], -1
649651
; GFX908-NEXT: s_branch .LBB3_4
650652
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
651-
; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
653+
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
652654
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
653655
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
654656
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
@@ -659,7 +661,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
659661
; GFX908-NEXT: s_xor_b64 s[16:17], s[20:21], -1
660662
; GFX908-NEXT: .LBB3_10: ; %Flow19
661663
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
662-
; GFX908-NEXT: s_mov_b64 s[14:15], -1
664+
; GFX908-NEXT: s_mov_b64 s[0:1], -1
663665
; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17]
664666
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
665667
; GFX908-NEXT: ; %bb.11: ; %bb12
@@ -668,7 +670,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
668670
; GFX908-NEXT: s_addc_u32 s5, s5, 0
669671
; GFX908-NEXT: s_add_u32 s10, s10, s12
670672
; GFX908-NEXT: s_addc_u32 s11, s11, s13
671-
; GFX908-NEXT: s_mov_b64 s[14:15], 0
673+
; GFX908-NEXT: s_mov_b64 s[0:1], 0
672674
; GFX908-NEXT: s_branch .LBB3_1
673675
; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
674676
; GFX908-NEXT: s_endpgm
@@ -718,11 +720,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
718720
; GFX90A-NEXT: s_mul_hi_u32 s9, s0, s7
719721
; GFX90A-NEXT: s_mul_i32 s0, s0, s7
720722
; GFX90A-NEXT: s_add_i32 s1, s9, s1
721-
; GFX90A-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
723+
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
722724
; GFX90A-NEXT: s_branch .LBB3_2
723725
; GFX90A-NEXT: .LBB3_1: ; %Flow20
724726
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
725-
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[14:15]
727+
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[0:1]
726728
; GFX90A-NEXT: s_cbranch_vccz .LBB3_12
727729
; GFX90A-NEXT: .LBB3_2: ; %bb9
728730
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
@@ -732,12 +734,14 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
732734
; GFX90A-NEXT: ; %bb.3: ; %bb14
733735
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
734736
; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
737+
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
735738
; GFX90A-NEXT: s_mov_b32 s7, s6
739+
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1]
736740
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
741+
; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8
737742
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[6:7], s[6:7] op_sel:[0,1]
738743
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
739-
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[14:15], s[4:5], 0
740-
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[16:17], s[4:5], -1
744+
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
741745
; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11]
742746
; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
743747
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -756,8 +760,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
756760
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
757761
; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
758762
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
759-
; GFX90A-NEXT: s_add_u32 s18, s18, s0
760-
; GFX90A-NEXT: s_addc_u32 s19, s19, s1
763+
; GFX90A-NEXT: s_add_u32 s18, s18, s14
764+
; GFX90A-NEXT: s_addc_u32 s19, s19, s15
761765
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[4:5]
762766
; GFX90A-NEXT: s_mov_b64 s[20:21], 0
763767
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
@@ -777,7 +781,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
777781
; GFX90A-NEXT: s_waitcnt vmcnt(0)
778782
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
779783
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
780-
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[16:17]
784+
; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
781785
; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
782786
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
783787
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
@@ -798,7 +802,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
798802
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
799803
; GFX90A-NEXT: s_branch .LBB3_4
800804
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
801-
; GFX90A-NEXT: s_mov_b64 s[20:21], s[14:15]
805+
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
802806
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
803807
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
804808
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
@@ -809,7 +813,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
809813
; GFX90A-NEXT: s_xor_b64 s[16:17], s[20:21], -1
810814
; GFX90A-NEXT: .LBB3_10: ; %Flow19
811815
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
812-
; GFX90A-NEXT: s_mov_b64 s[14:15], -1
816+
; GFX90A-NEXT: s_mov_b64 s[0:1], -1
813817
; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17]
814818
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
815819
; GFX90A-NEXT: ; %bb.11: ; %bb12
@@ -818,7 +822,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
818822
; GFX90A-NEXT: s_addc_u32 s5, s5, 0
819823
; GFX90A-NEXT: s_add_u32 s10, s10, s12
820824
; GFX90A-NEXT: s_addc_u32 s11, s11, s13
821-
; GFX90A-NEXT: s_mov_b64 s[14:15], 0
825+
; GFX90A-NEXT: s_mov_b64 s[0:1], 0
822826
; GFX90A-NEXT: s_branch .LBB3_1
823827
; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
824828
; GFX90A-NEXT: s_endpgm

0 commit comments

Comments
 (0)