Skip to content

Commit 5137c74

Browse files
committed
Also update priorities for VGPR32/VGPR16
Change-Id: I2c56c9148c68fe820895d6eabb0a9058d04d7b4d
1 parent bad32ca commit 5137c74

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+3161
-3106
lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2290,6 +2290,7 @@ MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
22902290
LLVMContext &Ctx = MF->getFunction().getContext();
22912291
SmallVirtRegSet FixedRegisters;
22922292
RecoloringStack RecolorStack;
2293+
22932294
MCRegister Reg =
22942295
selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
22952296
if (Reg == ~0U && (CutOffInfo != CO_None)) {

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,7 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
112112

113113
// RegisterClass (e.g. AGPR / VGPR) priority for allocation
114114
field int RegClassPriority = 1;
115+
field int RegClassBit = 5;
115116

116117
}
117118

@@ -579,7 +580,7 @@ let HasVGPR = 1 in {
579580
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
580581
(add (interleave (sequence "VGPR%u_LO16", 0, 255),
581582
(sequence "VGPR%u_HI16", 0, 255)))> {
582-
let AllocationPriority = 2;
583+
let AllocationPriority = !add(2, !mul(RegClassPriority, !shl(1, RegClassBit)));
583584
let Size = 16;
584585
let GeneratePressureSet = 0;
585586

@@ -605,7 +606,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
605606
// i16/f16 only on VI+
606607
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
607608
(add (sequence "VGPR%u", 0, 255))> {
608-
let AllocationPriority = 0;
609+
let AllocationPriority = !add(0, !mul(RegClassPriority, !shl(1, RegClassBit)));
609610
let Size = 32;
610611
let Weight = 1;
611612
let BaseClassOrder = 32;
@@ -614,7 +615,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
614615
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
615616
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
616617
(add (sequence "VGPR%u", 0, 127))> {
617-
let AllocationPriority = 0;
618+
let AllocationPriority = !add(0, !mul(RegClassPriority, !shl(1, RegClassBit)));
618619
let GeneratePressureSet = 0;
619620
let Size = 32;
620621
let Weight = 1;
@@ -945,7 +946,7 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
945946
// Requires n v_mov_b32 to copy
946947
let CopyCost = numRegs;
947948
defvar SizePrioriity = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
948-
let AllocationPriority = !add(SizePrioriity, !mul(RegClassPriority, 16));
949+
let AllocationPriority = !add(SizePrioriity, !mul(RegClassPriority, !shl(1, RegClassBit)));
949950
let Weight = numRegs;
950951
}
951952

@@ -1071,6 +1072,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
10711072
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
10721073
let HasVGPR = 1;
10731074
let HasAGPR = 1;
1075+
let RegClassPriority = 0;
10741076
let Size = 32;
10751077
}
10761078
} // End GeneratePressureSet = 0

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 120 additions & 120 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll

Lines changed: 36 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -4299,15 +4299,15 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
42994299
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
43004300
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
43014301
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
4302-
; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4
4302+
; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4
43034303
; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo
43044304
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4305-
; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4
4306-
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, -4, v4
4307-
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 3, v6
4305+
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 3, v6
4306+
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, -4, v6
4307+
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 3, v4
43084308
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4309-
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v8, 0xffff
4310-
; GFX12-TRUE16-NEXT: v_not_b32_e32 v10, v6
4309+
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff
4310+
; GFX12-TRUE16-NEXT: v_not_b32_e32 v10, v7
43114311
; GFX12-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1
43124312
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
43134313
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1
@@ -4326,24 +4326,24 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
43264326
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_1
43274327
; GFX12-TRUE16-NEXT: ; %bb.2:
43284328
; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1
4329-
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
43304329
; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0
43314330
; GFX12-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start
43324331
; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1
43334332
; GFX12-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2
43344333
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4335-
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v8, v6
4334+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v8, v6
43364335
; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo
43374336
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
43384337
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4339-
; GFX12-TRUE16-NEXT: v_add_f16_e32 v4.l, v4.l, v7.l
4340-
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4
4338+
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v6, v4, v8
4339+
; GFX12-TRUE16-NEXT: v_add_f16_e32 v6.l, v6.l, v5.l
43414340
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4342-
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v8, v4
4343-
; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v10, v4
4344-
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4345-
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v5
4346-
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v6
4341+
; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff, v6
4342+
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6
4343+
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4344+
; GFX12-TRUE16-NEXT: v_and_or_b32 v7, v8, v10, v6
4345+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v7
4346+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v7, v8
43474347
; GFX12-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1
43484348
; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2
43494349
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
@@ -4358,22 +4358,21 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
43584358
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffe
43594359
; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0
43604360
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4361-
; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v9, s[4:7], null offen th:TH_ATOMIC_RETURN
4361+
; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[6:7], v9, s[4:7], null offen th:TH_ATOMIC_RETURN
43624362
; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0
43634363
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_4
43644364
; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
43654365
; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2
43664366
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4367-
; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
4368-
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v4
4367+
; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8
43694368
; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV
43704369
; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1
43714370
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffe
43724371
; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1
43734372
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_3
43744373
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
43754374
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
4376-
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v8, v4
4375+
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v6
43774376
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
43784377
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
43794378
;
@@ -4530,16 +4529,16 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
45304529
; GFX11-TRUE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
45314530
; GFX11-TRUE16: ; %bb.0:
45324531
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4533-
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4
4532+
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4
45344533
; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
45354534
; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo
45364535
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4537-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4
4538-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, -4, v4
4539-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 3, v6
4536+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 3, v6
4537+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, -4, v6
4538+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 3, v4
45404539
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4541-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v8, 0xffff
4542-
; GFX11-TRUE16-NEXT: v_not_b32_e32 v10, v6
4540+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff
4541+
; GFX11-TRUE16-NEXT: v_not_b32_e32 v10, v7
45434542
; GFX11-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1
45444543
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
45454544
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1
@@ -4556,24 +4555,24 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
45564555
; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_1
45574556
; GFX11-TRUE16-NEXT: ; %bb.2:
45584557
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2
4559-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
45604558
; GFX11-TRUE16-NEXT: .p2align 6
45614559
; GFX11-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start
45624560
; GFX11-TRUE16-NEXT: ; =>This Loop Header: Depth=1
45634561
; GFX11-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2
45644562
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
4565-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v8, v6
4563+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v6
45664564
; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo
45674565
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
45684566
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4569-
; GFX11-TRUE16-NEXT: v_add_f16_e32 v4.l, v4.l, v7.l
4570-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4
4567+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, v4, v8
4568+
; GFX11-TRUE16-NEXT: v_add_f16_e32 v6.l, v6.l, v5.l
45714569
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4572-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v8, v4
4573-
; GFX11-TRUE16-NEXT: v_and_or_b32 v5, v6, v10, v4
4574-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4575-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v5
4576-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v6
4570+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff, v6
4571+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6
4572+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4573+
; GFX11-TRUE16-NEXT: v_and_or_b32 v7, v8, v10, v6
4574+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7
4575+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v8
45774576
; GFX11-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1
45784577
; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2
45794578
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
@@ -4587,14 +4586,13 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
45874586
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
45884587
; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0
45894588
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
4590-
; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v9, s[4:7], 0 offen glc
4589+
; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[6:7], v9, s[4:7], 0 offen glc
45914590
; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0
45924591
; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_4
45934592
; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
45944593
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2
45954594
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
4596-
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
4597-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v4
4595+
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8
45984596
; GFX11-TRUE16-NEXT: buffer_gl1_inv
45994597
; GFX11-TRUE16-NEXT: buffer_gl0_inv
46004598
; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1
@@ -4603,7 +4601,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
46034601
; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_3
46044602
; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
46054603
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
4606-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v8, v4
4604+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v6
46074605
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
46084606
;
46094607
; GFX11-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:

0 commit comments

Comments
 (0)