Skip to content

Commit 1f0d0c6

Browse files
committed
Also update priorities for VGPR32/VGPR16
Change-Id: I2c56c9148c68fe820895d6eabb0a9058d04d7b4d
1 parent b8d576c commit 1f0d0c6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed: +3155 -3107 lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2290,6 +2290,7 @@ MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
22902290
LLVMContext &Ctx = MF->getFunction().getContext();
22912291
SmallVirtRegSet FixedRegisters;
22922292
RecoloringStack RecolorStack;
2293+
22932294
MCRegister Reg =
22942295
selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
22952296
if (Reg == ~0U && (CutOffInfo != CO_None)) {

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,7 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
112112

113113
// RegisterClass (e.g. AGPR / VGPR) priority for allocation
114114
field int RegClassPriority = 1;
115+
field int RegClassBit = 5;
115116

116117
}
117118

@@ -578,7 +579,7 @@ let HasVGPR = 1 in {
578579
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
579580
(add (interleave (sequence "VGPR%u_LO16", 0, 255),
580581
(sequence "VGPR%u_HI16", 0, 255)))> {
581-
let AllocationPriority = 2;
582+
let AllocationPriority = !add(2, !mul(RegClassPriority, !shl(1, RegClassBit)));
582583
let Size = 16;
583584
let GeneratePressureSet = 0;
584585

@@ -604,7 +605,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
604605
// i16/f16 only on VI+
605606
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
606607
(add (sequence "VGPR%u", 0, 255))> {
607-
let AllocationPriority = 0;
608+
let AllocationPriority = !add(0, !mul(RegClassPriority, !shl(1, RegClassBit)));
608609
let Size = 32;
609610
let Weight = 1;
610611
let BaseClassOrder = 32;
@@ -613,7 +614,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
613614
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
614615
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
615616
(add (sequence "VGPR%u", 0, 127))> {
616-
let AllocationPriority = 0;
617+
let AllocationPriority = !add(0, !mul(RegClassPriority, !shl(1, RegClassBit)));
617618
let GeneratePressureSet = 0;
618619
let Size = 32;
619620
let Weight = 1;
@@ -944,7 +945,7 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
944945
// Requires n v_mov_b32 to copy
945946
let CopyCost = numRegs;
946947
defvar SizePrioriity = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
947-
let AllocationPriority = !add(SizePrioriity, !mul(RegClassPriority, 16));
948+
let AllocationPriority = !add(SizePrioriity, !mul(RegClassPriority, !shl(1, RegClassBit)));
948949
let Weight = numRegs;
949950
}
950951

@@ -1070,6 +1071,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
10701071
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
10711072
let HasVGPR = 1;
10721073
let HasAGPR = 1;
1074+
let RegClassPriority = 0;
10731075
let Size = 32;
10741076
}
10751077
} // End GeneratePressureSet = 0

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 120 additions & 120 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll

Lines changed: 36 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -4295,15 +4295,15 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
42954295
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
42964296
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
42974297
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
4298-
; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4
4298+
; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4
42994299
; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo
43004300
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4301-
; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4
4302-
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, -4, v4
4303-
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 3, v6
4301+
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 3, v6
4302+
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, -4, v6
4303+
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 3, v4
43044304
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4305-
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v8, 0xffff
4306-
; GFX12-TRUE16-NEXT: v_not_b32_e32 v10, v6
4305+
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff
4306+
; GFX12-TRUE16-NEXT: v_not_b32_e32 v10, v7
43074307
; GFX12-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1
43084308
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
43094309
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1
@@ -4317,30 +4317,29 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
43174317
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffe
43184318
; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0
43194319
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4320-
; GFX12-TRUE16-NEXT: buffer_load_b32 v4, v9, s[4:7], null offen
4320+
; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v9, s[4:7], null offen
43214321
; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0
43224322
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_1
43234323
; GFX12-TRUE16-NEXT: ; %bb.2:
43244324
; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1
4325-
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
43264325
; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0
43274326
; GFX12-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start
43284327
; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1
43294328
; GFX12-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2
43304329
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4331-
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v4
4330+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v8, v6
43324331
; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo
43334332
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
43344333
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4335-
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v8, v6
4336-
; GFX12-TRUE16-NEXT: v_add_f16_e32 v4.l, v4.l, v7.l
4334+
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v6, v4, v8
4335+
; GFX12-TRUE16-NEXT: v_add_f16_e32 v6.l, v6.l, v5.l
43374336
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4338-
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4
4339-
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v8, v4
4337+
; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff, v6
4338+
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6
43404339
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4341-
; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v10, v4
4342-
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v5
4343-
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v6
4340+
; GFX12-TRUE16-NEXT: v_and_or_b32 v7, v8, v10, v6
4341+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v7
4342+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v7, v8
43444343
; GFX12-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1
43454344
; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2
43464345
; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
@@ -4355,21 +4354,21 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
43554354
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffe
43564355
; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0
43574356
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4358-
; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v9, s[4:7], null offen th:TH_ATOMIC_RETURN
4357+
; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[6:7], v9, s[4:7], null offen th:TH_ATOMIC_RETURN
43594358
; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0
43604359
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_4
43614360
; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
43624361
; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2
43634362
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
4364-
; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
4363+
; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8
43654364
; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV
43664365
; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1
43674366
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffe
43684367
; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1
43694368
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_3
43704369
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
43714370
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
4372-
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v8, v4
4371+
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v6
43734372
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
43744373
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
43754374
;
@@ -4527,16 +4526,16 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
45274526
; GFX11-TRUE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
45284527
; GFX11-TRUE16: ; %bb.0:
45294528
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4530-
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4
4529+
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4
45314530
; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
45324531
; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo
45334532
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4534-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4
4535-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, -4, v4
4536-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 3, v6
4533+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 3, v6
4534+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, -4, v6
4535+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 3, v4
45374536
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4538-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v8, 0xffff
4539-
; GFX11-TRUE16-NEXT: v_not_b32_e32 v10, v6
4537+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff
4538+
; GFX11-TRUE16-NEXT: v_not_b32_e32 v10, v7
45404539
; GFX11-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1
45414540
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
45424541
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1
@@ -4548,30 +4547,29 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
45484547
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
45494548
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
45504549
; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0
4551-
; GFX11-TRUE16-NEXT: buffer_load_b32 v4, v9, s[4:7], 0 offen
4550+
; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v9, s[4:7], 0 offen
45524551
; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0
45534552
; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_1
45544553
; GFX11-TRUE16-NEXT: ; %bb.2:
45554554
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2
4556-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
45574555
; GFX11-TRUE16-NEXT: .p2align 6
45584556
; GFX11-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start
45594557
; GFX11-TRUE16-NEXT: ; =>This Loop Header: Depth=1
45604558
; GFX11-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2
45614559
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
4562-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v4
4560+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, v6
45634561
; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo
45644562
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
45654563
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4566-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v8, v6
4567-
; GFX11-TRUE16-NEXT: v_add_f16_e32 v4.l, v4.l, v7.l
4564+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, v4, v8
4565+
; GFX11-TRUE16-NEXT: v_add_f16_e32 v6.l, v6.l, v5.l
45684566
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4569-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4
4570-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v8, v4
4567+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff, v6
4568+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6
45714569
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4572-
; GFX11-TRUE16-NEXT: v_and_or_b32 v5, v6, v10, v4
4573-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v5
4574-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v6
4570+
; GFX11-TRUE16-NEXT: v_and_or_b32 v7, v8, v10, v6
4571+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7
4572+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v8
45754573
; GFX11-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1
45764574
; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2
45774575
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
@@ -4585,13 +4583,13 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
45854583
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
45864584
; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0
45874585
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
4588-
; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v9, s[4:7], 0 offen glc
4586+
; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[6:7], v9, s[4:7], 0 offen glc
45894587
; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0
45904588
; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_4
45914589
; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
45924590
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2
45934591
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
4594-
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
4592+
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8
45954593
; GFX11-TRUE16-NEXT: buffer_gl1_inv
45964594
; GFX11-TRUE16-NEXT: buffer_gl0_inv
45974595
; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1
@@ -4600,7 +4598,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
46004598
; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_3
46014599
; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
46024600
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
4603-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v8, v4
4601+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v6
46044602
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
46054603
;
46064604
; GFX11-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:

0 commit comments

Comments
 (0)