Skip to content

Commit da8a439

Browse files
committed
[AMDGPU] Allocate AVRegClass last
Change-Id: Iace3462f27ea276b22716793ebfa13b5026b0e58
1 parent c9b7fa8 commit da8a439

36 files changed

+2423
-2450
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,10 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
109109
let TSFlags{2} = HasVGPR;
110110
let TSFlags{3} = HasAGPR;
111111
let TSFlags{4} = HasSGPR;
112+
113+
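// Allocation priority of the register class kind (e.g. AGPR / VGPR / AV);
// it is scaled by 16 and added to the size-based priority when
// AllocationPriority is computed, so a kind with a lower value is allocated
// after kinds with a higher value.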
114+
field int RegClassPriority = 1;
115+
112116
}
113117

114118
multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
@@ -940,14 +944,15 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
940944

941945
// Requires n v_mov_b32 to copy
942946
let CopyCost = numRegs;
943-
let AllocationPriority = !sub(numRegs, 1);
947+
// Compress the tuple size into the low 4 bits of the priority: 1..14 registers map to 0..13, 15..16 map to 14, and anything larger maps to 15. RegClassPriority occupies the bits above (multiplied by 16).
defvar SizePriority = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
948+
let AllocationPriority = !add(SizePriority, !mul(RegClassPriority, 16));
944949
let Weight = numRegs;
945950
}
946951

947952
// Define a register tuple class, along with one requiring an even
948953
// aligned base register.
949954
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
950-
let HasVGPR = 1 in {
955+
let HasVGPR = 1, RegClassPriority = 1 in {
951956
// Define the regular class.
952957
def "" : VRegClassBase<numRegs, regTypes, regList> {
953958
let BaseClassOrder = !mul(numRegs, 32);
@@ -981,7 +986,7 @@ defm VReg_1024 : VRegClass<32, Reg1024Types.types, (add VGPR_1024)>;
981986
}
982987

983988
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
984-
let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
989+
let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, RegClassPriority = 1 in {
985990
// Define the regular class.
986991
def "" : VRegClassBase<numRegs, regTypes, regList> {
987992
let BaseClassOrder = !mul(numRegs, 32);
@@ -1074,7 +1079,7 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3
10741079
// aligned base register.
10751080
multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
10761081
dag vregList, dag aregList> {
1077-
let HasVGPR = 1, HasAGPR = 1 in {
1082+
let HasVGPR = 1, HasAGPR = 1, RegClassPriority = 0 in {
10781083
// Define the regular class.
10791084
def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
10801085

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll

Lines changed: 86 additions & 78 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll

Lines changed: 84 additions & 82 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll

Lines changed: 84 additions & 82 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -463,12 +463,13 @@ define <2 x half> @chain_hi_to_lo_flat(ptr inreg %ptr) {
463463
; GFX11-TRUE16: ; %bb.0: ; %bb
464464
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465465
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
466-
; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:2
466+
; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2
467+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0
467468
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
468-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
469469
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
470-
; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2]
470+
; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v2, v[0:1]
471471
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
472+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
472473
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
473474
;
474475
; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat:

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll

Lines changed: 114 additions & 104 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll

Lines changed: 240 additions & 261 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll

Lines changed: 240 additions & 261 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll

Lines changed: 114 additions & 104 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat_atomics.ll

Lines changed: 62 additions & 71 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)