@@ -110,9 +110,21 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
110
110
let TSFlags{3} = HasAGPR;
111
111
let TSFlags{4} = HasSGPR;
112
112
113
- // RegisterClass (e.g. AGPR / VGPR) priority for allocation
114
- field int RegClassPriority = 1;
115
- field int RegClassBit = 5;
113
+ // RA will use RegisterClass AllocationPriority amongst other info (e.g. ordering in the basic block)
114
+ // to decide which registers to try to assign first. Usually, this RegisterClass priority is given
115
+ // very high priority, if not the highest priority, when considering which VirtReg to allocate next.
116
+ //
117
+ // We have 5 bits to assign AllocationPriorities to RegisterClasses. Generally, it is beneficial to
118
+ // assign more constrained RegisterClasses first. As a result, we prioritize larger register classes
119
+ // over smaller register classes.
120
+ //
121
+ // The interesting case is the vector register case on architectures which have ARegs, VRegs, AVRegs.
122
+ // In this case, we would like to assign ARegs and VRegs before AVRegs, as AVRegs are less constrained
123
+ // and can be assigned to both AGPRs and VGPRs. We use the 5th bit to encode this into the
124
+ // RegisterClass AllocationPriority. BaseClassPriority is used to turn the bit on, and BaseClassScaleFactor
125
+ // is used for scaling of the bit (i.e. 1 << 4).
126
+ field int BaseClassPriority = 1;
127
+ field int BaseClassScaleFactor = 16;
116
128
117
129
}
118
130
@@ -576,7 +588,7 @@ let HasVGPR = 1 in {
576
588
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
577
589
(add (interleave (sequence "VGPR%u_LO16", 0, 255),
578
590
(sequence "VGPR%u_HI16", 0, 255)))> {
579
- let AllocationPriority = !add(2, !mul(RegClassPriority, !shl(1, RegClassBit) ));
591
+ let AllocationPriority = !add(2, !mul(BaseClassPriority, BaseClassScaleFactor ));
580
592
let Size = 16;
581
593
let GeneratePressureSet = 0;
582
594
@@ -602,7 +614,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
602
614
// i16/f16 only on VI+
603
615
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
604
616
(add (sequence "VGPR%u", 0, 255))> {
605
- let AllocationPriority = !add(0, !mul(RegClassPriority, !shl(1, RegClassBit) ));
617
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor ));
606
618
let Size = 32;
607
619
let Weight = 1;
608
620
let BaseClassOrder = 32;
@@ -611,7 +623,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
611
623
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
612
624
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
613
625
(add (sequence "VGPR%u", 0, 127))> {
614
- let AllocationPriority = !add(0, !mul(RegClassPriority, !shl(1, RegClassBit) ));
626
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor ));
615
627
let GeneratePressureSet = 0;
616
628
let Size = 32;
617
629
let Weight = 1;
@@ -941,15 +953,23 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
941
953
942
954
// Requires n v_mov_b32 to copy
943
955
let CopyCost = numRegs;
956
+
957
+ // Since we only have 5 bits for the RegisterClass Allocation Priority, and since we use the
958
+ // 5th bit for BaseClassPriority, we need to encode the SizePriority into 4 bits. As a result
959
+ // of this encoding, for registers with numRegs 15 or 16, we give SizePriority of 14, and for
960
+ // registers with numRegs 17+ we give SizePriority of 15. In practice, there is only one
961
+ // RegClass per Vector Register type in each of these groups (i.e. numRegs = 15,16 : {VReg_512},
962
+ // and numRegs = 17+ : {VReg_1024}). Therefore, we have not lost any info by compressing.
944
963
defvar SizePrioriity = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
945
- let AllocationPriority = !add(SizePrioriity, !mul(RegClassPriority, !shl(1, RegClassBit)));
964
+
965
+ let AllocationPriority = !add(SizePrioriity, !mul(BaseClassPriority, BaseClassScaleFactor));
946
966
let Weight = numRegs;
947
967
}
948
968
949
969
// Define a register tuple class, along with one requiring an even
950
970
// aligned base register.
951
971
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
952
- let HasVGPR = 1, RegClassPriority = 1 in {
972
+ let HasVGPR = 1, BaseClassPriority = 1 in {
953
973
// Define the regular class.
954
974
def "" : VRegClassBase<numRegs, regTypes, regList> {
955
975
let BaseClassOrder = !mul(numRegs, 32);
@@ -983,7 +1003,7 @@ defm VReg_1024 : VRegClass<32, Reg1024Types.types, (add VGPR_1024)>;
983
1003
}
984
1004
985
1005
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
986
- let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, RegClassPriority = 1 in {
1006
+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, BaseClassPriority = 1 in {
987
1007
// Define the regular class.
988
1008
def "" : VRegClassBase<numRegs, regTypes, regList> {
989
1009
let BaseClassOrder = !mul(numRegs, 32);
@@ -1068,7 +1088,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
1068
1088
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
1069
1089
let HasVGPR = 1;
1070
1090
let HasAGPR = 1;
1071
- let RegClassPriority = 0;
1091
+ let BaseClassPriority = 0;
1072
1092
let Size = 32;
1073
1093
}
1074
1094
} // End GeneratePressureSet = 0
@@ -1077,7 +1097,7 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3
1077
1097
// aligned base register.
1078
1098
multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
1079
1099
dag vregList, dag aregList> {
1080
- let HasVGPR = 1, HasAGPR = 1, RegClassPriority = 0 in {
1100
+ let HasVGPR = 1, HasAGPR = 1, BaseClassPriority = 0 in {
1081
1101
// Define the regular class.
1082
1102
def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
1083
1103
0 commit comments