@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
117
117
return LLT::scalar (32 );
118
118
}
119
119
120
- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
121
- const RegisterBankInfo &RBI);
120
+ using ReadLaneFnTy =
121
+ function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
122
+
123
+ static Register buildReadLane (MachineIRBuilder &, Register,
124
+ const RegisterBankInfo &, ReadLaneFnTy);
122
125
123
126
static void unmergeReadAnyLane (MachineIRBuilder &B,
124
127
SmallVectorImpl<Register> &SgprDstParts,
125
128
LLT UnmergeTy, Register VgprSrc,
126
- const RegisterBankInfo &RBI) {
129
+ const RegisterBankInfo &RBI,
130
+ ReadLaneFnTy BuildRL) {
127
131
const RegisterBank *VgprRB = &RBI.getRegBank (AMDGPU::VGPRRegBankID);
128
132
auto Unmerge = B.buildUnmerge ({VgprRB, UnmergeTy}, VgprSrc);
129
133
for (unsigned i = 0 ; i < Unmerge->getNumOperands () - 1 ; ++i) {
130
- SgprDstParts.push_back (buildReadAnyLane (B, Unmerge.getReg (i), RBI));
134
+ SgprDstParts.push_back (buildReadLane (B, Unmerge.getReg (i), RBI, BuildRL ));
131
135
}
132
136
}
133
137
134
- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
135
- const RegisterBankInfo &RBI) {
138
+ static Register buildReadLane (MachineIRBuilder &B, Register VgprSrc,
139
+ const RegisterBankInfo &RBI,
140
+ ReadLaneFnTy BuildRL) {
136
141
LLT Ty = B.getMRI ()->getType (VgprSrc);
137
142
const RegisterBank *SgprRB = &RBI.getRegBank (AMDGPU::SGPRRegBankID);
138
143
if (Ty.getSizeInBits () == 32 ) {
139
- return B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {{ SgprRB, Ty}}, {VgprSrc})
140
- .getReg (0 );
144
+ Register SgprDst = B. getMRI ()-> createVirtualRegister ({ SgprRB, Ty});
145
+ return BuildRL (B, SgprDst, VgprSrc) .getReg (0 );
141
146
}
142
147
143
148
SmallVector<Register, 8 > SgprDstParts;
144
- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
149
+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
150
+ BuildRL);
145
151
146
152
return B.buildMergeLikeInstr ({SgprRB, Ty}, SgprDstParts).getReg (0 );
147
153
}
148
154
149
- void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
150
- Register VgprSrc, const RegisterBankInfo &RBI) {
155
+ static void buildReadLane (MachineIRBuilder &B, Register SgprDst,
156
+ Register VgprSrc, const RegisterBankInfo &RBI,
157
+ ReadLaneFnTy BuildReadLane) {
151
158
LLT Ty = B.getMRI ()->getType (VgprSrc);
152
159
if (Ty.getSizeInBits () == 32 ) {
153
- B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, { SgprDst}, { VgprSrc} );
160
+ BuildReadLane (B, SgprDst, VgprSrc);
154
161
return ;
155
162
}
156
163
157
164
SmallVector<Register, 8 > SgprDstParts;
158
- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
165
+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
166
+ BuildReadLane);
159
167
160
168
B.buildMergeLikeInstr (SgprDst, SgprDstParts).getReg (0 );
161
169
}
170
+
171
+ void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
172
+ Register VgprSrc, const RegisterBankInfo &RBI) {
173
+ return buildReadLane (
174
+ B, SgprDst, VgprSrc, RBI,
175
+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
176
+ return B.buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
177
+ });
178
+ }
179
+
180
+ void AMDGPU::buildReadFirstLane (MachineIRBuilder &B, Register SgprDst,
181
+ Register VgprSrc, const RegisterBankInfo &RBI) {
182
+ return buildReadLane (
183
+ B, SgprDst, VgprSrc, RBI,
184
+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
185
+ return B.buildIntrinsic (Intrinsic::amdgcn_readfirstlane, SgprDst)
186
+ .addReg (VgprSrc);
187
+ });
188
+ }
0 commit comments