@@ -5213,7 +5213,8 @@ static SDValue PerformLoadCombine(SDNode *N,
5213
5213
5214
5214
SmallDenseMap<SDNode *, unsigned > ExtractElts;
5215
5215
SmallVector<SDNode *> ProxyRegs (OrigNumResults, nullptr );
5216
- SmallVector<std::pair<SDNode *, unsigned /* offset*/ >> WorkList{{N, 0 }};
5216
+ SmallVector<std::pair<SDNode *, unsigned >> WorkList{{N, {}}};
5217
+ bool ProcessingInitialLoad = true ;
5217
5218
while (!WorkList.empty ()) {
5218
5219
auto [V, Offset] = WorkList.pop_back_val ();
5219
5220
@@ -5223,10 +5224,12 @@ static SDValue PerformLoadCombine(SDNode *N,
5223
5224
if (U.getValueType () == MVT::Other || U.getValueType () == MVT::Glue)
5224
5225
continue ; // we'll process chain/glue later
5225
5226
5227
+ if (ProcessingInitialLoad)
5228
+ Offset = U.getResNo ();
5229
+
5226
5230
SDNode *User = U.getUser ();
5227
5231
if (User->getOpcode () == NVPTXISD::ProxyReg) {
5228
- Offset = U.getResNo () * 2 ;
5229
- SDNode *&ProxyReg = ProxyRegs[Offset / 2 ];
5232
+ SDNode *&ProxyReg = ProxyRegs[Offset];
5230
5233
5231
5234
// We shouldn't have multiple proxy regs for the same value from the
5232
5235
// load, but bail out anyway since we don't handle this.
@@ -5238,13 +5241,13 @@ static SDValue PerformLoadCombine(SDNode *N,
5238
5241
User->getValueType (0 ) == MVT::v2f32 &&
5239
5242
U.getValueType () == MVT::i64 ) {
5240
5243
// match v2f32 = bitcast i64
5241
- Offset = U. getResNo () * 2 ;
5244
+ // continue and push the instruction
5242
5245
} else if (User->getOpcode () == ISD::EXTRACT_VECTOR_ELT &&
5243
5246
User->getValueType (0 ) == MVT::f32 ) {
5244
5247
// match f32 = extractelt v2f32
5245
5248
if (auto *CI = dyn_cast<ConstantSDNode>(User->getOperand (1 ))) {
5246
5249
unsigned Index = CI->getZExtValue ();
5247
- ExtractElts[User] = Offset + Index;
5250
+ ExtractElts[User] = 2 * Offset + Index;
5248
5251
continue ; // don't search
5249
5252
}
5250
5253
return SDValue (); // could not match
@@ -5254,6 +5257,9 @@ static SDValue PerformLoadCombine(SDNode *N,
5254
5257
// enqueue this to visit its uses
5255
5258
WorkList.push_back ({User, Offset});
5256
5259
}
5260
+
5261
+ // After we're done with the load, propagate the result offsets.
5262
+ ProcessingInitialLoad = false ;
5257
5263
}
5258
5264
5259
5265
// (2) If the load's value is only used as f32 elements, replace all
0 commit comments