@@ -5341,7 +5341,8 @@ static SDValue PerformLoadCombine(SDNode *N,
5341
5341
5342
5342
SmallDenseMap<SDNode *, unsigned > ExtractElts;
5343
5343
SmallVector<SDNode *> ProxyRegs (OrigNumResults, nullptr );
5344
- SmallVector<std::pair<SDNode *, unsigned /* offset*/ >> WorkList{{N, 0 }};
5344
+ SmallVector<std::pair<SDNode *, unsigned >> WorkList{{N, {}}};
5345
+ bool ProcessingInitialLoad = true ;
5345
5346
while (!WorkList.empty ()) {
5346
5347
auto [V, Offset] = WorkList.pop_back_val ();
5347
5348
@@ -5351,10 +5352,12 @@ static SDValue PerformLoadCombine(SDNode *N,
5351
5352
if (U.getValueType () == MVT::Other || U.getValueType () == MVT::Glue)
5352
5353
continue ; // we'll process chain/glue later
5353
5354
5355
+ if (ProcessingInitialLoad)
5356
+ Offset = U.getResNo ();
5357
+
5354
5358
SDNode *User = U.getUser ();
5355
5359
if (User->getOpcode () == NVPTXISD::ProxyReg) {
5356
- Offset = U.getResNo () * 2 ;
5357
- SDNode *&ProxyReg = ProxyRegs[Offset / 2 ];
5360
+ SDNode *&ProxyReg = ProxyRegs[Offset];
5358
5361
5359
5362
// We shouldn't have multiple proxy regs for the same value from the
5360
5363
// load, but bail out anyway since we don't handle this.
@@ -5366,13 +5369,13 @@ static SDValue PerformLoadCombine(SDNode *N,
5366
5369
User->getValueType (0 ) == MVT::v2f32 &&
5367
5370
U.getValueType () == MVT::i64 ) {
5368
5371
// match v2f32 = bitcast i64
5369
- Offset = U. getResNo () * 2 ;
5372
+ // continue and push the instruction
5370
5373
} else if (User->getOpcode () == ISD::EXTRACT_VECTOR_ELT &&
5371
5374
User->getValueType (0 ) == MVT::f32 ) {
5372
5375
// match f32 = extractelt v2f32
5373
5376
if (auto *CI = dyn_cast<ConstantSDNode>(User->getOperand (1 ))) {
5374
5377
unsigned Index = CI->getZExtValue ();
5375
- ExtractElts[User] = Offset + Index;
5378
+ ExtractElts[User] = 2 * Offset + Index;
5376
5379
continue ; // don't search
5377
5380
}
5378
5381
return SDValue (); // could not match
@@ -5382,6 +5385,9 @@ static SDValue PerformLoadCombine(SDNode *N,
5382
5385
// enqueue this to visit its uses
5383
5386
WorkList.push_back ({User, Offset});
5384
5387
}
5388
+
5389
+ // After we're done with the load, propagate the result offsets.
5390
+ ProcessingInitialLoad = false ;
5385
5391
}
5386
5392
5387
5393
// (2) If the load's value is only used as f32 elements, replace all
0 commit comments