Skip to content

Commit 94a88a6

Browse files
committed
[NVPTX] in combiner rule, fix propagation of offset into load results
1 parent fd94002 commit 94a88a6

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5341,7 +5341,8 @@ static SDValue PerformLoadCombine(SDNode *N,
53415341

53425342
SmallDenseMap<SDNode *, unsigned> ExtractElts;
53435343
SmallVector<SDNode *> ProxyRegs(OrigNumResults, nullptr);
5344-
SmallVector<std::pair<SDNode *, unsigned /*offset*/>> WorkList{{N, 0}};
5344+
SmallVector<std::pair<SDNode *, unsigned>> WorkList{{N, {}}};
5345+
bool ProcessingInitialLoad = true;
53455346
while (!WorkList.empty()) {
53465347
auto [V, Offset] = WorkList.pop_back_val();
53475348

@@ -5351,10 +5352,12 @@ static SDValue PerformLoadCombine(SDNode *N,
53515352
if (U.getValueType() == MVT::Other || U.getValueType() == MVT::Glue)
53525353
continue; // we'll process chain/glue later
53535354

5355+
if (ProcessingInitialLoad)
5356+
Offset = U.getResNo();
5357+
53545358
SDNode *User = U.getUser();
53555359
if (User->getOpcode() == NVPTXISD::ProxyReg) {
5356-
Offset = U.getResNo() * 2;
5357-
SDNode *&ProxyReg = ProxyRegs[Offset / 2];
5360+
SDNode *&ProxyReg = ProxyRegs[Offset];
53585361

53595362
// We shouldn't have multiple proxy regs for the same value from the
53605363
// load, but bail out anyway since we don't handle this.
@@ -5366,13 +5369,13 @@ static SDValue PerformLoadCombine(SDNode *N,
53665369
User->getValueType(0) == MVT::v2f32 &&
53675370
U.getValueType() == MVT::i64) {
53685371
// match v2f32 = bitcast i64
5369-
Offset = U.getResNo() * 2;
5372+
// continue and push the instruction
53705373
} else if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
53715374
User->getValueType(0) == MVT::f32) {
53725375
// match f32 = extractelt v2f32
53735376
if (auto *CI = dyn_cast<ConstantSDNode>(User->getOperand(1))) {
53745377
unsigned Index = CI->getZExtValue();
5375-
ExtractElts[User] = Offset + Index;
5378+
ExtractElts[User] = 2 * Offset + Index;
53765379
continue; // don't search
53775380
}
53785381
return SDValue(); // could not match
@@ -5382,6 +5385,9 @@ static SDValue PerformLoadCombine(SDNode *N,
53825385
// enqueue this to visit its uses
53835386
WorkList.push_back({User, Offset});
53845387
}
5388+
5389+
// After we're done with the load, propagate the result offsets.
5390+
ProcessingInitialLoad = false;
53855391
}
53865392

53875393
// (2) If the load's value is only used as f32 elements, replace all

0 commit comments

Comments
 (0)