Skip to content

Commit 96acb3e

Browse files
committed
[NVPTX] in combiner rule, fix propagation of offset into load results
1 parent e946c24 commit 96acb3e

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5213,7 +5213,8 @@ static SDValue PerformLoadCombine(SDNode *N,
52135213

52145214
SmallDenseMap<SDNode *, unsigned> ExtractElts;
52155215
SmallVector<SDNode *> ProxyRegs(OrigNumResults, nullptr);
5216-
SmallVector<std::pair<SDNode *, unsigned /*offset*/>> WorkList{{N, 0}};
5216+
SmallVector<std::pair<SDNode *, unsigned>> WorkList{{N, {}}};
5217+
bool ProcessingInitialLoad = true;
52175218
while (!WorkList.empty()) {
52185219
auto [V, Offset] = WorkList.pop_back_val();
52195220

@@ -5223,10 +5224,12 @@ static SDValue PerformLoadCombine(SDNode *N,
52235224
if (U.getValueType() == MVT::Other || U.getValueType() == MVT::Glue)
52245225
continue; // we'll process chain/glue later
52255226

5227+
if (ProcessingInitialLoad)
5228+
Offset = U.getResNo();
5229+
52265230
SDNode *User = U.getUser();
52275231
if (User->getOpcode() == NVPTXISD::ProxyReg) {
5228-
Offset = U.getResNo() * 2;
5229-
SDNode *&ProxyReg = ProxyRegs[Offset / 2];
5232+
SDNode *&ProxyReg = ProxyRegs[Offset];
52305233

52315234
// We shouldn't have multiple proxy regs for the same value from the
52325235
// load, but bail out anyway since we don't handle this.
@@ -5238,13 +5241,13 @@ static SDValue PerformLoadCombine(SDNode *N,
52385241
User->getValueType(0) == MVT::v2f32 &&
52395242
U.getValueType() == MVT::i64) {
52405243
// match v2f32 = bitcast i64
5241-
Offset = U.getResNo() * 2;
5244+
// no early exit here: fall through so the bitcast is enqueued below with the same Offset
52425245
} else if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
52435246
User->getValueType(0) == MVT::f32) {
52445247
// match f32 = extractelt v2f32
52455248
if (auto *CI = dyn_cast<ConstantSDNode>(User->getOperand(1))) {
52465249
unsigned Index = CI->getZExtValue();
5247-
ExtractElts[User] = Offset + Index;
5250+
ExtractElts[User] = 2 * Offset + Index;
52485251
continue; // don't search
52495252
}
52505253
return SDValue(); // could not match
@@ -5254,6 +5257,9 @@ static SDValue PerformLoadCombine(SDNode *N,
52545257
// enqueue this to visit its uses
52555258
WorkList.push_back({User, Offset});
52565259
}
5260+
5261+
// Only the initial load's uses take Offset from the use's result number; nodes popped later keep the Offset they were enqueued with.
5262+
ProcessingInitialLoad = false;
52575263
}
52585264

52595265
// (2) If the load's value is only used as f32 elements, replace all

0 commit comments

Comments
 (0)