Skip to content

Commit 4b73838

Browse files
authored
[CodeGen] Do not use subsituteRegister to update implicit def (#148068)
It seems `subsituteRegister` checks `FromReg == ToReg` instead of `TRI->isSubRegisterEq`. This PR simply reverts the original PR (#131361) to its initial implementation (without using `subsituteRegister`). Not sure whether it is a desired fix (and by no means that I am an expert on LLVM backend), but it does fix a numeric error on our internal workload. Original author: @sdesmalen-arm
1 parent 67588d3 commit 4b73838

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,24 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
214214
Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false;
215215
bool Reg2IsRenamable =
216216
Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false;
217+
218+
// For a case like this:
219+
// %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
220+
// we need to update the implicit-def after commuting to result in:
221+
// %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
222+
SmallVector<unsigned> UpdateImplicitDefIdx;
223+
if (HasDef && MI.hasImplicitDef()) {
224+
const TargetRegisterInfo *TRI =
225+
MI.getMF()->getSubtarget().getRegisterInfo();
226+
for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) {
227+
Register ImplReg = MO.getReg();
228+
if ((ImplReg.isVirtual() && ImplReg == Reg0) ||
229+
(ImplReg.isPhysical() && Reg0.isPhysical() &&
230+
TRI->isSubRegisterEq(ImplReg, Reg0)))
231+
UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands());
232+
}
233+
}
234+
217235
// If destination is tied to either of the commuted source register, then
218236
// it must be updated.
219237
if (HasDef && Reg0 == Reg1 &&
@@ -238,15 +256,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
238256
}
239257

240258
if (HasDef) {
241-
// Use `substituteRegister` so that for a case like this:
242-
// %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
243-
// the implicit-def is also updated, to result in:
244-
// %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
245-
const TargetRegisterInfo &TRI =
246-
*MI.getMF()->getSubtarget().getRegisterInfo();
247-
Register FromReg = CommutedMI->getOperand(0).getReg();
248-
CommutedMI->substituteRegister(FromReg, Reg0, /*SubRegIdx=*/0, TRI);
259+
CommutedMI->getOperand(0).setReg(Reg0);
249260
CommutedMI->getOperand(0).setSubReg(SubReg0);
261+
for (unsigned Idx : UpdateImplicitDefIdx)
262+
CommutedMI->getOperand(Idx).setReg(Reg0);
250263
}
251264
CommutedMI->getOperand(Idx2).setReg(Reg1);
252265
CommutedMI->getOperand(Idx1).setReg(Reg2);

llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,24 @@ body: |
3535
%0:gr64_with_sub_8bit = COPY %1:gr64_with_sub_8bit
3636
RET 0, implicit %0
3737
...
38+
# Commuting instruction with 3 ops is handled correctly.
39+
---
40+
name: commuting_3_ops
41+
tracksRegLiveness: true
42+
body: |
43+
bb.0:
44+
liveins: $ymm0, $ymm1
45+
46+
; CHECK-LABEL: name: commuting_3_ops
47+
; CHECK: liveins: $ymm0, $ymm1
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
50+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
51+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = contract nofpexcept VFMADD213PSYr [[COPY1]], [[COPY]], [[COPY]], implicit $mxcsr
52+
; CHECK-NEXT: RET 0, implicit [[COPY1]]
53+
%0:vr256 = COPY $ymm1
54+
%1:vr256 = COPY $ymm0
55+
%0:vr256 = contract nofpexcept VFMADD231PSYr %0:vr256, %0:vr256, %1:vr256, implicit $mxcsr
56+
%1:vr256 = COPY %0:vr256
57+
RET 0, implicit %1
58+
...

0 commit comments

Comments
 (0)