Skip to content

[AArch64] Use mov imm pseudo instructions in madd combine. #147510

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 21 additions & 96 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7868,62 +7868,48 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDWI_OP1:
case AArch64MachineCombinerPattern::MULADDXI_OP1: {
case AArch64MachineCombinerPattern::MULADDXI_OP1:
case AArch64MachineCombinerPattern::MULSUBWI_OP1:
case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
// ==> MOV V, Imm
// ADD/SUB R,I,Imm
// ==> MOV V, Imm/-Imm
// ==> MADD R,A,B,V
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
const TargetRegisterClass *RC;
unsigned BitSize, MovImm;
if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 ||
Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
MovImm = AArch64::MOVi32imm;
RC = &AArch64::GPR32spRegClass;
BitSize = 32;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
OrrOpc = AArch64::ORRXri;
OrrRC = &AArch64::GPR64spRegClass;
MovImm = AArch64::MOVi64imm;
RC = &AArch64::GPR64spRegClass;
BitSize = 64;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(OrrRC);
Register NewVR = MRI.createVirtualRegister(RC);
uint64_t Imm = Root.getOperand(2).getImm();

if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(Imm, BitSize);
// The immediate can be composed via a single instruction.
bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
// Check that the immediate can be composed via a single instruction.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
if (Insn.size() != 1)
return;
auto MovI = Insn.begin();
MachineInstrBuilder MIB1;
// MOV is an alias for one of three instructions: movz, movn, and orr.
if (MovI->Opcode == OrrOpc)
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
.addReg(ZeroReg)
.addImm(MovI->Op2);
else {
if (BitSize == 32)
assert((MovI->Opcode == AArch64::MOVNWi ||
MovI->Opcode == AArch64::MOVZWi) &&
"Expected opcode");
else
assert((MovI->Opcode == AArch64::MOVNXi ||
MovI->Opcode == AArch64::MOVZXi) &&
"Expected opcode");
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
.addImm(MovI->Op1)
.addImm(MovI->Op2);
}
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
.addImm(IsSub ? -Imm : Imm);
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Expand Down Expand Up @@ -7977,67 +7963,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBWI_OP1:
case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
// ==> MOV V, -Imm
// ==> MADD R,A,B,V // = -Imm + A*B
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
OrrOpc = AArch64::ORRXri;
OrrRC = &AArch64::GPR64spRegClass;
BitSize = 64;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
// The immediate can be composed via a single instruction.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
if (Insn.size() != 1)
return;
auto MovI = Insn.begin();
MachineInstrBuilder MIB1;
// MOV is an alias for one of three instructions: movz, movn, and orr.
if (MovI->Opcode == OrrOpc)
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
.addReg(ZeroReg)
.addImm(MovI->Op2);
else {
if (BitSize == 32)
assert((MovI->Opcode == AArch64::MOVNWi ||
MovI->Opcode == AArch64::MOVZWi) &&
"Expected opcode");
else
assert((MovI->Opcode == AArch64::MOVNXi ||
MovI->Opcode == AArch64::MOVZXi) &&
"Expected opcode");
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
.addImm(MovI->Op1)
.addImm(MovI->Op2);
}
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[MOVZWi:%[0-9]+]]:gpr32common = nsw MOVZWi 79, 0
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVZWi]]
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 79
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
Expand All @@ -38,8 +38,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[MOVZXi:%[0-9]+]]:gpr64common = nsw MOVZXi 79, 0
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVZXi]]
; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 79
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
Expand All @@ -62,8 +62,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[MOVNWi:%[0-9]+]]:gpr32common = nsw MOVNWi 0, 0
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVNWi]]
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm -1
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
Expand All @@ -86,8 +86,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[MOVNXi:%[0-9]+]]:gpr64common = nsw MOVNXi 0, 0
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVNXi]]
; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm -1
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
Expand All @@ -110,8 +110,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[ORRWri:%[0-9]+]]:gpr32common = nsw ORRWri $wzr, 1291
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[ORRWri]]
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 16773120
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
Expand All @@ -134,8 +134,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[ORRXri:%[0-9]+]]:gpr64common = nsw ORRXri $xzr, 7435
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[ORRXri]]
; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 16773120
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/madd-combiner.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,8 @@ define void @mul_add_imm2() {
; CHECK-FAST-LABEL: mul_add_imm2:
; CHECK-FAST: ; %bb.0: ; %entry
; CHECK-FAST-NEXT: mov x8, #-3 ; =0xfffffffffffffffd
; CHECK-FAST-NEXT: mov x9, #-3 ; =0xfffffffffffffffd
; CHECK-FAST-NEXT: madd x8, x8, x8, x9
; CHECK-FAST-NEXT: mov x9, #45968 ; =0xb390
; CHECK-FAST-NEXT: madd x8, x8, x8, x8
; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
; CHECK-FAST-NEXT: movk x9, #323, lsl #32
; CHECK-FAST-NEXT: LBB2_1: ; %for.body8
Expand Down
Loading