Skip to content

Commit ec945d9

Browse files
committed
[AArch64] Use mov imm pseudo instructions in madd combine.
The usual path for lowering immediates in AArch64 is to generate a MOVi32imm or MOVi64imm pseudo instruction, which can be moved around / rematerialized as required and is expanded into one or more instructions after register allocation. The MachineCombiner code was generating MOVN/ORR/MOVZ instructions directly. This patch converts it to use the pseudos instead, allowing the generated immediates to be rematerialized if required. The code is hopefully simpler as a result, and the Sub and Add patterns have been combined to reduce duplication.
1 parent 71f6bfe commit ec945d9

File tree

3 files changed

+33
-110
lines changed

3 files changed

+33
-110
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 20 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -7868,62 +7868,47 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
78687868
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
78697869
break;
78707870
case AArch64MachineCombinerPattern::MULADDWI_OP1:
7871-
case AArch64MachineCombinerPattern::MULADDXI_OP1: {
7871+
case AArch64MachineCombinerPattern::MULADDXI_OP1:
7872+
case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7873+
case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
78727874
// MUL I=A,B,0
7873-
// ADD R,I,Imm
7874-
// ==> MOV V, Imm
7875+
// ADD/SUB R,I,Imm
7876+
// ==> MOV V, Imm/-Imm
78757877
// ==> MADD R,A,B,V
78767878
// --- Create(MADD);
7877-
const TargetRegisterClass *OrrRC;
7878-
unsigned BitSize, OrrOpc, ZeroReg;
7879-
if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
7880-
OrrOpc = AArch64::ORRWri;
7881-
OrrRC = &AArch64::GPR32spRegClass;
7879+
const TargetRegisterClass *RC;
7880+
unsigned BitSize, MovImm;
7881+
if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 ||
7882+
Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7883+
MovImm = AArch64::MOVi32imm;
7884+
RC = &AArch64::GPR32spRegClass;
78827885
BitSize = 32;
7883-
ZeroReg = AArch64::WZR;
78847886
Opc = AArch64::MADDWrrr;
78857887
RC = &AArch64::GPR32RegClass;
78867888
} else {
7887-
OrrOpc = AArch64::ORRXri;
7888-
OrrRC = &AArch64::GPR64spRegClass;
7889+
MovImm = AArch64::MOVi64imm;
7890+
RC = &AArch64::GPR64spRegClass;
78897891
BitSize = 64;
7890-
ZeroReg = AArch64::XZR;
78917892
Opc = AArch64::MADDXrrr;
78927893
RC = &AArch64::GPR64RegClass;
78937894
}
7894-
Register NewVR = MRI.createVirtualRegister(OrrRC);
7895+
Register NewVR = MRI.createVirtualRegister(RC);
78957896
uint64_t Imm = Root.getOperand(2).getImm();
78967897

78977898
if (Root.getOperand(3).isImm()) {
78987899
unsigned Val = Root.getOperand(3).getImm();
78997900
Imm = Imm << Val;
79007901
}
7901-
uint64_t UImm = SignExtend64(Imm, BitSize);
7902-
// The immediate can be composed via a single instruction.
7902+
bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
7903+
Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
7904+
uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
7905+
// Check that the immediate can be composed via a single instruction.
79037906
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
79047907
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
79057908
if (Insn.size() != 1)
79067909
return;
7907-
auto MovI = Insn.begin();
7908-
MachineInstrBuilder MIB1;
7909-
// MOV is an alias for one of three instructions: movz, movn, and orr.
7910-
if (MovI->Opcode == OrrOpc)
7911-
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7912-
.addReg(ZeroReg)
7913-
.addImm(MovI->Op2);
7914-
else {
7915-
if (BitSize == 32)
7916-
assert((MovI->Opcode == AArch64::MOVNWi ||
7917-
MovI->Opcode == AArch64::MOVZWi) &&
7918-
"Expected opcode");
7919-
else
7920-
assert((MovI->Opcode == AArch64::MOVNXi ||
7921-
MovI->Opcode == AArch64::MOVZXi) &&
7922-
"Expected opcode");
7923-
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7924-
.addImm(MovI->Op1)
7925-
.addImm(MovI->Op2);
7926-
}
7910+
MachineInstrBuilder MIB1 =
7911+
BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR).addImm(IsSub ? -Imm : Imm);
79277912
InsInstrs.push_back(MIB1);
79287913
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
79297914
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -7977,67 +7962,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
79777962
}
79787963
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
79797964
break;
7980-
case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7981-
case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
7982-
// MUL I=A,B,0
7983-
// SUB R,I, Imm
7984-
// ==> MOV V, -Imm
7985-
// ==> MADD R,A,B,V // = -Imm + A*B
7986-
// --- Create(MADD);
7987-
const TargetRegisterClass *OrrRC;
7988-
unsigned BitSize, OrrOpc, ZeroReg;
7989-
if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7990-
OrrOpc = AArch64::ORRWri;
7991-
OrrRC = &AArch64::GPR32spRegClass;
7992-
BitSize = 32;
7993-
ZeroReg = AArch64::WZR;
7994-
Opc = AArch64::MADDWrrr;
7995-
RC = &AArch64::GPR32RegClass;
7996-
} else {
7997-
OrrOpc = AArch64::ORRXri;
7998-
OrrRC = &AArch64::GPR64spRegClass;
7999-
BitSize = 64;
8000-
ZeroReg = AArch64::XZR;
8001-
Opc = AArch64::MADDXrrr;
8002-
RC = &AArch64::GPR64RegClass;
8003-
}
8004-
Register NewVR = MRI.createVirtualRegister(OrrRC);
8005-
uint64_t Imm = Root.getOperand(2).getImm();
8006-
if (Root.getOperand(3).isImm()) {
8007-
unsigned Val = Root.getOperand(3).getImm();
8008-
Imm = Imm << Val;
8009-
}
8010-
uint64_t UImm = SignExtend64(-Imm, BitSize);
8011-
// The immediate can be composed via a single instruction.
8012-
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8013-
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8014-
if (Insn.size() != 1)
8015-
return;
8016-
auto MovI = Insn.begin();
8017-
MachineInstrBuilder MIB1;
8018-
// MOV is an alias for one of three instructions: movz, movn, and orr.
8019-
if (MovI->Opcode == OrrOpc)
8020-
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
8021-
.addReg(ZeroReg)
8022-
.addImm(MovI->Op2);
8023-
else {
8024-
if (BitSize == 32)
8025-
assert((MovI->Opcode == AArch64::MOVNWi ||
8026-
MovI->Opcode == AArch64::MOVZWi) &&
8027-
"Expected opcode");
8028-
else
8029-
assert((MovI->Opcode == AArch64::MOVNXi ||
8030-
MovI->Opcode == AArch64::MOVZXi) &&
8031-
"Expected opcode");
8032-
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
8033-
.addImm(MovI->Op1)
8034-
.addImm(MovI->Op2);
8035-
}
8036-
InsInstrs.push_back(MIB1);
8037-
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8038-
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8039-
break;
8040-
}
80417965
case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
80427966
Opc = AArch64::MLAv8i8;
80437967
RC = &AArch64::FPR64RegClass;

llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
17-
; CHECK-NEXT: [[MOVZWi:%[0-9]+]]:gpr32common = nsw MOVZWi 79, 0
18-
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVZWi]]
17+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 79
18+
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
1919
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
2020
; CHECK-NEXT: RET_ReallyLR implicit $w0
2121
%0:gpr32 = COPY $w0
@@ -38,8 +38,8 @@ body: |
3838
; CHECK-NEXT: {{ $}}
3939
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
4040
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
41-
; CHECK-NEXT: [[MOVZXi:%[0-9]+]]:gpr64common = nsw MOVZXi 79, 0
42-
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVZXi]]
41+
; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 79
42+
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
4343
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
4444
; CHECK-NEXT: RET_ReallyLR implicit $x0
4545
%0:gpr64 = COPY $x0
@@ -62,8 +62,8 @@ body: |
6262
; CHECK-NEXT: {{ $}}
6363
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
6464
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
65-
; CHECK-NEXT: [[MOVNWi:%[0-9]+]]:gpr32common = nsw MOVNWi 0, 0
66-
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVNWi]]
65+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm -1
66+
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
6767
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
6868
; CHECK-NEXT: RET_ReallyLR implicit $w0
6969
%0:gpr32 = COPY $w0
@@ -86,8 +86,8 @@ body: |
8686
; CHECK-NEXT: {{ $}}
8787
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
8888
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
89-
; CHECK-NEXT: [[MOVNXi:%[0-9]+]]:gpr64common = nsw MOVNXi 0, 0
90-
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVNXi]]
89+
; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm -1
90+
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
9191
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
9292
; CHECK-NEXT: RET_ReallyLR implicit $x0
9393
%0:gpr64 = COPY $x0
@@ -110,8 +110,8 @@ body: |
110110
; CHECK-NEXT: {{ $}}
111111
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
112112
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
113-
; CHECK-NEXT: [[ORRWri:%[0-9]+]]:gpr32common = nsw ORRWri $wzr, 1291
114-
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[ORRWri]]
113+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 16773120
114+
; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
115115
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
116116
; CHECK-NEXT: RET_ReallyLR implicit $w0
117117
%0:gpr32 = COPY $w0
@@ -134,8 +134,8 @@ body: |
134134
; CHECK-NEXT: {{ $}}
135135
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
136136
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
137-
; CHECK-NEXT: [[ORRXri:%[0-9]+]]:gpr64common = nsw ORRXri $xzr, 7435
138-
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[ORRXri]]
137+
; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 16773120
138+
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
139139
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
140140
; CHECK-NEXT: RET_ReallyLR implicit $x0
141141
%0:gpr64 = COPY $x0

llvm/test/CodeGen/AArch64/madd-combiner.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,8 @@ define void @mul_add_imm2() {
3939
; CHECK-FAST-LABEL: mul_add_imm2:
4040
; CHECK-FAST: ; %bb.0: ; %entry
4141
; CHECK-FAST-NEXT: mov x8, #-3 ; =0xfffffffffffffffd
42-
; CHECK-FAST-NEXT: mov x9, #-3 ; =0xfffffffffffffffd
43-
; CHECK-FAST-NEXT: madd x8, x8, x8, x9
4442
; CHECK-FAST-NEXT: mov x9, #45968 ; =0xb390
43+
; CHECK-FAST-NEXT: madd x8, x8, x8, x8
4544
; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
4645
; CHECK-FAST-NEXT: movk x9, #323, lsl #32
4746
; CHECK-FAST-NEXT: LBB2_1: ; %for.body8

0 commit comments

Comments
 (0)