From c15bf091a2843a6ccc2d9aae77d337ebf23d9718 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 8 Jul 2025 13:48:34 +0100 Subject: [PATCH] [AArch64] Use mov imm pseudo instructions in madd combine. The usual path for lowering immediates in AArch64 is to generate a MOVi32imm or MOVi64imm pseudo instruction that can be moved / rematerialized around as required, being expanded into one or multiple instructions after register allocation. The code for the MachineCombiner was generating MOVN/ORR/MOVZ directly. This converts them to use the pseudos, allowing the generated immediates to be materialized if required. The code is hopefully simpler as a result, and the Sub and Add patterns have been combined to reduce duplication. --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 117 ++++-------------- .../AArch64/machine-combiner-maddimm.mir | 24 ++-- llvm/test/CodeGen/AArch64/madd-combiner.ll | 3 +- 3 files changed, 34 insertions(+), 110 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 8847c62690714..c1474773faa76 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -7868,62 +7868,48 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case AArch64MachineCombinerPattern::MULADDWI_OP1: - case AArch64MachineCombinerPattern::MULADDXI_OP1: { + case AArch64MachineCombinerPattern::MULADDXI_OP1: + case AArch64MachineCombinerPattern::MULSUBWI_OP1: + case AArch64MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 - // ADD R,I,Imm - // ==> MOV V, Imm + // ADD/SUB R,I,Imm + // ==> MOV V, Imm/-Imm + // ==> MADD R,A,B,V // --- Create(MADD); - const TargetRegisterClass *OrrRC; - unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) { - OrrOpc = AArch64::ORRWri; - OrrRC = &AArch64::GPR32spRegClass; + const TargetRegisterClass *RC; + 
unsigned BitSize, MovImm; + if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 || + Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) { + MovImm = AArch64::MOVi32imm; + RC = &AArch64::GPR32spRegClass; BitSize = 32; - ZeroReg = AArch64::WZR; Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { - OrrOpc = AArch64::ORRXri; - OrrRC = &AArch64::GPR64spRegClass; + MovImm = AArch64::MOVi64imm; + RC = &AArch64::GPR64spRegClass; BitSize = 64; - ZeroReg = AArch64::XZR; Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - Register NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(RC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { unsigned Val = Root.getOperand(3).getImm(); Imm = Imm << Val; } - uint64_t UImm = SignExtend64(Imm, BitSize); - // The immediate can be composed via a single instruction. + bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 || + Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1; + uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize); + // Check that the immediate can be composed via a single instruction. SmallVector Insn; AArch64_IMM::expandMOVImm(UImm, BitSize, Insn); if (Insn.size() != 1) return; - auto MovI = Insn.begin(); - MachineInstrBuilder MIB1; - // MOV is an alias for one of three instructions: movz, movn, and orr. - if (MovI->Opcode == OrrOpc) - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR) - .addReg(ZeroReg) - .addImm(MovI->Op2); - else { - if (BitSize == 32) - assert((MovI->Opcode == AArch64::MOVNWi || - MovI->Opcode == AArch64::MOVZWi) && - "Expected opcode"); - else - assert((MovI->Opcode == AArch64::MOVNXi || - MovI->Opcode == AArch64::MOVZXi) && - "Expected opcode"); - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR) - .addImm(MovI->Op1) - .addImm(MovI->Op2); - } + MachineInstrBuilder MIB1 = + BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR) + .addImm(IsSub ? 
-Imm : Imm); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); @@ -7977,67 +7963,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case AArch64MachineCombinerPattern::MULSUBWI_OP1: - case AArch64MachineCombinerPattern::MULSUBXI_OP1: { - // MUL I=A,B,0 - // SUB R,I, Imm - // ==> MOV V, -Imm - // ==> MADD R,A,B,V // = -Imm + A*B - // --- Create(MADD); - const TargetRegisterClass *OrrRC; - unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) { - OrrOpc = AArch64::ORRWri; - OrrRC = &AArch64::GPR32spRegClass; - BitSize = 32; - ZeroReg = AArch64::WZR; - Opc = AArch64::MADDWrrr; - RC = &AArch64::GPR32RegClass; - } else { - OrrOpc = AArch64::ORRXri; - OrrRC = &AArch64::GPR64spRegClass; - BitSize = 64; - ZeroReg = AArch64::XZR; - Opc = AArch64::MADDXrrr; - RC = &AArch64::GPR64RegClass; - } - Register NewVR = MRI.createVirtualRegister(OrrRC); - uint64_t Imm = Root.getOperand(2).getImm(); - if (Root.getOperand(3).isImm()) { - unsigned Val = Root.getOperand(3).getImm(); - Imm = Imm << Val; - } - uint64_t UImm = SignExtend64(-Imm, BitSize); - // The immediate can be composed via a single instruction. - SmallVector Insn; - AArch64_IMM::expandMOVImm(UImm, BitSize, Insn); - if (Insn.size() != 1) - return; - auto MovI = Insn.begin(); - MachineInstrBuilder MIB1; - // MOV is an alias for one of three instructions: movz, movn, and orr. 
- if (MovI->Opcode == OrrOpc) - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR) - .addReg(ZeroReg) - .addImm(MovI->Op2); - else { - if (BitSize == 32) - assert((MovI->Opcode == AArch64::MOVNWi || - MovI->Opcode == AArch64::MOVZWi) && - "Expected opcode"); - else - assert((MovI->Opcode == AArch64::MOVNXi || - MovI->Opcode == AArch64::MOVZXi) && - "Expected opcode"); - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR) - .addImm(MovI->Op1) - .addImm(MovI->Op2); - } - InsInstrs.push_back(MIB1); - InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); - break; - } case AArch64MachineCombinerPattern::MULADDv8i8_OP1: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir b/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir index dc75c8c61c53c..c944889ede695 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir @@ -14,8 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK-NEXT: [[MOVZWi:%[0-9]+]]:gpr32common = nsw MOVZWi 79, 0 - ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVZWi]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 79 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]] ; CHECK-NEXT: $w0 = COPY [[MADDWrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr32 = COPY $w0 @@ -38,8 +38,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[MOVZXi:%[0-9]+]]:gpr64common = nsw MOVZXi 79, 0 - ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVZXi]] + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 79 + ; 
CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]] ; CHECK-NEXT: $x0 = COPY [[MADDXrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr64 = COPY $x0 @@ -62,8 +62,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK-NEXT: [[MOVNWi:%[0-9]+]]:gpr32common = nsw MOVNWi 0, 0 - ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVNWi]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm -1 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]] ; CHECK-NEXT: $w0 = COPY [[MADDWrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr32 = COPY $w0 @@ -86,8 +86,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[MOVNXi:%[0-9]+]]:gpr64common = nsw MOVNXi 0, 0 - ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVNXi]] + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm -1 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]] ; CHECK-NEXT: $x0 = COPY [[MADDXrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr64 = COPY $x0 @@ -110,8 +110,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK-NEXT: [[ORRWri:%[0-9]+]]:gpr32common = nsw ORRWri $wzr, 1291 - ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[ORRWri]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 16773120 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]] ; CHECK-NEXT: $w0 = COPY [[MADDWrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr32 = COPY $w0 @@ -134,8 +134,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[ORRXri:%[0-9]+]]:gpr64common = nsw ORRXri $xzr, 7435 - ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[ORRXri]] + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 16773120 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]] ; CHECK-NEXT: $x0 = COPY [[MADDXrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr64 = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll index 6e510712fbd21..cc7fc8fc98629 100644 --- a/llvm/test/CodeGen/AArch64/madd-combiner.ll +++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll @@ -39,9 +39,8 @@ define void @mul_add_imm2() { ; CHECK-FAST-LABEL: mul_add_imm2: ; CHECK-FAST: ; %bb.0: ; %entry ; CHECK-FAST-NEXT: mov x8, #-3 ; =0xfffffffffffffffd -; CHECK-FAST-NEXT: mov x9, #-3 ; =0xfffffffffffffffd -; CHECK-FAST-NEXT: madd x8, x8, x8, x9 ; CHECK-FAST-NEXT: mov x9, #45968 ; =0xb390 +; CHECK-FAST-NEXT: madd x8, x8, x8, x8 ; CHECK-FAST-NEXT: movk x9, #48484, lsl #16 ; CHECK-FAST-NEXT: movk x9, #323, lsl #32 ; CHECK-FAST-NEXT: LBB2_1: ; %for.body8