diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index 3283a5bb69404..8f2e570299b3b 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -65,6 +65,9 @@ class PPCInstructionSelector : public InstructionSelector {
   bool selectI64Imm(MachineInstr &I, MachineBasicBlock &MBB,
                     MachineRegisterInfo &MRI) const;
 
+  bool selectUCMP(MachineInstr &I, MachineBasicBlock &MBB,
+                  MachineRegisterInfo &MRI) const;
+
   const PPCTargetMachine &TM;
   const PPCSubtarget &STI;
   const PPCInstrInfo &TII;
@@ -705,6 +708,75 @@ bool PPCInstructionSelector::selectConstantPool(
   return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
 }
 
+/// Select G_UCMP (unsigned three-way compare) as a branch-free carry chain.
+///
+/// Emits, in order:
+///   diff = LHS - RHS            (subf)
+///   t1   = RHS - LHS, sets CA   (subfc; only the carry is consumed)
+///   t2   = LHS - RHS - 1 + CA   (subfe; leaves CA = LHS >= RHS)
+///   dst  = diff - t2 - 1 + CA   (subfe)
+/// which puts -1, 0 or 1 in the destination for LHS <, == or > RHS.
+///
+/// The carry is produced by a full-width GPR operation, so the sequence is
+/// only correct when the operands fill the whole register. Every other case
+/// (for example s32 operands on a 64-bit subtarget, where the upper register
+/// bits are unspecified) is rejected by returning false rather than being
+/// miscompiled.
+///
+/// \returns true if \p I was replaced by the selected sequence.
+bool PPCInstructionSelector::selectUCMP(MachineInstr &I, MachineBasicBlock &MBB,
+                                        MachineRegisterInfo &MRI) const {
+  const DebugLoc &DbgLoc = I.getDebugLoc();
+  Register DstReg = I.getOperand(0).getReg();
+  Register LHS = I.getOperand(1).getReg();
+  Register RHS = I.getOperand(2).getReg();
+
+  const bool Is64Bit = STI.isPPC64();
+  const unsigned GPRBits = Is64Bit ? 64 : 32;
+  const LLT OpTy = MRI.getType(LHS);
+
+  // The final subfe writes the destination at full register width, so the
+  // destination must share the operand type, and that type must be exactly
+  // one GPR wide for the carry to describe the unsigned comparison.
+  if (!OpTy.isScalar() || OpTy.getSizeInBits() != GPRBits ||
+      MRI.getType(DstReg) != OpTy)
+    return false;
+
+  // Pick the 32- or 64-bit flavour of each instruction.
+  const unsigned SubfOp = Is64Bit ? PPC::SUBF8 : PPC::SUBF;
+  const unsigned SubfcOp = Is64Bit ? PPC::SUBFC8 : PPC::SUBFC;
+  const unsigned SubfeOp = Is64Bit ? PPC::SUBFE8 : PPC::SUBFE;
+  const TargetRegisterClass *RC =
+      Is64Bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+
+  // diff = LHS - RHS (subf RT, RA, RB computes RB - RA).
+  Register DiffReg = MRI.createVirtualRegister(RC);
+  auto Diff =
+      BuildMI(MBB, I, DbgLoc, TII.get(SubfOp), DiffReg).addReg(RHS).addReg(LHS);
+
+  // t1 = RHS - LHS; the value is unused, only the carry it sets matters.
+  Register T1Reg = MRI.createVirtualRegister(RC);
+  auto T1 =
+      BuildMI(MBB, I, DbgLoc, TII.get(SubfcOp), T1Reg).addReg(LHS).addReg(RHS);
+
+  // t2 = LHS - RHS - 1 + CA; also leaves CA = (LHS >= RHS).
+  Register T2Reg = MRI.createVirtualRegister(RC);
+  auto T2 =
+      BuildMI(MBB, I, DbgLoc, TII.get(SubfeOp), T2Reg).addReg(RHS).addReg(LHS);
+
+  // dst = diff - t2 - 1 + CA.
+  auto Result = BuildMI(MBB, I, DbgLoc, TII.get(SubfeOp), DstReg)
+                    .addReg(T2Reg)
+                    .addReg(DiffReg);
+
+  I.eraseFromParent();
+
+  return constrainSelectedInstRegOperands(*Diff, TII, TRI, RBI) &&
+         constrainSelectedInstRegOperands(*T1, TII, TRI, RBI) &&
+         constrainSelectedInstRegOperands(*T2, TII, TRI, RBI) &&
+         constrainSelectedInstRegOperands(*Result, TII, TRI, RBI);
+}
+
 bool PPCInstructionSelector::select(MachineInstr &I) {
   auto &MBB = *I.getParent();
   auto &MF = *MBB.getParent();
@@ -775,6 +847,8 @@ bool PPCInstructionSelector::select(MachineInstr &I) {
     return selectI64Imm(I, MBB, MRI);
   case TargetOpcode::G_CONSTANT_POOL:
     return selectConstantPool(I, MBB, MRI);
+  case TargetOpcode::G_UCMP:
+    return selectUCMP(I, MBB, MRI);
   }
   return false;
 }
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
index afc8f6bbde1b7..9f48333b5be8d 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -72,6 +72,11 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
   getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({S1},
                                                                {S32, S64});
 
+  // Add unsigned 3-way comparison support
+
getActionDefinitionsBuilder(G_UCMP)
+      .legalFor({S32, S64})
+      .clampScalar(0, S32, S64);
+
   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
       .legalForCartesianProduct({S64}, {S32, S64});
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5a4a63469ad6e..b70ef182a2a72 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1409,6 +1409,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
+  // Custom handling for PowerPC ucmp instruction
+  setOperationAction(ISD::UCMP, MVT::i32, Custom);
+  if (Subtarget.isPPC64())
+    setOperationAction(ISD::UCMP, MVT::i64, Custom);
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
                        ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
@@ -12470,6 +12475,50 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
 }
 
+/// Lower ISD::UCMP (unsigned three-way compare) to a branch-free carry chain
+/// that yields -1, 0 or 1 for A <, == or > B.
+///
+/// With CA denoting the carry bit:
+///   diff = A - B
+///   subc: B - A                    -> CA0 = (B >= A)
+///   sube: t2  = A - B - 1 + CA0    -> CA1 = (A >= B)
+///   sube: res = diff - t2 - 1 + CA1
+SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue A = Op.getOperand(0);
+  SDValue B = Op.getOperand(1);
+  EVT OpVT = A.getValueType();
+  EVT ResVT = Op.getValueType();
+
+  // In 64-bit mode the carry is computed from the full 64-bit registers, and
+  // the upper half of a register holding an i32 is unspecified. Zero-extend
+  // i32 operands so that the carry reflects the 32-bit unsigned comparison.
+  if (Subtarget.isPPC64() && OpVT == MVT::i32) {
+    A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
+    B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
+    OpVT = MVT::i64;
+  }
+
+  // diff = A - B.
+  SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
+
+  // B - A via SUBC; only its carry-out is used.
+  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+  SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
+  SDValue CA0 = SubC.getValue(1);
+
+  // t2 = A - B - 1 + CA0 via SUBE.
+  SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
+  SDValue CA1 = SubE1.getValue(1);
+
+  // res = diff - t2 - 1 + CA1 via SUBE.
+  SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
+
+  // The result type is independent of the operand type and may be narrower
+  // or wider; -1 must stay -1, so sign-extend when widening.
+  return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12574,6 +12623,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UADDO_CARRY:
   case ISD::USUBO_CARRY:
     return LowerADDSUBO_CARRY(Op, DAG);
+  case ISD::UCMP:
+    return LowerUCMP(Op, DAG);
   }
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4c88bd372b106..7e8dd166ad724 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1329,6 +1329,7 @@ namespace llvm {
     SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
                            SelectionDAG &DAG) const;
     SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll
index 39f9269997315..4998d87cf397b 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp.ll
@@ -6,12 +6,10 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ldbrx 3, 0, 3
 ; CHECK-NEXT:    ldbrx 4, 0, 4
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    subc 3, 4, 3
-; CHECK-NEXT:    subfe 3, 4, 4
-; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    neg 3, 3
-; CHECK-NEXT:    isellt 3, 4, 3
+; CHECK-NEXT:    subc 6, 4, 3
+; CHECK-NEXT:    sub 5, 3, 4
+; CHECK-NEXT:    subfe 3, 4, 3
+;
CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8) @@ -23,11 +21,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado ; CHECK: # %bb.0: ; CHECK-NEXT: lwbrx 3, 0, 3 ; CHECK-NEXT: lwbrx 4, 0, 4 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) ret i32 %call diff --git a/llvm/test/CodeGen/PowerPC/ucmp.ll b/llvm/test/CodeGen/PowerPC/ucmp.ll index d2dff6e7e05c8..4d393dd00e3db 100644 --- a/llvm/test/CodeGen/PowerPC/ucmp.ll +++ b/llvm/test/CodeGen/PowerPC/ucmp.ll @@ -4,12 +4,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 @@ -18,12 +16,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_16: ; CHECK: # %bb.0: -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 @@ -32,14 +28,10 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) 
nounwind { define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_8_32: ; CHECK: # %bb.0: -; CHECK-NEXT: clrldi 5, 4, 32 -; CHECK-NEXT: clrldi 6, 3, 32 -; CHECK-NEXT: sub 5, 5, 6 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 @@ -48,12 +40,10 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_8_64: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i64 %x, i64 %y) ret i8 %1 @@ -82,14 +72,10 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_32_32: ; CHECK: # %bb.0: -; CHECK-NEXT: clrldi 5, 4, 32 -; CHECK-NEXT: clrldi 6, 3, 32 -; CHECK-NEXT: sub 5, 5, 6 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 @@ -98,12 +84,10 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_32_64: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; 
CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i32 @llvm.ucmp(i64 %x, i64 %y) ret i32 %1 @@ -112,12 +96,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_64_64: ; CHECK: # %bb.0: -; CHECK-NEXT: subc 5, 4, 3 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: subfe 5, 4, 4 -; CHECK-NEXT: neg 5, 5 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i64 @llvm.ucmp(i64 %x, i64 %y) ret i64 %1