From b7ede522fe3007672fdd9bcbc1ea937aeb28297d Mon Sep 17 00:00:00 2001
From: bababuck
Date: Tue, 8 Jul 2025 13:59:59 -0700
Subject: [PATCH] [RISCV] Optimize (and (icmp x, 0, eq), (icmp y, 0, eq))
 utilizing zicond extension

%1 = icmp x, 0, eq
%2 = icmp y, 0, eq
%3 = and %1, %2

Originally lowered to:

%1 = seqz x
%2 = seqz y
%3 = and %1, %2

With optimization:

%1 = seqz x
%3 = czero.eqz %1, y
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  8 ++-
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 +--
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  3 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 54 +++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.h     | 11 ++--
 llvm/test/CodeGen/RISCV/zicond-opts.ll        | 20 +++----
 7 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fee94cc167363..1cbddb0f470e6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2449,8 +2449,8 @@ class LLVM_ABI TargetLoweringBase {
   /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely
   /// that it saves us from materializing N0 and N1 in an integer register.
   /// Targets that are able to perform and/or on flags should return false here.
-  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
-                                               EVT VT) const {
+  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT,
+                                               SDNode *) const {
     // If a target has multiple condition registers, then it likely has logical
     // operations on those registers.
     if (hasMultipleConditionRegisters())
@@ -2462,6 +2462,10 @@ class LLVM_ABI TargetLoweringBase {
            Action != TypeSplitVector;
   }
 
+  // Return true if the target has a conditional zeroing instruction
+  // i.e. 
select cond, x, 0 + virtual bool hasConditionalZero() const { return false; } + virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } /// Return true if a select of constants (select Cond, C1, C2) should be diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9ffdda28f7899..691389b9d19f0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12050,7 +12050,9 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, // select Cond, T, Cond --> and Cond, freeze(T) // select Cond, T, 0 --> and Cond, freeze(T) - if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true)) + // select Cond, T, 0 is a conditional zero + if (Cond == F || (!TLI.hasConditionalZero() && + isNullOrNullSplat(F, /* AllowUndefs */ true))) return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T)); // select Cond, T, 1 --> or (not Cond), freeze(T) @@ -12061,7 +12063,7 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, } // select Cond, 0, F --> and (not Cond), freeze(F) - if (isNullOrNullSplat(T, /* AllowUndefs */ true)) { + if (!TLI.hasConditionalZero() && isNullOrNullSplat(T, /* AllowUndefs */ true)) { SDValue NotCond = matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT)); return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F)); @@ -12214,7 +12216,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. 
bool normalizeToSequence =
-      TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+      TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT, N);
   // select (and Cond0, Cond1), X, Y
   // -> select Cond0, (select Cond1, X, Y), Y
   if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6b7e9357aab5a..d2763a3f74b89 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -28262,8 +28262,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
   return all_equal(ValueVTs);
 }
 
-bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
-                                                            EVT) const {
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT,
+                                                            SDNode *) const {
   return false;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..504360ac8b7f5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -836,7 +836,8 @@ class AArch64TargetLowering : public TargetLowering {
                                 SmallVectorImpl<SDValue> &Results,
                                 SelectionDAG &DAG) const;
 
-  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
+  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT,
+                                       SDNode *) const override;
 
   void finalizeLowering(MachineFunction &MF) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5126ab6c31c28..3f73a095d6009 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2688,6 +2688,31 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
 }
 
+// Disable normalizing for most cases
+// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+// 
select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
+// If y == 0 and N0 == setcc(eqz || nez) -> czero (select(N1, X, 0), N0)
+bool RISCVTargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
+                                                          SDNode *N) const {
+  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
+    assert(
+        N->getOpcode() == ISD::SELECT &&
+        "shouldNormalizeToSelectSequence() called with non-SELECT operation");
+    const SDValue &CondV = N->getOperand(0);
+    if (CondV.getOpcode() == ISD::SETCC && isNullConstant(N->getOperand(2))) {
+      ISD::CondCode CondCode = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
+      if (CondCode == ISD::SETNE || CondCode == ISD::SETEQ) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool RISCVTargetLowering::hasConditionalZero() const {
+  return Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps();
+}
+
 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
   if (!ScalarTy.isSimple())
     return false;
@@ -15731,6 +15756,35 @@ static SDValue performANDCombine(SDNode *N,
   if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
     return V;
 
+  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
+    auto IsCzeroCompatible = [](const SDValue &Op0,
+                                const SDValue &Op1) -> bool {
+      if (Op0.getValueType() == MVT::i1 && Op1.getOpcode() == ISD::SETCC &&
+          isNullConstant(Op1.getOperand(1))) {
+        ISD::CondCode CondCode = cast<CondCodeSDNode>(Op1.getOperand(2))->get();
+        return CondCode == ISD::SETNE || CondCode == ISD::SETEQ;
+      }
+      return false;
+    };
+    // (and (i1) f, (setcc c, 0, ne)) -> (select c, f, 0) -> (czero.nez f, c)
+    // (and (i1) f, (setcc c, 0, eq)) -> (select c, 0, f) -> (czero.eqz f, c)
+    // (and (setcc c, 0, ne), (i1) g) -> (select c, g, 0) -> (czero.nez g, c)
+    // (and (setcc c, 0, eq), (i1) g) -> (select c, 0, g) -> (czero.eqz g, c)
+    if (IsCzeroCompatible(N->getOperand(0), N->getOperand(1)) ||
+        IsCzeroCompatible(N->getOperand(1), N->getOperand(0))) {
+      const bool CzeroOp1 =
IsCzeroCompatible(N->getOperand(0), N->getOperand(1));
+      const SDValue &I1Op = CzeroOp1 ? N->getOperand(0) : N->getOperand(1);
+      const SDValue &SetCCOp = CzeroOp1 ? N->getOperand(1) : N->getOperand(0);
+
+      ISD::CondCode CondCode =
+          cast<CondCodeSDNode>(SetCCOp.getOperand(2))->get();
+      SDLoc DL(N);
+      const SDValue &Condition = SetCCOp.getOperand(0);
+      return DAG.getNode(ISD::SELECT, DL, MVT::i1, SetCCOp, I1Op, DAG.getConstant(0, DL, MVT::i1));
+    }
+  }
+
   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
     return V;
   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
     return V;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f67d7f155c9d0..11726fb732ac5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -598,13 +598,10 @@ class RISCVTargetLowering : public TargetLowering {
   /// this override can be removed.
   bool mergeStoresAfterLegalization(EVT VT) const override;
 
-  /// Disable normalizing
-  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
-  /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y))
-  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR. 
- bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override { - return false; - } + bool shouldNormalizeToSelectSequence(LLVMContext &, EVT VT, + SDNode *N) const override; + + bool hasConditionalZero() const override; /// Disables storing and loading vectors by default when there are function /// calls between the load and store, since these are more expensive than just diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll index 35b06c4f4fb41..394fd8202d0dd 100644 --- a/llvm/test/CodeGen/RISCV/zicond-opts.ll +++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll @@ -8,16 +8,14 @@ define i32 @icmp_and(i64 %x, i64 %y) { ; RV32ZICOND: # %bb.0: ; RV32ZICOND-NEXT: or a2, a2, a3 ; RV32ZICOND-NEXT: or a0, a0, a1 -; RV32ZICOND-NEXT: snez a1, a2 ; RV32ZICOND-NEXT: snez a0, a0 -; RV32ZICOND-NEXT: and a0, a0, a1 +; RV32ZICOND-NEXT: czero.eqz a0, a0, a2 ; RV32ZICOND-NEXT: ret ; ; RV64ZICOND-LABEL: icmp_and: ; RV64ZICOND: # %bb.0: -; RV64ZICOND-NEXT: snez a1, a1 ; RV64ZICOND-NEXT: snez a0, a0 -; RV64ZICOND-NEXT: and a0, a0, a1 +; RV64ZICOND-NEXT: czero.eqz a0, a0, a1 ; RV64ZICOND-NEXT: ret %3 = icmp ne i64 %y, 0 %4 = icmp ne i64 %x, 0 @@ -32,21 +30,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) { ; RV32ZICOND: # %bb.0: ; RV32ZICOND-NEXT: or a2, a2, a3 ; RV32ZICOND-NEXT: or a0, a0, a1 -; RV32ZICOND-NEXT: or a4, a4, a5 ; RV32ZICOND-NEXT: snez a1, a2 -; RV32ZICOND-NEXT: snez a0, a0 -; RV32ZICOND-NEXT: and a0, a1, a0 -; RV32ZICOND-NEXT: snez a1, a4 -; RV32ZICOND-NEXT: and a0, a1, a0 +; RV32ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZICOND-NEXT: or a4, a4, a5 +; RV32ZICOND-NEXT: czero.eqz a0, a0, a4 ; RV32ZICOND-NEXT: ret ; ; RV64ZICOND-LABEL: icmp_and_and: ; RV64ZICOND: # %bb.0: ; RV64ZICOND-NEXT: snez a1, a1 -; RV64ZICOND-NEXT: snez a0, a0 -; RV64ZICOND-NEXT: and a0, a1, a0 -; RV64ZICOND-NEXT: snez a1, a2 -; RV64ZICOND-NEXT: and a0, a1, a0 +; RV64ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZICOND-NEXT: czero.eqz a0, a0, a2 ; 
RV64ZICOND-NEXT: ret %4 = icmp ne i64 %y, 0 %5 = icmp ne i64 %x, 0