Skip to content

Commit 7af3d39

Browse files
committed
[SystemZ] Optimize vector comparison reductions
Generate efficient code using the condition code set by the VECTOR (FP) COMPARE family of instructions to implement vector comparison reductions, e.g. as resulting from __builtin_reduce_and/or of some vector comparison. Fixes: llvm#129434
1 parent 86ae25d commit 7af3d39

File tree

3 files changed

+3896
-2
lines changed

3 files changed

+3896
-2
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
774774
ISD::UINT_TO_FP,
775775
ISD::STRICT_FP_EXTEND,
776776
ISD::BSWAP,
777+
ISD::SETCC,
777778
ISD::SDIV,
778779
ISD::UDIV,
779780
ISD::SREM,
@@ -3260,6 +3261,43 @@ static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
32603261
return;
32613262
if (C.Op0.getValueType() != MVT::i128)
32623263
return;
3264+
3265+
// Recognize vector comparison reductions.
3266+
if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3267+
C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3268+
(isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3269+
bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3270+
bool CmpNull = isNullConstant(C.Op1);
3271+
SDValue Src = peekThroughBitcasts(C.Op0);
3272+
if (Src.hasOneUse() && isBitwiseNot(Src)) {
3273+
Src = Src.getOperand(0);
3274+
CmpNull = !CmpNull;
3275+
}
3276+
unsigned Opcode = 0;
3277+
if (Src.hasOneUse()) {
3278+
switch (Src.getOpcode()) {
3279+
case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3280+
case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3281+
case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3282+
case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3283+
case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3284+
case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3285+
default: break;
3286+
}
3287+
}
3288+
if (Opcode) {
3289+
C.Opcode = Opcode;
3290+
C.Op0 = Src->getOperand(0);
3291+
C.Op1 = Src->getOperand(1);
3292+
C.CCValid = SystemZ::CCMASK_VCMP;
3293+
C.CCMask = CmpNull ? SystemZ::CCMASK_VCMP_NONE : SystemZ::CCMASK_VCMP_ALL;
3294+
if (!CmpEq)
3295+
C.CCMask ^= C.CCValid;
3296+
return;
3297+
}
3298+
}
3299+
3300+
// Everything below here is not useful if we have native i128 compares.
32633301
if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
32643302
return;
32653303

@@ -3443,8 +3481,14 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
34433481
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
34443482
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
34453483
}
3446-
if (C.Opcode == SystemZISD::VICMPES) {
3447-
SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3484+
if (C.Opcode == SystemZISD::VICMPES ||
3485+
C.Opcode == SystemZISD::VICMPHS ||
3486+
C.Opcode == SystemZISD::VICMPHLS ||
3487+
C.Opcode == SystemZISD::VFCMPES ||
3488+
C.Opcode == SystemZISD::VFCMPHS ||
3489+
C.Opcode == SystemZISD::VFCMPHES) {
3490+
EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3491+
SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
34483492
SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
34493493
return SDValue(Val.getNode(), 1);
34503494
}
@@ -8036,6 +8080,42 @@ SDValue SystemZTargetLowering::combineBSWAP(
80368080
return SDValue();
80378081
}
80388082

8083+
// Target DAG combine for ISD::SETCC nodes.
//
// Looks for an EQ/NE comparison against 0 or all-ones whose left-hand side
// is (possibly through bitcasts) the vXi1 result of a vector SETCC over
// 128-bit operands, and re-expresses the outer comparison as an i128 SETCC.
//
// N   - the ISD::SETCC node being combined (operand 2 is its condition code).
// DCI - combiner state; only its SelectionDAG is used here.
//
// Returns the replacement SETCC node, or an empty SDValue when the pattern
// does not match.
SDValue SystemZTargetLowering::combineSETCC(
8084+
SDNode *N, DAGCombinerInfo &DCI) const {
8085+
SelectionDAG &DAG = DCI.DAG;
8086+
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8087+
const SDValue LHS = N->getOperand(0);
8088+
const SDValue RHS = N->getOperand(1);
8089+
bool CmpNull = isNullConstant(RHS);
8090+
bool CmpAllOnes = isAllOnesConstant(RHS);
8091+
EVT VT = N->getValueType(0);
8092+
SDLoc DL(N);
8093+
8094+
// Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8095+
// change the outer compare to an i128 compare. This will normally
8096+
// allow the reduction to be recognized in adjustICmp128, and even if
8097+
// not, the i128 compare will still generate better code.
8098+
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8099+
SDValue Src = peekThroughBitcasts(LHS);
8100+
// Only a fixed-length vector-of-i1 SETCC result qualifies as the inner
// comparison.
if (Src.getOpcode() == ISD::SETCC &&
8101+
Src.getValueType().isFixedLengthVector() &&
8102+
Src.getValueType().getScalarType() == MVT::i1) {
8103+
// CmpVT is the type of the values the inner SETCC compares, not the
// type of its i1-vector result.
EVT CmpVT = Src.getOperand(0).getValueType();
8104+
if (CmpVT.getSizeInBits() == 128) {
8105+
// Convert the i1 lanes to the integer form of the operand type (128
// bits in total) so the value can be bitcast to a single i128.
EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8106+
// NOTE: this LHS and the RHS below shadow the function-level
// LHS/RHS declared above.
SDValue LHS =
8107+
DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8108+
SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8109+
: DAG.getAllOnesConstant(DL, MVT::i128);
8110+
// Rebuild the outer compare with the original result type, condition
// code operand and node flags.
return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8111+
N->getFlags());
8112+
}
8113+
}
8114+
}
8115+
8116+
// Pattern not matched: return an empty SDValue.
return SDValue();
8117+
}
8118+
80398119
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
80408120
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
80418121
// set by the CCReg instruction using the CCValid / CCMask masks,
@@ -8286,6 +8366,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
82868366
case ISD::SINT_TO_FP:
82878367
case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
82888368
case ISD::BSWAP: return combineBSWAP(N, DCI);
8369+
case ISD::SETCC: return combineSETCC(N, DCI);
82898370
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
82908371
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
82918372
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,7 @@ class SystemZTargetLowering : public TargetLowering {
755755
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
756756
SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
757757
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
758+
SDValue combineSETCC(SDNode *N, DAGCombinerInfo &DCI) const;
758759
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
759760
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
760761
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;

0 commit comments

Comments
 (0)