Skip to content

Commit 1d46733

Browse files
[X86] Use GFNI for LZCNT vXi8 ops (#141888)
This pull request implements vXi8 CTLZ lowering for X86 using GFNI instructions. Fixes #140729.
1 parent 11d8454 commit 1d46733

File tree

2 files changed

+241
-350
lines changed

2 files changed

+241
-350
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28988,6 +28988,30 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2898828988
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
2898928989
}
2899028990

28991+
/// Lower a vXi8 CTLZ node to a GFNI affine transform.
///
/// The sequence built here is:
///   1. BITREVERSE the source, so leading zeros become trailing zeros.
///   2. AND the reversed value with its negation (x & -x), which keeps only
///      the lowest set bit of each byte (or leaves a zero byte as zero).
///   3. Feed that byte through GF2P8AFFINEQB with a fixed 8x8 bit matrix and
///      an immediate of 8 to produce the final count.
///
/// \param Op        The CTLZ node being lowered; its result type must be vXi8.
/// \param DL        Debug location attached to every node created here.
/// \param DAG       SelectionDAG used to build the replacement nodes.
/// \param Subtarget Not consulted by this routine; the caller is expected to
///                  have checked for GFNI support already.
/// \returns The GF2P8AFFINEQB node that replaces \p Op.
static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
                                    SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);

  assert(VT.isVector() && VT.getVectorElementType() == MVT::i8 &&
         "Expected vXi8 input for GFNI-based CTLZ lowering");

  // Turn the leading-zero problem into a trailing-zero one.
  SDValue Rev = DAG.getNode(ISD::BITREVERSE, DL, VT, Src);

  // Isolate the lowest set bit per byte: Rev & (0 - Rev).
  SDValue NegRev = DAG.getNegative(Rev, DL, VT);
  SDValue LowestBit = DAG.getNode(ISD::AND, DL, VT, Rev, NegRev);

  // The affine matrix is materialised as one i64 per 64 bits of the vector and
  // then reinterpreted as bytes so it matches the operand type of the node.
  MVT MatrixVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
  SDValue MatrixBits = DAG.getConstant(0xAACCF0FF00000000ULL, DL, MatrixVT);
  SDValue Matrix = DAG.getBitcast(VT, MatrixBits);

  // Immediate XORed into the affine result. NOTE(review): with this matrix a
  // one-hot byte appears to map to its bit index with bit 3 cleared, while an
  // all-zero byte yields 8 - confirm against the GF2P8AFFINEQB definition.
  SDValue Imm = DAG.getTargetConstant(8, DL, MVT::i8);

  return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, LowestBit, Matrix, Imm);
}
29014+
2899129015
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
2899229016
SelectionDAG &DAG) {
2899329017
MVT VT = Op.getSimpleValueType();
@@ -28996,6 +29020,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
2899629020
SDLoc dl(Op);
2899729021
unsigned Opc = Op.getOpcode();
2899829022

29023+
if (VT.isVector() && VT.getScalarType() == MVT::i8 && Subtarget.hasGFNI())
29024+
return LowerVectorCTLZ_GFNI(Op, dl, DAG, Subtarget);
29025+
2899929026
if (VT.isVector())
2900029027
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
2900129028

0 commit comments

Comments
 (0)