@@ -28988,6 +28988,30 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
28988
28988
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
28989
28989
}
28990
28990
28991
+ static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
28992
+ SelectionDAG &DAG,
28993
+ const X86Subtarget &Subtarget) {
28994
+ MVT VT = Op.getSimpleValueType();
28995
+ SDValue Input = Op.getOperand(0);
28996
+
28997
+ assert(VT.isVector() && VT.getVectorElementType() == MVT::i8 &&
28998
+ "Expected vXi8 input for GFNI-based CTLZ lowering");
28999
+
29000
+ SDValue Reversed = DAG.getNode(ISD::BITREVERSE, DL, VT, Input);
29001
+
29002
+ SDValue Neg = DAG.getNegative(Reversed, DL, VT);
29003
+ SDValue Filtered = DAG.getNode(ISD::AND, DL, VT, Reversed, Neg);
29004
+
29005
+ MVT VT64 = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
29006
+ SDValue CTTZConst = DAG.getConstant(0xAACCF0FF00000000ULL, DL, VT64);
29007
+ SDValue CTTZMatrix = DAG.getBitcast(VT, CTTZConst);
29008
+
29009
+ SDValue LZCNT =
29010
+ DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, Filtered, CTTZMatrix,
29011
+ DAG.getTargetConstant(8, DL, MVT::i8));
29012
+ return LZCNT;
29013
+ }
29014
+
28991
29015
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
28992
29016
SelectionDAG &DAG) {
28993
29017
MVT VT = Op.getSimpleValueType();
@@ -28996,6 +29020,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
28996
29020
SDLoc dl(Op);
28997
29021
unsigned Opc = Op.getOpcode();
28998
29022
29023
+ if (VT.isVector() && VT.getScalarType() == MVT::i8 && Subtarget.hasGFNI())
29024
+ return LowerVectorCTLZ_GFNI(Op, dl, DAG, Subtarget);
29025
+
28999
29026
if (VT.isVector())
29000
29027
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
29001
29028
0 commit comments