Skip to content

Commit fe1f0de

Browse files
committed
[RISCV][WebAssembly][TargetLowering] Allow expandCTLZ/expandCTTZ to rely on CTPOP expansion for vectors.
Our fallback expansion for CTLZ/CTTZ relies on CTPOP. If CTPOP isn't legal or custom for a vector type, we would scalarize the CTLZ/CTTZ. This is different from CTPOP itself, which would use a vector expansion. This patch teaches expandCTLZ/expandCTTZ to rely on the vector CTPOP expansion instead of scalarizing. To do this, I had to add additional checks to make sure the operations used by the CTPOP expansion are all supported. Some of the operations were already needed for the CTLZ/CTTZ expansion. This is a huge improvement for RISCV, which doesn't have a scalar ctlz or cttz in the base ISA. For WebAssembly, I've added Custom lowering to keep the scalarizing behavior. I've also extended the scalarizing to CTPOP. Differential Revision: https://reviews.llvm.org/D111919
1 parent 60e19f6 commit fe1f0de

File tree

5 files changed

+2234
-19661
lines changed

5 files changed

+2234
-19661
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6980,6 +6980,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
69806980
return SDValue();
69816981
}
69826982

6983+
// Returns true if the target supports the vector operations this helper
// checks for type VT, so that a vector CTPOP can be expanded rather than
// scalarized. VT must be a vector type (asserted). The checks are: ADD, SUB
// and SRL must be Legal or Custom; AND must be Legal, Custom or Promote; and
// MUL must be Legal or Custom unless the elements are 8 bits wide.
6984+
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
6985+
assert(VT.isVector() && "Expected vector type");
6986+
unsigned Len = VT.getScalarSizeInBits();
6987+
return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
6988+
TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
6989+
TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
6990+
(Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
6991+
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
6992+
}
6993+
69836994
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
69846995
SelectionDAG &DAG) const {
69856996
SDLoc dl(Node);
@@ -6994,11 +7005,7 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
69947005
return false;
69957006

69967007
// Only expand vector types if we have the appropriate vector bit operations.
6997-
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
6998-
!isOperationLegalOrCustom(ISD::SUB, VT) ||
6999-
!isOperationLegalOrCustom(ISD::SRL, VT) ||
7000-
(Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
7001-
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
7008+
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
70027009
return false;
70037010

70047011
// This is the "best" algorithm from
@@ -7068,8 +7075,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
70687075
}
70697076

70707077
// Only expand vector types if we have the appropriate vector bit operations.
7078+
// This includes the operations needed to expand CTPOP if it isn't supported.
70717079
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
7072-
!isOperationLegalOrCustom(ISD::CTPOP, VT) ||
7080+
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
7081+
!canExpandVectorCTPOP(*this, VT)) ||
70737082
!isOperationLegalOrCustom(ISD::SRL, VT) ||
70747083
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
70757084
return false;
@@ -7120,9 +7129,11 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
71207129
}
71217130

71227131
// Only expand vector types if we have the appropriate vector bit operations.
7132+
// This includes the operations needed to expand CTPOP if it isn't supported.
71237133
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
71247134
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
7125-
!isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
7135+
!isOperationLegalOrCustom(ISD::CTLZ, VT) &&
7136+
!canExpandVectorCTPOP(*this, VT)) ||
71267137
!isOperationLegalOrCustom(ISD::SUB, VT) ||
71277138
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
71287139
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
215215
setOperationAction(ISD::SELECT_CC, T, Expand);
216216

217217
// Expand integer operations supported for scalars but not SIMD
218-
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
219-
ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
218+
for (auto Op :
219+
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
220220
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
221221
setOperationAction(Op, T, Expand);
222222

@@ -225,8 +225,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
225225
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
226226
setOperationAction(Op, T, Legal);
227227

228-
// And we have popcnt for i8x16
228+
// And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
229229
setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
230+
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
231+
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
232+
233+
// Custom lower bit counting operations for other types to scalarize them.
234+
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
235+
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
236+
setOperationAction(Op, T, Custom);
230237

231238
// Expand float operations supported for scalars but not SIMD
232239
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
@@ -1405,6 +1412,10 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
14051412
return LowerLoad(Op, DAG);
14061413
case ISD::STORE:
14071414
return LowerStore(Op, DAG);
1415+
case ISD::CTPOP:
1416+
case ISD::CTLZ:
1417+
case ISD::CTTZ:
1418+
return DAG.UnrollVectorOp(Op.getNode());
14081419
}
14091420
}
14101421

0 commit comments

Comments
 (0)