Skip to content

Commit f528816

Browse files
author
Krzysztof Parzyszek
committed
[Hexagon] Move selection of HVX multiply from lowering to patterns
Also, change the i32*i32 multiply to use V6_vmpyieoh + V6_vmpyiewuh_acc, which works on V60 as well, so the separate V60 and V62 code paths are no longer needed.
1 parent 0373c76 commit f528816

File tree

3 files changed

+27
-103
lines changed

3 files changed

+27
-103
lines changed

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 1 addition & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ HexagonTargetLowering::initializeHVXLowering() {
9191
setOperationAction(ISD::XOR, T, Legal);
9292
setOperationAction(ISD::ADD, T, Legal);
9393
setOperationAction(ISD::SUB, T, Legal);
94+
setOperationAction(ISD::MUL, T, Legal);
9495
setOperationAction(ISD::CTPOP, T, Legal);
9596
setOperationAction(ISD::CTLZ, T, Legal);
9697
if (T != ByteV) {
@@ -103,7 +104,6 @@ HexagonTargetLowering::initializeHVXLowering() {
103104
setOperationAction(ISD::LOAD, T, Custom);
104105
setOperationAction(ISD::MLOAD, T, Custom);
105106
setOperationAction(ISD::MSTORE, T, Custom);
106-
setOperationAction(ISD::MUL, T, Custom);
107107
setOperationAction(ISD::MULHS, T, Custom);
108108
setOperationAction(ISD::MULHU, T, Custom);
109109
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
@@ -1444,73 +1444,6 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
14441444
{VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
14451445
}
14461446

1447-
SDValue
1448-
HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
1449-
MVT ResTy = ty(Op);
1450-
assert(ResTy.isVector() && isHvxSingleTy(ResTy));
1451-
const SDLoc &dl(Op);
1452-
SmallVector<int,256> ShuffMask;
1453-
1454-
MVT ElemTy = ResTy.getVectorElementType();
1455-
unsigned VecLen = ResTy.getVectorNumElements();
1456-
SDValue Vs = Op.getOperand(0);
1457-
SDValue Vt = Op.getOperand(1);
1458-
1459-
switch (ElemTy.SimpleTy) {
1460-
case MVT::i8: {
1461-
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
1462-
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
1463-
// where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
1464-
MVT ExtTy = typeExtElem(ResTy, 2);
1465-
unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv
1466-
: Hexagon::V6_vmpyhv;
1467-
SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
1468-
1469-
// Discard high halves of the resulting values, collect the low halves.
1470-
for (unsigned I = 0; I < VecLen; I += 2) {
1471-
ShuffMask.push_back(I); // Pick even element.
1472-
ShuffMask.push_back(I+VecLen); // Pick odd element.
1473-
}
1474-
VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
1475-
SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
1476-
return DAG.getBitcast(ResTy, BS);
1477-
}
1478-
case MVT::i16:
1479-
// For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
1480-
// (There is also V6_vmpyhv, which behaves in an analogous way to
1481-
// V6_vmpybv.)
1482-
return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG);
1483-
case MVT::i32: {
1484-
auto MulL_V60 = [&](SDValue Vs, SDValue Vt) {
1485-
// Use the following sequence for signed word multiply:
1486-
// T0 = V6_vmpyiowh Vs, Vt
1487-
// T1 = V6_vaslw T0, 16
1488-
// T2 = V6_vmpyiewuh_acc T1, Vs, Vt
1489-
SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
1490-
SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
1491-
SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
1492-
SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
1493-
{T1, Vs, Vt}, DAG);
1494-
return T2;
1495-
};
1496-
auto MulL_V62 = [&](SDValue Vs, SDValue Vt) {
1497-
MVT PairTy = typeJoin({ResTy, ResTy});
1498-
SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy,
1499-
{Vs, Vt}, DAG);
1500-
SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
1501-
{T0, Vs, Vt}, DAG);
1502-
return opSplit(T1, dl, DAG).first;
1503-
};
1504-
if (Subtarget.useHVXV62Ops())
1505-
return MulL_V62(Vs, Vt);
1506-
return MulL_V60(Vs, Vt);
1507-
}
1508-
default:
1509-
break;
1510-
}
1511-
return SDValue();
1512-
}
1513-
15141447
SDValue
15151448
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
15161449
MVT ResTy = ty(Op);
@@ -2100,7 +2033,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
21002033
case ISD::SRA:
21012034
case ISD::SHL:
21022035
case ISD::SRL: return LowerHvxShift(Op, DAG);
2103-
case ISD::MUL: return LowerHvxMul(Op, DAG);
21042036
case ISD::MULHS:
21052037
case ISD::MULHU: return LowerHvxMulh(Op, DAG);
21062038
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);

llvm/lib/Target/Hexagon/HexagonPatternsHVX.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,20 @@ let Predicates = [UseHVX] in {
316316
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
317317
}
318318

319+
let Predicates = [UseHVX] in {
320+
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
321+
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
322+
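// where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
// The i8 result is the low byte of each i16 product; V6_vshuffeb takes the
// even (low) bytes of Hi and Lo and interleaves them back into element order.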
323+
def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
324+
(V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
325+
(LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
326+
def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
327+
(V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
328+
def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
329+
(V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
330+
HvxVR:$Vs, HvxVR:$Vt)>;
331+
}
332+
319333
let Predicates = [UseHVX] in {
320334
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
321335
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;

llvm/test/CodeGen/Hexagon/autohvx/arith.ll

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,16 @@ define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
223223
; --- mul
224224

225225
; CHECK-LABEL: mpyb_64:
226-
; CHECK: v[[H00:[0-9]+]]:[[L00:[0-9]+]].h = vmpy(v0.b,v1.b)
227-
; CHECK: vshuffe(v[[H00]].b,v[[L00]].b)
226+
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
227+
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
228228
define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
229229
%p = mul <64 x i8> %v0, %v1
230230
ret <64 x i8> %p
231231
}
232232

233233
; CHECK-LABEL: mpyb_128:
234-
; CHECK: v[[H10:[0-9]+]]:[[L10:[0-9]+]].h = vmpy(v0.b,v1.b)
235-
; CHECK: vshuffe(v[[H10]].b,v[[L10]].b)
234+
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
235+
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
236236
define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
237237
%p = mul <128 x i8> %v0, %v1
238238
ret <128 x i8> %p
@@ -252,43 +252,21 @@ define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
252252
ret <64 x i16> %p
253253
}
254254

255-
; CHECK-LABEL: mpyw_64_v60:
256-
; CHECK-DAG: r[[T00:[0-9]+]] = #16
257-
; CHECK-DAG: v[[T01:[0-9]+]].w = vmpyio(v0.w,v1.h)
258-
; CHECK: v[[T02:[0-9]+]].w = vasl(v[[T01]].w,r[[T00]])
259-
; CHECK: v[[T02]].w += vmpyie(v0.w,v1.uh)
260-
define <16 x i32> @mpyw_64_v60(<16 x i32> %v0, <16 x i32> %v1) #0 {
255+
; CHECK-LABEL: mpyw_64:
256+
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
257+
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
258+
define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
261259
%p = mul <16 x i32> %v0, %v1
262260
ret <16 x i32> %p
263261
}
264262

265-
; CHECK-LABEL: mpyw_128_v60:
266-
; CHECK-DAG: r[[T10:[0-9]+]] = #16
267-
; CHECK-DAG: v[[T11:[0-9]+]].w = vmpyio(v0.w,v1.h)
268-
; CHECK: v[[T12:[0-9]+]].w = vasl(v[[T11]].w,r[[T10]])
269-
; CHECK: v[[T12]].w += vmpyie(v0.w,v1.uh)
270-
define <32 x i32> @mpyw_128_v60(<32 x i32> %v0, <32 x i32> %v1) #1 {
271-
%p = mul <32 x i32> %v0, %v1
272-
ret <32 x i32> %p
273-
}
274-
275-
; CHECK-LABEL: mpyw_64_v62:
276-
; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
277-
; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
278-
define <16 x i32> @mpyw_64_v62(<16 x i32> %v0, <16 x i32> %v1) #3 {
279-
%p = mul <16 x i32> %v0, %v1
280-
ret <16 x i32> %p
281-
}
282-
283-
; CHECK-LABEL: mpyw_128_v62:
284-
; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
285-
; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
286-
define <32 x i32> @mpyw_128_v62(<32 x i32> %v0, <32 x i32> %v1) #4 {
263+
; CHECK-LABEL: mpyw_128:
264+
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
265+
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
266+
define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
287267
%p = mul <32 x i32> %v0, %v1
288268
ret <32 x i32> %p
289269
}
290270

291271
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
292272
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
293-
attributes #3 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
294-
attributes #4 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" }

0 commit comments

Comments
 (0)