Skip to content

Commit 8152d6f

Browse files
committed
[Hexagon] Handle bitcast of i32/v2i16/v4i8 -> v32i1 when HVX is enabled
Change-Id: I1f3c4783424a2c068f207a7680f85ef95c70f573
1 parent 18991f4 commit 8152d6f

File tree

4 files changed

+187
-0
lines changed

4 files changed

+187
-0
lines changed

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ HexagonTargetLowering::initializeHVXLowering() {
117117
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
118118
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
119119

120+
if (Subtarget.useHVX128BOps())
121+
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
120122
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
121123
Subtarget.useHVXFloatingPoint()) {
122124

@@ -2001,6 +2003,28 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
20012003

20022004
return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
20032005
}
2006+
2007+
// Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2008+
// Splat the input into a 32-element i32 vector, then AND each element
2009+
// with a unique bitmask to isolate individual bits.
2010+
if (ResTy == MVT::v32i1 &&
2011+
(ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2012+
Subtarget.useHVX128BOps()) {
2013+
SDValue Val32 = Val;
2014+
if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2015+
Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2016+
2017+
MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2018+
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2019+
SmallVector<SDValue, 32> Mask;
2020+
for (unsigned i = 0; i < 32; ++i)
2021+
Mask.push_back(DAG.getConstant(1u << i, dl, MVT::i32));
2022+
2023+
SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2024+
SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2025+
return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
2026+
}
2027+
20042028
if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
20052029
// Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
20062030
unsigned BitWidth = ValTy.getSizeInBits();
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
3+
4+
define void @bitcast_i32_to_v32i1(ptr %in, ptr %out) {
5+
; CHECK-LABEL: bitcast_i32_to_v32i1:
6+
; CHECK: .cfi_startproc
7+
; CHECK-NEXT: // %bb.0: // %entry
8+
; CHECK-NEXT: {
9+
; CHECK-NEXT: r3:2 = combine(#-1,##.LCPI0_0)
10+
; CHECK-NEXT: allocframe(r29,#128):raw
11+
; CHECK-NEXT: }
12+
; CHECK-NEXT: .cfi_def_cfa r30, 8
13+
; CHECK-NEXT: .cfi_offset r31, -4
14+
; CHECK-NEXT: .cfi_offset r30, -8
15+
; CHECK-NEXT: {
16+
; CHECK-NEXT: r29 = and(r29,#-128)
17+
; CHECK-NEXT: r0 = memw(r0+#0)
18+
; CHECK-NEXT: v1 = vmem(r2+#0)
19+
; CHECK-NEXT: }
20+
; CHECK-NEXT: {
21+
; CHECK-NEXT: v0 = vsplat(r0)
22+
; CHECK-NEXT: r5 = add(r29,#0)
23+
; CHECK-NEXT: }
24+
; CHECK-NEXT: {
25+
; CHECK-NEXT: v0 = vand(v0,v1)
26+
; CHECK-NEXT: }
27+
; CHECK-NEXT: {
28+
; CHECK-NEXT: q0 = vand(v0,r3)
29+
; CHECK-NEXT: }
30+
; CHECK-NEXT: {
31+
; CHECK-NEXT: v0 = vand(q0,r3)
32+
; CHECK-NEXT: vmem(r5+#0) = v0.new
33+
; CHECK-NEXT: }
34+
; CHECK-NEXT: {
35+
; CHECK-NEXT: r4 = memw(r5+#0)
36+
; CHECK-NEXT: r0 = memw(r5+#4)
37+
; CHECK-NEXT: }
38+
; CHECK-NEXT: {
39+
; CHECK-NEXT: r2 = and(r4,#255)
40+
; CHECK-NEXT: r0 = and(r0,#255)
41+
; CHECK-NEXT: }
42+
; CHECK-NEXT: {
43+
; CHECK-NEXT: p1 = cmp.gtu(r2,#0)
44+
; CHECK-NEXT: p0 = cmp.gtu(r0,#0)
45+
; CHECK-NEXT: }
46+
; CHECK-NEXT: {
47+
; CHECK-NEXT: r0 = mux(p1,#1,#0)
48+
; CHECK-NEXT: r2 = mux(p0,#1,#0)
49+
; CHECK-NEXT: memb(r1+#0) = r0.new
50+
; CHECK-NEXT: }
51+
; CHECK-NEXT: {
52+
; CHECK-NEXT: memb(r1+#1) = r2
53+
; CHECK-NEXT: }
54+
; CHECK-NEXT: {
55+
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
56+
; CHECK-NEXT: }
57+
entry:
58+
%load = load i32, ptr %in, align 4
59+
%bitcast = bitcast i32 %load to <32 x i1>
60+
%e0 = extractelement <32 x i1> %bitcast, i32 0
61+
%e1 = extractelement <32 x i1> %bitcast, i32 1
62+
%z0 = zext i1 %e0 to i8
63+
%z1 = zext i1 %e1 to i8
64+
%ptr0 = getelementptr i8, ptr %out, i32 0
65+
%ptr1 = getelementptr i8, ptr %out, i32 1
66+
store i8 %z0, ptr %ptr0, align 1
67+
store i8 %z1, ptr %ptr1, align 1
68+
ret void
69+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
3+
4+
define void @bitcast_v2i16_to_v32i1(ptr %in, ptr %out) {
5+
; CHECK-LABEL: bitcast_v2i16_to_v32i1:
6+
; CHECK: .cfi_startproc
7+
; CHECK-NEXT: // %bb.0: // %entry
8+
; CHECK-NEXT: {
9+
; CHECK-NEXT: r3:2 = combine(#-1,##.LCPI0_0)
10+
; CHECK-NEXT: r0 = memw(r0+#0)
11+
; CHECK-NEXT: }
12+
; CHECK-NEXT: {
13+
; CHECK-NEXT: v0 = vsplat(r0)
14+
; CHECK-NEXT: r2 = #0
15+
; CHECK-NEXT: v1 = vmem(r2+#0)
16+
; CHECK-NEXT: }
17+
; CHECK-NEXT: {
18+
; CHECK-NEXT: v0 = vand(v0,v1)
19+
; CHECK-NEXT: }
20+
; CHECK-NEXT: {
21+
; CHECK-NEXT: q0 = vand(v0,r3)
22+
; CHECK-NEXT: }
23+
; CHECK-NEXT: {
24+
; CHECK-NEXT: v0 = vand(q0,r3)
25+
; CHECK-NEXT: }
26+
; CHECK-NEXT: {
27+
; CHECK-NEXT: r0 = vextract(v0,r2)
28+
; CHECK-NEXT: }
29+
; CHECK-NEXT: {
30+
; CHECK-NEXT: r0 = and(r0,#255)
31+
; CHECK-NEXT: }
32+
; CHECK-NEXT: {
33+
; CHECK-NEXT: p0 = cmp.gtu(r0,#0)
34+
; CHECK-NEXT: }
35+
; CHECK-NEXT: {
36+
; CHECK-NEXT: r0 = mux(p0,#1,#0)
37+
; CHECK-NEXT: jumpr r31
38+
; CHECK-NEXT: memb(r1+#0) = r0.new
39+
; CHECK-NEXT: }
40+
entry:
41+
%load = load <2 x i16>, ptr %in, align 4
42+
%bitcast = bitcast <2 x i16> %load to <32 x i1>
43+
%extract = extractelement <32 x i1> %bitcast, i32 0
44+
%zext = zext i1 %extract to i8
45+
store i8 %zext, ptr %out, align 1
46+
ret void
47+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
3+
4+
define void @bitcast_v4i8_to_v32i1(ptr %in, ptr %out) {
5+
; CHECK-LABEL: bitcast_v4i8_to_v32i1:
6+
; CHECK: .cfi_startproc
7+
; CHECK-NEXT: // %bb.0: // %entry
8+
; CHECK-NEXT: {
9+
; CHECK-NEXT: r3:2 = combine(#-1,##.LCPI0_0)
10+
; CHECK-NEXT: r0 = memw(r0+#0)
11+
; CHECK-NEXT: }
12+
; CHECK-NEXT: {
13+
; CHECK-NEXT: v0 = vsplat(r0)
14+
; CHECK-NEXT: r2 = #0
15+
; CHECK-NEXT: v1 = vmem(r2+#0)
16+
; CHECK-NEXT: }
17+
; CHECK-NEXT: {
18+
; CHECK-NEXT: v0 = vand(v0,v1)
19+
; CHECK-NEXT: }
20+
; CHECK-NEXT: {
21+
; CHECK-NEXT: q0 = vand(v0,r3)
22+
; CHECK-NEXT: }
23+
; CHECK-NEXT: {
24+
; CHECK-NEXT: v0 = vand(q0,r3)
25+
; CHECK-NEXT: }
26+
; CHECK-NEXT: {
27+
; CHECK-NEXT: r0 = vextract(v0,r2)
28+
; CHECK-NEXT: }
29+
; CHECK-NEXT: {
30+
; CHECK-NEXT: r0 = and(r0,#255)
31+
; CHECK-NEXT: }
32+
; CHECK-NEXT: {
33+
; CHECK-NEXT: p0 = cmp.gtu(r0,#0)
34+
; CHECK-NEXT: }
35+
; CHECK-NEXT: {
36+
; CHECK-NEXT: r0 = mux(p0,#1,#0)
37+
; CHECK-NEXT: jumpr r31
38+
; CHECK-NEXT: memb(r1+#0) = r0.new
39+
; CHECK-NEXT: }
40+
entry:
41+
%load = load <4 x i8>, ptr %in, align 4
42+
%bitcast = bitcast <4 x i8> %load to <32 x i1>
43+
%extract = extractelement <32 x i1> %bitcast, i32 0
44+
%zext = zext i1 %extract to i8
45+
store i8 %zext, ptr %out, align 1
46+
ret void
47+
}

0 commit comments

Comments
 (0)