-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled #147466
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-hexagon Author: None (pkarveti) Changes: Full diff: https://github.com/llvm/llvm-project/pull/147466.diff 4 Files Affected:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0e13dd3214da6..9b69c892308cb 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -117,6 +117,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget.useHVX128BOps())
+ setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
@@ -2001,6 +2003,28 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
}
+
+ // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
+ // Splat the input into a 32-element i32 vector, then AND each element
+ // with a unique bitmask to isolate individual bits.
+ if (ResTy == MVT::v32i1 &&
+ (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
+ Subtarget.useHVX128BOps()) {
+ SDValue Val32 = Val;
+ if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
+ Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
+
+ MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
+ SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
+ SmallVector<SDValue, 32> Mask;
+ for (unsigned i = 0; i < 32; ++i)
+ Mask.push_back(DAG.getConstant(1u << i, dl, MVT::i32));
+
+ SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
+ SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
+ return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
+ }
+
if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
// Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
unsigned BitWidth = ValTy.getSizeInBits();
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
new file mode 100644
index 0000000000000..741589d3cde74
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
@@ -0,0 +1,20 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat(r{{[0-9]*}})
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_i32_to_v32i1_full(ptr %in, ptr %out) {
+entry:
+ %load = load i32, ptr %in, align 4
+ %bitcast = bitcast i32 %load to <32 x i1>
+ %e0 = extractelement <32 x i1> %bitcast, i32 0
+ %e1 = extractelement <32 x i1> %bitcast, i32 1
+ %z0 = zext i1 %e0 to i8
+ %z1 = zext i1 %e1 to i8
+ %ptr0 = getelementptr i8, ptr %out, i32 0
+ %ptr1 = getelementptr i8, ptr %out, i32 1
+ store i8 %z0, ptr %ptr0, align 1
+ store i8 %z1, ptr %ptr1, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
new file mode 100644
index 0000000000000..45068e8e080b8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
@@ -0,0 +1,16 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_v2i16_to_v32i1(ptr %in, ptr %out) {
+entry:
+ %load = load <2 x i16>, ptr %in, align 4
+ %bitcast = bitcast <2 x i16> %load to <32 x i1>
+ %extract = extractelement <32 x i1> %bitcast, i32 0
+ %zext = zext i1 %extract to i8
+ store i8 %zext, ptr %out, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
new file mode 100644
index 0000000000000..15219332856c5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
@@ -0,0 +1,16 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_v4i8_to_v32i1(ptr %in, ptr %out) {
+entry:
+ %load = load <4 x i8>, ptr %in, align 4
+ %bitcast = bitcast <4 x i8> %load to <32 x i1>
+ %extract = extractelement <32 x i1> %bitcast, i32 0
+ %zext = zext i1 %extract to i8
+ store i8 %zext, ptr %out, align 1
+ ret void
+}
|
The CHECK lines in the test cases should be autogenerated by the script llvm/utils/update_llc_test_checks.py, like other tests. |
@aankit-ca @androm3da Could you please review this patch? |
|
||
// Handle bitcast from i32, v2i16, and v4i8 to v32i1. | ||
// Splat the input into a 32-element i32 vector, then AND each element | ||
// with a unique bitmask to isolate individual bits. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// with a unique bitmask to isolate individual bits. | |
// with a unique bitmask to isolate individual bits. |
; CHECK-NEXT: r29 = and(r29,#-128) | ||
; CHECK-NEXT: r0 = memw(r0+#0) | ||
; CHECK-NEXT: v1 = vmem(r2+#0) | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: { | ||
; CHECK-NEXT: v0 = vsplat(r0) | ||
; CHECK-NEXT: r5 = add(r29,#0) | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: { | ||
; CHECK-NEXT: v0 = vand(v0,v1) | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: { | ||
; CHECK-NEXT: q0 = vand(v0,r3) | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: { | ||
; CHECK-NEXT: v0 = vand(q0,r3) | ||
; CHECK-NEXT: vmem(r5+#0) = v0.new | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: { | ||
; CHECK-NEXT: r4 = memw(r5+#0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have checks that test only what this patch changes. As written, this test can break with any unrelated change. The same applies to all the other tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I asked for autogenerated check lines because that is what our testing guidelines recommend: https://www.llvm.org/docs/TestingGuide.html#best-practices-for-regression-tests. Recently @RKSimon has also been regenerating the check lines of a few Hexagon tests with the script: https://github.com/llvm/llvm-project/commits/main/llvm/test/CodeGen/Hexagon.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @aankit-ca. Due to the packetization rules in Hexagon, we shouldn't auto-generate the CHECKs using the script, as it makes the test very brittle. This PR intends to check for a very specific optimization and should only be looking for the presence of these lines:
v0 = vsplat(r0)
v0 = vand(v0,v1)
q0 = vand(v0,r3)
Change-Id: I1f3c4783424a2c068f207a7680f85ef95c70f573
No description provided.