[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled #147466

pkarveti · 2025-07-08T06:33:20Z

No description provided.

llvmbot · 2025-07-08T06:33:52Z

@llvm/pr-subscribers-backend-hexagon

Author: None (pkarveti)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/147466.diff

4 Files Affected:

(modified) llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (+24)
(added) llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll (+20)
(added) llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll (+16)
(added) llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll (+16)

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0e13dd3214da6..9b69c892308cb 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -117,6 +117,8 @@ HexagonTargetLowering::initializeHVXLowering() {
   setOperationAction(ISD::VECTOR_SHUFFLE,          ByteW, Legal);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  if (Subtarget.useHVX128BOps())
+    setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
   if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
       Subtarget.useHVXFloatingPoint()) {
 
@@ -2001,6 +2003,28 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
 
     return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
   }
+
+  // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
+  // Splat the input into a 32-element i32 vector, then AND each element
+  //  with a unique bitmask to isolate individual bits.
+  if (ResTy == MVT::v32i1 &&
+      (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
+      Subtarget.useHVX128BOps()) {
+    SDValue Val32 = Val;
+    if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
+      Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
+
+    MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
+    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
+    SmallVector<SDValue, 32> Mask;
+    for (unsigned i = 0; i < 32; ++i)
+      Mask.push_back(DAG.getConstant(1u << i, dl, MVT::i32));
+
+    SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
+    SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
+    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
+  }
+
   if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
     // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
     unsigned BitWidth = ValTy.getSizeInBits();
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
new file mode 100644
index 0000000000000..741589d3cde74
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
@@ -0,0 +1,20 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat(r{{[0-9]*}})
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] =  vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_i32_to_v32i1_full(ptr %in, ptr %out) {
+entry:
+  %load = load i32, ptr %in, align 4
+  %bitcast = bitcast i32 %load to <32 x i1>
+  %e0 = extractelement <32 x i1> %bitcast, i32 0
+  %e1 = extractelement <32 x i1> %bitcast, i32 1
+  %z0 = zext i1 %e0 to i8
+  %z1 = zext i1 %e1 to i8
+  %ptr0 = getelementptr i8, ptr %out, i32 0
+  %ptr1 = getelementptr i8, ptr %out, i32 1
+  store i8 %z0, ptr %ptr0, align 1
+  store i8 %z1, ptr %ptr1, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
new file mode 100644
index 0000000000000..45068e8e080b8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
@@ -0,0 +1,16 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] =  vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_v2i16_to_v32i1(ptr %in, ptr %out) {
+entry:
+  %load = load <2 x i16>, ptr %in, align 4
+  %bitcast = bitcast <2 x i16> %load to <32 x i1>
+  %extract = extractelement <32 x i1> %bitcast, i32 0
+  %zext = zext i1 %extract to i8
+  store i8 %zext, ptr %out, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
new file mode 100644
index 0000000000000..15219332856c5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
@@ -0,0 +1,16 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] =  vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_v4i8_to_v32i1(ptr %in, ptr %out) {
+entry:
+  %load = load <4 x i8>, ptr %in, align 4
+  %bitcast = bitcast <4 x i8> %load to <32 x i1>
+  %extract = extractelement <32 x i1> %bitcast, i32 0
+  %zext = zext i1 %extract to i8
+  store i8 %zext, ptr %out, align 1
+  ret void
+}

xgupta · 2025-07-08T10:26:24Z

The CHECK lines in the test cases should be autogenerated by the script llvm/utils/update_llc_test_checks.py, as in other tests.

pkarveti · 2025-07-08T14:36:00Z

@aankit-ca @androm3da Could you please review this patch?

aankit-ca · 2025-07-08T16:55:19Z

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

+
+  // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
+  // Splat the input into a 32-element i32 vector, then AND each element
+  //  with a unique bitmask to isolate individual bits.


Suggested change

//  with a unique bitmask to isolate individual bits.

// with a unique bitmask to isolate individual bits.

aankit-ca · 2025-07-08T16:56:51Z

llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll

+; CHECK-NEXT:     r29 = and(r29,#-128)
+; CHECK-NEXT:     r0 = memw(r0+#0)
+; CHECK-NEXT:     v1 = vmem(r2+#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vsplat(r0)
+; CHECK-NEXT:     r5 = add(r29,#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vand(v0,v1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v0,r3)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vand(q0,r3)
+; CHECK-NEXT:     vmem(r5+#0) = v0.new
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r4 = memw(r5+#0)


Please keep only the checks that test this patch. As written, this test can break with any unrelated change. The same applies to all the other tests.

I asked for autogenerated check lines because that is in our testing guidelines - https://www.llvm.org/docs/TestingGuide.html#best-practices-for-regression-tests - and recently @RKSimon has also been regenerating a few Hexagon tests' check lines with the script: https://github.com/llvm/llvm-project/commits/main/llvm/test/CodeGen/Hexagon.

I agree with @aankit-ca. Due to the packetization rules in Hexagon, we shouldn't auto-generate the CHECKs using the script, as it makes the test very brittle. This PR intends to check for a very specific optimization and should only be looking for the presence of these lines:
v0 = vsplat(r0)
v0 = vand(v0,v1)
q0 = vand(v0,r3)

Change-Id: I1f3c4783424a2c068f207a7680f85ef95c70f573

llvmbot added the backend:Hexagon label Jul 8, 2025

pkarveti force-pushed the bitcast branch from b22d2a1 to 8152d6f Compare July 8, 2025 14:30

aankit-ca reviewed Jul 8, 2025

View reviewed changes

aankit-ca requested review from androm3da, iajbar and jverma-quic July 8, 2025 16:57

[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled

5eccba6

Change-Id: I1f3c4783424a2c068f207a7680f85ef95c70f573

pkarveti force-pushed the bitcast branch from 8152d6f to 5eccba6 Compare July 9, 2025 04:50

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled #147466

[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled #147466

pkarveti commented Jul 8, 2025

Uh oh!

llvmbot commented Jul 8, 2025

Uh oh!

xgupta commented Jul 8, 2025

Uh oh!

pkarveti commented Jul 8, 2025

Uh oh!

aankit-ca Jul 8, 2025

Uh oh!

aankit-ca Jul 8, 2025

Uh oh!

xgupta Jul 8, 2025 •

edited

Loading

Uh oh!

jverma-quic Jul 8, 2025

Uh oh!

Uh oh!

	// with a unique bitmask to isolate individual bits.
	// with a unique bitmask to isolate individual bits.

[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled #147466

Are you sure you want to change the base?

[Hexagon]Handle bitcast of i32/v2i16/v4i8 -> v32i1 when Hvx is enabled #147466

Conversation

pkarveti commented Jul 8, 2025

Uh oh!

llvmbot commented Jul 8, 2025

Uh oh!

xgupta commented Jul 8, 2025

Uh oh!

pkarveti commented Jul 8, 2025

Uh oh!

aankit-ca Jul 8, 2025

Choose a reason for hiding this comment

Uh oh!

aankit-ca Jul 8, 2025

Choose a reason for hiding this comment

Uh oh!

xgupta Jul 8, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jverma-quic Jul 8, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

xgupta Jul 8, 2025 •

edited

Loading