From 5eccba6684171bfccae611421ee0a304931aa118 Mon Sep 17 00:00:00 2001
From: pavani karveti
Date: Mon, 7 Jul 2025 23:09:59 -0700
Subject: [PATCH] [Hexagon] Handle bitcast of i32/v2i16/v4i8 -> v32i1 when HVX
 is enabled

Change-Id: I1f3c4783424a2c068f207a7680f85ef95c70f573
---
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 24 +++++++++++++++++++
 .../CodeGen/Hexagon/bitcast-i32-to-v32i1.ll   | 20 ++++++++++++++++
 .../CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll | 16 +++++++++++++
 .../CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll  | 16 +++++++++++++
 4 files changed, 76 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0e13dd3214da6..f6f892a5ed994 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -117,6 +117,8 @@ HexagonTargetLowering::initializeHVXLowering() {
   setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  if (Subtarget.useHVX128BOps())
+    setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
   if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
       Subtarget.useHVXFloatingPoint()) {
 
@@ -2001,6 +2003,28 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
 
     return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
   }
+
+  // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
+  // Splat the input into a 32-element i32 vector, then AND lane i with
+  // (1 << i) so that lane i is non-zero iff bit i of the input is set.
+  if (ResTy == MVT::v32i1 &&
+      (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
+      Subtarget.useHVX128BOps()) {
+    SDValue Val32 = Val;
+    if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
+      Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
+
+    MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
+    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
+    SmallVector<SDValue, 32> Mask;
+    for (unsigned i = 0; i < 32; ++i)
+      Mask.push_back(DAG.getConstant(1u << i, dl, MVT::i32));
+
+    SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
+    SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
+    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
+  }
+
   if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
     // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
     unsigned BitWidth = ValTy.getSizeInBits();
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
new file mode 100644
index 0000000000000..741589d3cde74
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
@@ -0,0 +1,20 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat(r{{[0-9]+}})
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_i32_to_v32i1_full(ptr %in, ptr %out) {
+entry:
+  %load = load i32, ptr %in, align 4
+  %bitcast = bitcast i32 %load to <32 x i1>
+  %e0 = extractelement <32 x i1> %bitcast, i32 0
+  %e1 = extractelement <32 x i1> %bitcast, i32 1
+  %z0 = zext i1 %e0 to i8
+  %z1 = zext i1 %e1 to i8
+  %ptr0 = getelementptr i8, ptr %out, i32 0
+  %ptr1 = getelementptr i8, ptr %out, i32 1
+  store i8 %z0, ptr %ptr0, align 1
+  store i8 %z1, ptr %ptr1, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
new file mode 100644
index 0000000000000..45068e8e080b8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
@@ -0,0 +1,16 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_v2i16_to_v32i1(ptr %in, ptr %out) {
+entry:
+  %load = load <2 x i16>, ptr %in, align 4
+  %bitcast = bitcast <2 x i16> %load to <32 x i1>
+  %extract = extractelement <32 x i1> %bitcast, i32 0
+  %zext = zext i1 %extract to i8
+  store i8 %zext, ptr %out, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
new file mode 100644
index 0000000000000..15219332856c5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
@@ -0,0 +1,16 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
+; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
+; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
+; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})
+
+define void @bitcast_v4i8_to_v32i1(ptr %in, ptr %out) {
+entry:
+  %load = load <4 x i8>, ptr %in, align 4
+  %bitcast = bitcast <4 x i8> %load to <32 x i1>
+  %extract = extractelement <32 x i1> %bitcast, i32 0
+  %zext = zext i1 %extract to i8
+  store i8 %zext, ptr %out, align 1
+  ret void
+}