diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 2a991bafbf148..4d15cfa651270 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -582,6 +582,13 @@ def: Pat<(v8i1 (trunc V8I8:$Rs)),
          (A4_vcmpbeqi (Combinew (A2_andir (HiReg $Rs), (i32 0x01010101)),
                                 (A2_andir (LoReg $Rs), (i32 0x01010101))),
                       (i32 1))>;
+def : Pat<(v4i1 (trunc V4I8:$Rs)),
+          (A4_vcmpheqi (Combinew (A2_andir (HiReg (S2_vzxtbh $Rs)), 0x00010001),
+                                 (A2_andir (LoReg (S2_vzxtbh $Rs)), 0x00010001)),
+                       (i32 1))>;
+def: Pat<(v2i1 (trunc V2I16:$Rs)),
+        (A4_vcmpweqi (A2_andp (S2_vzxthw $Rs), (A2_combineii (i32 1), (i32 1))),
+                     (i32 1))>;
 
 // Saturation:
 
diff --git a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll
index 1090b64fcad52..c91f16d91d1be 100644
--- a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll
+++ b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll
@@ -1,24 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
 
 define void @f0(<2 x i32> %a0, ptr %a1) {
 ; CHECK-LABEL: f0:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %b0
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r5:4 = combine(#1,#1)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r1:0 = and(r1:0,r5:4)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = vcmpw.eq(r1:0,#1)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = p0
-; CHECK-NEXT:     jumpr r31
-; CHECK-NEXT:     memb(r2+#0) = r0.new
-; CHECK-NEXT:    }
+; CHECK: r[[REG1H:([0-9]+)]]:[[REG1L:([0-9]+)]] = combine(#1,#1)
+; CHECK: r[[REG2H:([0-9]+)]]:[[REG2L:([0-9]+)]] = and(r[[REG2H]]:[[REG2L]],r[[REG1H]]:[[REG1L]])
+; CHECK: p{{[0-9]+}} = vcmpw.eq(r[[REG2H]]:[[REG2L]],#1)
 b0:
   %v0 = trunc <2 x i32> %a0 to <2 x i1>
   store <2 x i1> %v0, ptr %a1, align 1
@@ -27,20 +14,9 @@ b0:
 
 define void @f1(<4 x i16> %a0, ptr %a1) {
 ; CHECK-LABEL: f1:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %b0
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = and(r0,##65537)
-; CHECK-NEXT:     r1 = and(r1,##65537)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = vcmph.eq(r1:0,#1)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = p0
-; CHECK-NEXT:     jumpr r31
-; CHECK-NEXT:     memb(r2+#0) = r0.new
-; CHECK-NEXT:    }
+; CHECK: [[REG0:r([0-9]+)]] = and([[REG0]],##65537)
+; CHECK: [[REG1:r([0-9]+)]] = and([[REG1]],##65537)
+; CHECK: p{{[0-9]+}} = vcmph.eq(r{{[0-9]+}}:{{[0-9]+}},#1)
 b0:
   %v0 = trunc <4 x i16> %a0 to <4 x i1>
   store <4 x i1> %v0, ptr %a1, align 1
@@ -49,22 +25,35 @@ b0:
 
 define void @f2(<8 x i8> %a0, ptr %a1) {
 ; CHECK-LABEL: f2:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %b0
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = and(r0,##16843009)
-; CHECK-NEXT:     r1 = and(r1,##16843009)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = vcmpb.eq(r1:0,#1)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = p0
-; CHECK-NEXT:     jumpr r31
-; CHECK-NEXT:     memb(r2+#0) = r0.new
-; CHECK-NEXT:    }
+; CHECK: [[REG0:r([0-9]+)]] = and([[REG0]],##16843009)
+; CHECK: [[REG1:r([0-9]+)]] = and([[REG1]],##16843009)
+; CHECK: p{{[0-9]+}} = vcmpb.eq(r{{[0-9]+}}:{{[0-9]+}},#1)
 b0:
   %v0 = trunc <8 x i8> %a0 to <8 x i1>
   store <8 x i1> %v0, ptr %a1, align 1
   ret void
 }
+
+define void @f3(<4 x i8> %a0, ptr %a1) {
+; CHECK-LABEL: f3:
+; CHECK: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = vzxtbh(r{{[0-9]+}})
+; CHECK: r[[REGL]] = and(r[[REGL]],##65537)
+; CHECK: r[[REGH]] = and(r[[REGH]],##65537)
+; CHECK: p{{[0-9]+}} = vcmph.eq(r[[REGH]]:[[REGL]],#1)
+b0:
+  %v0 = trunc <4 x i8> %a0 to <4 x i1>
+  store <4 x i1> %v0, ptr %a1, align 1
+  ret void
+}
+
+define void @f4(<2 x i16> %a0, ptr %a1) {
+; CHECK-LABEL: f4:
+; CHECK: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = vzxthw(r{{[0-9]+}})
+; CHECK: r[[REG1H:([0-9]+)]]:[[REG1L:([0-9]+)]] = combine(#1,#1)
+; CHECK: r[[REGH]]:[[REGL]] = and(r[[REGH]]:[[REGL]],r[[REG1H]]:[[REG1L]])
+; CHECK: p{{[0-9]+}} = vcmpw.eq(r[[REGH]]:[[REGL]],#1)
+b0:
+  %v0 = trunc <2 x i16> %a0 to <2 x i1>
+  store <2 x i1> %v0, ptr %a1, align 1
+  ret void
+}