Skip to content

Commit e653877

Browse files
committed
[Hexagon] Handle truncate of v4i8/v2i16 -> v4i1/v2i1 when HVX is enabled
Change-Id: Id1c25dfbaf95a56b687eb6e47d2e48c8fe84deaf
1 parent dcc692a commit e653877

File tree

2 files changed

+41
-45
lines changed

2 files changed

+41
-45
lines changed

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,13 @@ def: Pat<(v8i1 (trunc V8I8:$Rs)),
582582
(A4_vcmpbeqi (Combinew (A2_andir (HiReg $Rs), (i32 0x01010101)),
583583
(A2_andir (LoReg $Rs), (i32 0x01010101))),
584584
(i32 1))>;
585+
// v4i8 -> v4i1 truncate: expand the source with S2_vzxtbh into a 64-bit
// register pair, AND each 32-bit half with 0x00010001 so only bit 0 of every
// halfword survives, then compare each halfword against 1 (A4_vcmpheqi) to
// form the predicate. Immediates are wrapped as (i32 ...) to match the
// v8i1 pattern above.
def: Pat<(v4i1 (trunc V4I8:$Rs)),
586+
(A4_vcmpheqi (Combinew (A2_andir (HiReg (S2_vzxtbh $Rs)), (i32 0x00010001)),
587+
(A2_andir (LoReg (S2_vzxtbh $Rs)), (i32 0x00010001))),
588+
(i32 1))>;
589+
// v2i16 -> v2i1 truncate: expand the source with S2_vzxthw into a 64-bit
// register pair, AND the pair with combine(#1,#1) so only bit 0 of each
// word survives, then compare each word against 1 (A4_vcmpweqi) to form the
// predicate.
def: Pat<(v2i1 (trunc V2I16:$Rs)),
590+
(A4_vcmpweqi (A2_andp (S2_vzxthw $Rs), (A2_combineii (i32 1), (i32 1))),
591+
(i32 1))>;
585592

586593

587594
// Saturation:
Lines changed: 34 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,11 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
21
; RUN: llc -mtriple=hexagon < %s | FileCheck %s
2+
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
33

44
define void @f0(<2 x i32> %a0, ptr %a1) {
55
; CHECK-LABEL: f0:
6-
; CHECK: .cfi_startproc
7-
; CHECK-NEXT: // %bb.0: // %b0
8-
; CHECK-NEXT: {
9-
; CHECK-NEXT: r5:4 = combine(#1,#1)
10-
; CHECK-NEXT: }
11-
; CHECK-NEXT: {
12-
; CHECK-NEXT: r1:0 = and(r1:0,r5:4)
13-
; CHECK-NEXT: }
14-
; CHECK-NEXT: {
15-
; CHECK-NEXT: p0 = vcmpw.eq(r1:0,#1)
16-
; CHECK-NEXT: }
17-
; CHECK-NEXT: {
18-
; CHECK-NEXT: r0 = p0
19-
; CHECK-NEXT: jumpr r31
20-
; CHECK-NEXT: memb(r2+#0) = r0.new
21-
; CHECK-NEXT: }
6+
; CHECK: r[[REG1H:([0-9]+)]]:[[REG1L:([0-9]+)]] = combine(#1,#1)
7+
; CHECK: r[[REG2H:([0-9]+)]]:[[REG2L:([0-9]+)]] = and(r[[REG2H]]:[[REG2L]],r[[REG1H]]:[[REG1L]])
8+
; CHECK: p{{[0-9]+}} = vcmpw.eq(r[[REG2H]]:[[REG2L]],#1)
229
b0:
2310
%v0 = trunc <2 x i32> %a0 to <2 x i1>
2411
store <2 x i1> %v0, ptr %a1, align 1
@@ -27,20 +14,9 @@ b0:
2714

2815
define void @f1(<4 x i16> %a0, ptr %a1) {
2916
; CHECK-LABEL: f1:
30-
; CHECK: .cfi_startproc
31-
; CHECK-NEXT: // %bb.0: // %b0
32-
; CHECK-NEXT: {
33-
; CHECK-NEXT: r0 = and(r0,##65537)
34-
; CHECK-NEXT: r1 = and(r1,##65537)
35-
; CHECK-NEXT: }
36-
; CHECK-NEXT: {
37-
; CHECK-NEXT: p0 = vcmph.eq(r1:0,#1)
38-
; CHECK-NEXT: }
39-
; CHECK-NEXT: {
40-
; CHECK-NEXT: r0 = p0
41-
; CHECK-NEXT: jumpr r31
42-
; CHECK-NEXT: memb(r2+#0) = r0.new
43-
; CHECK-NEXT: }
17+
; CHECK: [[REG0:r([0-9]+)]] = and([[REG0]],##65537)
18+
; CHECK: [[REG1:r([0-9]+)]] = and([[REG1]],##65537)
19+
; CHECK: p{{[0-9]+}} = vcmph.eq(r{{[0-9]+}}:{{[0-9]+}},#1)
4420
b0:
4521
%v0 = trunc <4 x i16> %a0 to <4 x i1>
4622
store <4 x i1> %v0, ptr %a1, align 1
@@ -49,22 +25,35 @@ b0:
4925

5026
define void @f2(<8 x i8> %a0, ptr %a1) {
5127
; CHECK-LABEL: f2:
52-
; CHECK: .cfi_startproc
53-
; CHECK-NEXT: // %bb.0: // %b0
54-
; CHECK-NEXT: {
55-
; CHECK-NEXT: r0 = and(r0,##16843009)
56-
; CHECK-NEXT: r1 = and(r1,##16843009)
57-
; CHECK-NEXT: }
58-
; CHECK-NEXT: {
59-
; CHECK-NEXT: p0 = vcmpb.eq(r1:0,#1)
60-
; CHECK-NEXT: }
61-
; CHECK-NEXT: {
62-
; CHECK-NEXT: r0 = p0
63-
; CHECK-NEXT: jumpr r31
64-
; CHECK-NEXT: memb(r2+#0) = r0.new
65-
; CHECK-NEXT: }
28+
; CHECK: [[REG0:r([0-9]+)]] = and([[REG0]],##16843009)
29+
; CHECK: [[REG1:r([0-9]+)]] = and([[REG1]],##16843009)
30+
; CHECK: p{{[0-9]+}} = vcmpb.eq(r{{[0-9]+}}:{{[0-9]+}},#1)
6631
b0:
6732
%v0 = trunc <8 x i8> %a0 to <8 x i1>
6833
store <8 x i1> %v0, ptr %a1, align 1
6934
ret void
7035
}
36+
37+
; f3: truncate <4 x i8> to <4 x i1> and store the result.
; Expected sequence: vzxtbh into a register pair, an and with ##65537
; (0x00010001) on each half of that pair, then vcmph.eq of the pair with #1.
define void @f3(<4 x i8> %a0, ptr %a1) {
38+
; CHECK-LABEL: f3:
39+
; CHECK: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = vzxtbh(r{{[0-9]+}})
40+
; CHECK: r[[REGL]] = and(r[[REGL]],##65537)
41+
; CHECK: r[[REGH]] = and(r[[REGH]],##65537)
42+
; CHECK: p{{[0-9]+}} = vcmph.eq(r[[REGH]]:[[REGL]],#1)
43+
b0:
44+
%v0 = trunc <4 x i8> %a0 to <4 x i1>
45+
store <4 x i1> %v0, ptr %a1, align 1
46+
ret void
47+
}
48+
49+
; f4: truncate <2 x i16> to <2 x i1> and store the result.
; Expected sequence: vzxthw into a register pair, combine(#1,#1) to build the
; per-word mask, a 64-bit and of the two pairs, then vcmpw.eq with #1.
define void @f4(<2 x i16> %a0, ptr %a1) {
50+
; CHECK-LABEL: f4:
51+
; CHECK: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = vzxthw(r{{[0-9]+}})
52+
; CHECK: r[[REG1H:([0-9]+)]]:[[REG1L:([0-9]+)]] = combine(#1,#1)
53+
; CHECK: r[[REGH]]:[[REGL]] = and(r[[REGH]]:[[REGL]],r[[REG1H]]:[[REG1L]])
54+
; CHECK: p{{[0-9]+}} = vcmpw.eq(r[[REGH]]:[[REGL]],#1)
55+
b0:
56+
%v0 = trunc <2 x i16> %a0 to <2 x i1>
57+
store <2 x i1> %v0, ptr %a1, align 1
58+
ret void
59+
}

0 commit comments

Comments
 (0)