Skip to content

Commit cd75c2f

Browse files
authored
[AArch64] Enable using SVE2 bit-sel instructions with Neon types. (#146906)
This affects EOR3/BCAX/BSL/NBSL/BSL1N/BSL2N.
1 parent b67504c commit cd75c2f

File tree

5 files changed

+568
-2
lines changed

5 files changed

+568
-2
lines changed

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5368,6 +5368,19 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm,
53685368
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
53695369
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
53705370
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
5371+
5372+
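// Allow selecting SVE2 ternary ops with Neon types: each 64-bit (V64) or
// 128-bit (V128) operand is inserted into an IMPLICIT_DEF Z register at
// dsub/zsub, the SVE2 instruction is applied to the full Z registers, and
// the result is extracted from the same subregister.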
5373+
foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
5374+
def : Pat<(SVEType<VT>.DSub (op V64:$op1, V64:$op2, V64:$op3)),
5375+
(EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, dsub),
5376+
(INSERT_SUBREG (IMPLICIT_DEF), $op2, dsub),
5377+
(INSERT_SUBREG (IMPLICIT_DEF), $op3, dsub)), dsub)>;
5378+
5379+
def : Pat<(SVEType<VT>.ZSub (op V128:$op1, V128:$op2, V128:$op3)),
5380+
(EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $op1, zsub),
5381+
(INSERT_SUBREG (IMPLICIT_DEF), $op2, zsub),
5382+
(INSERT_SUBREG (IMPLICIT_DEF), $op3, zsub)), zsub)>;
5383+
}
53715384
}
53725385

53735386
class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,

llvm/test/CodeGen/AArch64/bcax.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
22
; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
33
; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
4+
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
5+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s
46

57
define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
68
; SHA3-LABEL: bcax_64x2:
@@ -13,6 +15,15 @@ define <2 x i64> @bcax_64x2(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
1315
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
1416
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
1517
; NOSHA3-NEXT: ret
18+
;
19+
; SVE2-LABEL: bcax_64x2:
20+
; SVE2: // %bb.0:
21+
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
22+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
23+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
24+
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
25+
; SVE2-NEXT: mov v0.16b, v2.16b
26+
; SVE2-NEXT: ret
1627
%4 = xor <2 x i64> %1, <i64 -1, i64 -1>
1728
%5 = and <2 x i64> %4, %0
1829
%6 = xor <2 x i64> %5, %2
@@ -30,6 +41,15 @@ define <4 x i32> @bcax_32x4(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
3041
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
3142
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
3243
; NOSHA3-NEXT: ret
44+
;
45+
; SVE2-LABEL: bcax_32x4:
46+
; SVE2: // %bb.0:
47+
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
48+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
49+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
50+
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
51+
; SVE2-NEXT: mov v0.16b, v2.16b
52+
; SVE2-NEXT: ret
3353
%4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
3454
%5 = and <4 x i32> %4, %0
3555
%6 = xor <4 x i32> %5, %2
@@ -47,6 +67,15 @@ define <8 x i16> @bcax_16x8(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
4767
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
4868
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
4969
; NOSHA3-NEXT: ret
70+
;
71+
; SVE2-LABEL: bcax_16x8:
72+
; SVE2: // %bb.0:
73+
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
74+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
75+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
76+
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
77+
; SVE2-NEXT: mov v0.16b, v2.16b
78+
; SVE2-NEXT: ret
5079
%4 = xor <8 x i16> %1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
5180
%5 = and <8 x i16> %4, %0
5281
%6 = xor <8 x i16> %5, %2
@@ -64,6 +93,15 @@ define <16 x i8> @bcax_8x16(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
6493
; NOSHA3-NEXT: bic v0.16b, v0.16b, v1.16b
6594
; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
6695
; NOSHA3-NEXT: ret
96+
;
97+
; SVE2-LABEL: bcax_8x16:
98+
; SVE2: // %bb.0:
99+
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
100+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
101+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
102+
; SVE2-NEXT: bcax z2.d, z2.d, z0.d, z1.d
103+
; SVE2-NEXT: mov v0.16b, v2.16b
104+
; SVE2-NEXT: ret
67105
%4 = xor <16 x i8> %1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
68106
%5 = and <16 x i8> %4, %0
69107
%6 = xor <16 x i8> %5, %2

0 commit comments

Comments
 (0)