From 46785cbdf84d42328ec56ed88c170ca8cc546f2f Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Tue, 8 Jul 2025 04:19:01 -0700 Subject: [PATCH 1/2] Add tests. --- llvm/test/CodeGen/AArch64/bsl.ll | 85 +++++++++++++++++++++++++++ llvm/test/CodeGen/AArch64/eor3.ll | 48 ++++++++++++++- llvm/test/CodeGen/AArch64/sve2-bsl.ll | 64 ++++++++++++++++++++ 3 files changed, 196 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll index 5a270bc71cfc1..8e402ed23d085 100644 --- a/llvm/test/CodeGen/AArch64/bsl.ll +++ b/llvm/test/CodeGen/AArch64/bsl.ll @@ -431,3 +431,88 @@ define <4 x i8> @bsl2n_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) { %7 = or <4 x i8> %4, %6 ret <4 x i8> %7 } + +; NOT (a) has a dedicated instruction (MVN). +define <2 x i64> @not_q(<2 x i64> %0) #0 { +; NEON-LABEL: not_q: +; NEON: // %bb.0: +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: not_q: +; SVE2: // %bb.0: +; SVE2-NEXT: mvn v0.16b, v0.16b +; SVE2-NEXT: ret + %2 = xor <2 x i64> %0, splat (i64 -1) + ret <2 x i64> %2 +} + +; NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a). +define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 { +; NEON-LABEL: nand_q: +; NEON: // %bb.0: +; NEON-NEXT: and v0.16b, v1.16b, v0.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: nand_q: +; SVE2: // %bb.0: +; SVE2-NEXT: and v0.16b, v1.16b, v0.16b +; SVE2-NEXT: mvn v0.16b, v0.16b +; SVE2-NEXT: ret + %3 = and <2 x i64> %1, %0 + %4 = xor <2 x i64> %3, splat (i64 -1) + ret <2 x i64> %4 +} + +; NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b). +define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 { +; NEON-LABEL: nor_q: +; NEON: // %bb.0: +; NEON-NEXT: orr v0.16b, v1.16b, v0.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: nor_q: +; SVE2: // %bb.0: +; SVE2-NEXT: orr v0.16b, v1.16b, v0.16b +; SVE2-NEXT: mvn v0.16b, v0.16b +; SVE2-NEXT: ret + %3 = or <2 x i64> %1, %0 + %4 = xor <2 x i64> %3, splat (i64 -1) + ret <2 x i64> %4 +} + +; EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a). +define <2 x i64> @eon_q(<2 x i64> %0, <2 x i64> %1) #0 { +; NEON-LABEL: eon_q: +; NEON: // %bb.0: +; NEON-NEXT: eor v0.16b, v0.16b, v1.16b +; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: eon_q: +; SVE2: // %bb.0: +; SVE2-NEXT: eor v0.16b, v0.16b, v1.16b +; SVE2-NEXT: mvn v0.16b, v0.16b +; SVE2-NEXT: ret + %3 = xor <2 x i64> %0, %1 + %4 = xor <2 x i64> %3, splat (i64 -1) + ret <2 x i64> %4 +} + +; ORN (a, b) has a dedicated instruction (ORN). 
+define <2 x i64> @orn_q(<2 x i64> %0, <2 x i64> %1) #0 { +; NEON-LABEL: orn_q: +; NEON: // %bb.0: +; NEON-NEXT: orn v0.16b, v0.16b, v1.16b +; NEON-NEXT: ret +; +; SVE2-LABEL: orn_q: +; SVE2: // %bb.0: +; SVE2-NEXT: orn v0.16b, v0.16b, v1.16b +; SVE2-NEXT: ret + %3 = xor <2 x i64> %1, splat (i64 -1) + %4 = or <2 x i64> %0, %3 + ret <2 x i64> %4 +} diff --git a/llvm/test/CodeGen/AArch64/eor3.ll b/llvm/test/CodeGen/AArch64/eor3.ll index b89d9d608575c..a2631681847dc 100644 --- a/llvm/test/CodeGen/AArch64/eor3.ll +++ b/llvm/test/CodeGen/AArch64/eor3.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s ; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s -; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s +; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3-SVE2 %s define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; SHA3-LABEL: eor3_16x8_left: @@ -24,6 +24,11 @@ define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: mov v0.16b, v2.16b ; SVE2-NEXT: ret +; +; SHA3-SVE2-LABEL: eor3_16x8_left: +; SHA3-SVE2: // %bb.0: +; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b +; SHA3-SVE2-NEXT: ret %4 = xor <16 x i8> %0, %1 %5 = xor <16 x i8> %2, %4 ret <16 x i8> %5 @@ -49,6 +54,11 @@ define <16 x i8> @eor3_16x8_right(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: mov v0.16b, v1.16b ; SVE2-NEXT: ret +; +; SHA3-SVE2-LABEL: eor3_16x8_right: +; SHA3-SVE2: // %bb.0: +; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b +; SHA3-SVE2-NEXT: ret %4 = xor <16 x i8> %1, %2 %5 = xor <16 x i8> %4, %0 ret <16 x i8> %5 @@ -74,6 +84,11 @@ define <8 x i16> @eor3_8x16_left(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: mov v0.16b, v2.16b ; SVE2-NEXT: ret +; +; SHA3-SVE2-LABEL: eor3_8x16_left: +; SHA3-SVE2: // %bb.0: +; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b +; SHA3-SVE2-NEXT: ret %4 = xor <8 x i16> %0, %1 %5 = xor <8 x i16> %2, %4 ret <8 x i16> %5 @@ -99,6 +114,11 @@ define <8 x i16> @eor3_8x16_right(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: mov v0.16b, v1.16b ; SVE2-NEXT: ret +; +; SHA3-SVE2-LABEL: eor3_8x16_right: +; SHA3-SVE2: // %bb.0: +; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b +; SHA3-SVE2-NEXT: ret %4 = xor <8 x i16> %1, %2 %5 = xor <8 x i16> %4, %0 ret <8 x i16> %5 @@ -124,6 +144,11 @@ define <4 x i32> @eor3_4x32_left(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: mov v0.16b, v2.16b ; SVE2-NEXT: ret +; +; SHA3-SVE2-LABEL: eor3_4x32_left: +; SHA3-SVE2: // %bb.0: +; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b +; SHA3-SVE2-NEXT: ret %4 = xor <4 x i32> %0, %1 %5 = xor <4 x i32> %2, %4 ret <4 x i32> %5 @@ -149,6 +174,11 @@ define <4 x i32> @eor3_4x32_right(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: mov v0.16b, v1.16b ; SVE2-NEXT: ret +; +; SHA3-SVE2-LABEL: eor3_4x32_right: +; SHA3-SVE2: // %bb.0: +; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b +; SHA3-SVE2-NEXT: ret %4 = xor <4 x i32> %1, %2 %5 = xor <4 x i32> %4, %0 ret <4 x i32> %5 @@ -174,6 +204,11 @@ define <2 x i64> @eor3_2x64_left(<2 x i64> 
%0, <2 x i64> %1, <2 x i64> %2) {
 ; SVE2-NEXT:    eor3 z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    mov v0.16b, v2.16b
 ; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_2x64_left:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; SHA3-SVE2-NEXT:    ret
   %4 = xor <2 x i64> %0, %1
   %5 = xor <2 x i64> %2, %4
   ret <2 x i64> %5
@@ -199,6 +234,11 @@ define <2 x i64> @eor3_2x64_right(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
 ; SVE2-NEXT:    mov v0.16b, v1.16b
 ; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_2x64_right:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-SVE2-NEXT:    ret
   %4 = xor <2 x i64> %1, %2
   %5 = xor <2 x i64> %4, %0
   ret <2 x i64> %5
@@ -222,6 +262,12 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
 ; SVE2-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; SVE2-NEXT:    mvn v0.16b, v0.16b
 ; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_vnot:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    eor v0.16b, v0.16b, v1.16b
+; SHA3-SVE2-NEXT:    mvn v0.16b, v0.16b
+; SHA3-SVE2-NEXT:    ret
   %3 = xor <2 x i64> %0, <i64 -1, i64 -1>
   %4 = xor <2 x i64> %3, %1
   ret <2 x i64> %4
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 8aedeac18f64a..660d0c85a4b7c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -312,3 +312,67 @@ entry:
   %t3 = xor <vscale x 2 x i64> %t2, %b
   ret <vscale x 2 x i64> %t3
 }
+
+; NOT (a) = NBSL (a, a, a).
+; We don't have a pattern for this right now because the tied register
+; constraint can lead to worse code gen.
+define <vscale x 2 x i64> @not(<vscale x 2 x i64> %0) #0 {
+; CHECK-LABEL: not:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %2 = xor <vscale x 2 x i64> %0, splat (i64 -1)
+  ret <vscale x 2 x i64> %2
+}
+
+; NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a).
+define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: nand:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.d, z1.d, z0.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    ret
+  %3 = and <vscale x 2 x i64> %1, %0
+  %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
+  ret <vscale x 2 x i64> %4
+}
+
+; NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b).
+define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: nor:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    ret
+  %3 = or <vscale x 2 x i64> %1, %0
+  %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
+  ret <vscale x 2 x i64> %4
+}
+
+; EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a).
+define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: eon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    eor3 z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %3 = xor <vscale x 2 x i64> %0, %1
+  %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
+  ret <vscale x 2 x i64> %4
+}
+
+; ORN (a, b) = BSL2N (a, b, a).
+define <vscale x 2 x i64> @orn(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: orn:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    eor z1.d, z1.d, z2.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %3 = xor <vscale x 2 x i64> %1, splat (i64 -1)
+  %4 = or <vscale x 2 x i64> %0, %3
+  ret <vscale x 2 x i64> %4
+}

From 7f5c069de223eedf28f2f9efa603287194bc27b4 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus
Date: Tue, 8 Jul 2025 04:19:17 -0700
Subject: [PATCH 2/2] [AArch64] Use SVE2 bit-sel instructions for some binary
 patterns.

We can use NBSL/BSL2N to implement the following operations via the
corresponding identities:

* EON(a, b) = BSL2N(a, a, b)
* NAND(a, b) = NBSL(a, b, b) = NBSL(b, a, a)
* NOR(a, b) = NBSL(a, b, a) = NBSL(b, a, b)
* ORN(a, b) = BSL2N(a, b, a)

These operations are currently lowered into at least two instructions
because we don't have dedicated Neon/SVE instructions for them. With the
appropriate NBSL/BSL2N patterns we can lower each of them to a single
instruction; a quick sanity check of the identities is sketched below.
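Concretely, with the destructive operand order (Zdn, Zm, Zk) used above,
the bit-select semantics are BSL(a, b, c) = (a & c) | (b & ~c),
NBSL(a, b, c) = NOT BSL(a, b, c), and BSL2N(a, b, c) = (a & c) | (~b & ~c).
The illustrative C snippet below (not part of the patch) checks the
identities exhaustively over one byte, which suffices because the
operations are bitwise:

  #include <assert.h>
  #include <stdint.h>

  /* Bit-select semantics, operand order (Zdn, Zm, Zk). */
  static uint8_t bsl(uint8_t a, uint8_t b, uint8_t c)   { return (a & c) | (b & ~c); }
  static uint8_t nbsl(uint8_t a, uint8_t b, uint8_t c)  { return ~bsl(a, b, c); }
  static uint8_t bsl2n(uint8_t a, uint8_t b, uint8_t c) { return (a & c) | (~b & ~c); }

  int main(void) {
    for (unsigned a = 0; a < 256; ++a) {
      for (unsigned b = 0; b < 256; ++b) {
        assert((uint8_t)~(a ^ b) == bsl2n(a, a, b)); /* EON  */
        assert((uint8_t)~(a ^ b) == bsl2n(b, b, a)); /* EON  */
        assert((uint8_t)~(a & b) == nbsl(a, b, b));  /* NAND */
        assert((uint8_t)~(a & b) == nbsl(b, a, a));  /* NAND */
        assert((uint8_t)~(a | b) == nbsl(a, b, a));  /* NOR  */
        assert((uint8_t)~(a | b) == nbsl(b, a, b));  /* NOR  */
        assert((uint8_t)(a | ~b) == bsl2n(a, b, a)); /* ORN  */
      }
    }
    return 0;
  }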
P.S. We can also use NBSL to implement an unpredicated
NOT(a) = NBSL(a, a, a). However, because of the tied register
constraint, this may not always be profitable.
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 30 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/bsl.ll              | 18 +++++++----
 llvm/test/CodeGen/AArch64/eor3.ll             | 12 +++++---
 .../test/CodeGen/AArch64/sve-pred-selectop.ll | 12 ++------
 llvm/test/CodeGen/AArch64/sve2-bsl.ll         | 15 +++------
 5 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 261df563bb2a9..8f02fc0b647ac 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4034,6 +4034,36 @@ let Predicates = [HasSVE2_or_SME] in {
   defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", AArch64bsl2n>;
   defm NBSL_ZZZZ  : sve2_int_bitwise_ternary_op<0b111, "nbsl", AArch64nbsl>;
 
+  multiclass binary_bitwise<ValueType VT, SDPatternOperator InOp, OutPatFrag OutOp> {
+    def : Pat<(InOp VT:$op1, VT:$op2), (OutOp $op1, $op2)>;
+
+    def : Pat<(SVEType<VT>.DSub (InOp V64:$op1, V64:$op2)),
+              (EXTRACT_SUBREG (OutOp (INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.DSub $op1), dsub),
+                                     (INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.DSub $op2), dsub)), dsub)>;
+
+    def : Pat<(SVEType<VT>.ZSub (InOp V128:$op1, V128:$op2)),
+              (EXTRACT_SUBREG (OutOp (INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.ZSub $op1), zsub),
+                                     (INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.ZSub $op2), zsub)), zsub)>;
+  }
+
+  foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
+    // EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a)
+    defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (vnot (xor node:$op1, node:$op2))>,
+                          OutPatFrag<(ops node:$op1, node:$op2), (BSL2N_ZZZZ $op1, $op1, $op2)>>;
+
+    // NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a)
+    defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (vnot (and node:$op1, node:$op2))>,
+                          OutPatFrag<(ops node:$op1, node:$op2), (NBSL_ZZZZ $op2, $op1, $op1)>>;
+
+    // NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b)
+    defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (vnot (or node:$op1, node:$op2))>,
+                          OutPatFrag<(ops node:$op1, node:$op2), (NBSL_ZZZZ $op2, $op1, $op2)>>;
+
+    // ORN (a, b) = BSL2N (a, b, a)
+    defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (or node:$op1, (vnot node:$op2))>,
+                          OutPatFrag<(ops node:$op1, node:$op2), (BSL2N_ZZZZ $op1, $op2, $op1)>>;
+  }
+
   // SVE2 bitwise xor and rotate right by immediate
   defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
 
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
index 8e402ed23d085..df6b6f75b8935 100644
--- a/llvm/test/CodeGen/AArch64/bsl.ll
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -457,8 +457,10 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
 ;
 ; SVE2-LABEL: nand_q:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    and v0.16b, v1.16b, v0.16b
-; SVE2-NEXT:    mvn v0.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %3 = and <2 x i64> %1, %0
   %4 = xor <2 x i64> %3, splat (i64 -1)
@@ -475,8 +477,10 @@ define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
 ;
 ; SVE2-LABEL: nor_q:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    orr v0.16b, v1.16b, v0.16b
-; SVE2-NEXT:    mvn v0.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z0.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %3 = or <2 x i64> %1, %0
   %4 = xor <2 x i64> %3, splat (i64 -1)
@@ -493,8 +497,10 @@ define <2 x i64> @eon_q(<2 x i64> %0, <2 x i64> %1) #0 {
 ;
 ; SVE2-LABEL: eon_q:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    eor v0.16b, v0.16b, v1.16b
-; SVE2-NEXT:    mvn v0.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    bsl2n z0.d, z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %3 = xor <2 x i64> %0, %1
   %4 = xor <2 x i64> %3, splat (i64 -1)
diff --git a/llvm/test/CodeGen/AArch64/eor3.ll b/llvm/test/CodeGen/AArch64/eor3.ll
index a2631681847dc..eccd09131b525 100644
--- a/llvm/test/CodeGen/AArch64/eor3.ll
+++ b/llvm/test/CodeGen/AArch64/eor3.ll
@@ -259,14 +259,18 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
 ;
 ; SVE2-LABEL: eor3_vnot:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    eor v0.16b, v0.16b, v1.16b
-; SVE2-NEXT:    mvn v0.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    bsl2n z0.d, z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; SHA3-SVE2-LABEL: eor3_vnot:
 ; SHA3-SVE2:       // %bb.0:
-; SHA3-SVE2-NEXT:    eor v0.16b, v0.16b, v1.16b
-; SHA3-SVE2-NEXT:    mvn v0.16b, v0.16b
+; SHA3-SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SHA3-SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SHA3-SVE2-NEXT:    bsl2n z0.d, z0.d, z0.d, z1.d
+; SHA3-SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SHA3-SVE2-NEXT:    ret
   %3 = xor <2 x i64> %0, <i64 -1, i64 -1>
   %4 = xor <2 x i64> %3, %1
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 30ec2de2bd9cc..9a78726c450d1 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -322,11 +322,9 @@ entry:
 define <vscale x 4 x i32> @ornot_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
 ; CHECK-LABEL: ornot_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z3.s, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    bsl2n z1.d, z1.d, z2.d, z1.d
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT:    eor z2.d, z2.d, z3.d
-; CHECK-NEXT:    orr z1.d, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 entry:
@@ -340,11 +338,9 @@ define <vscale x 8 x i16> @ornot_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
 ; CHECK-LABEL: ornot_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z3.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    bsl2n z1.d, z1.d, z2.d, z1.d
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT:    eor z2.d, z2.d, z3.d
-; CHECK-NEXT:    orr z1.d, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
 entry:
@@ -358,11 +354,9 @@ define <vscale x 16 x i8> @ornot_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
 ; CHECK-LABEL: ornot_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z3.b, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    bsl2n z1.d, z1.d, z2.d, z1.d
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    eor z2.d, z2.d, z3.d
-; CHECK-NEXT:    orr z1.d, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.b, p0/m, z1.b
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 660d0c85a4b7c..6cfe66eb8e633 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -330,9 +327,7 @@ define <vscale x 2 x i64> @not(<vscale x 2 x i64> %0) #0 {
 define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
 ; CHECK-LABEL: nand:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    and z0.d, z1.d, z0.d
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z1.d
 ; CHECK-NEXT:    ret
   %3 = and <vscale x 2 x i64> %1, %0
   %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
@@ -343,9 +341,7 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
 define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
 ; CHECK-LABEL: nor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    orr z0.d, z1.d, z0.d
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z0.d
 ; CHECK-NEXT:    ret
   %3 = or <vscale x 2 x i64> %1, %0
   %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
@@ -356,8 +352,7 @@ define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
 define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
 ; CHECK-LABEL: eon:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    eor3 z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %3 = xor <vscale x 2 x i64> %0, %1
   %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
@@ -368,9 +363,7 @@ define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
 define <vscale x 2 x i64> @orn(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
 ; CHECK-LABEL: orn:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    eor z1.d, z1.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z0.d
 ; CHECK-NEXT:    ret
   %3 = xor <vscale x 2 x i64> %1, splat (i64 -1)
   %4 = or <vscale x 2 x i64> %0, %3