Skip to content

Commit b19de81

Browse files
committed
[WebAssembly] Improve codegen for v128.bitselect
Add patterns selecting ((v1 ^ v2) & c) ^ v2 and ((v1 ^ v2) & ~c) ^ v2 to v128.bitselect. Resolves #56827. Reviewed By: aheejin. Differential Revision: https://reviews.llvm.org/D131131
1 parent c9e1ecd commit b19de81

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,20 @@ def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)),
811811
(and (vnot V128:$c), (vec.vt V128:$v2)))),
812812
(BITSELECT $v1, $v2, $c)>;
813813

814+
// Bitselect is also equivalent to ((v1 ^ v2) & c) ^ v2
815+
foreach vec = IntVecs in
816+
def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
817+
(vec.vt V128:$c)),
818+
(vec.vt V128:$v2))),
819+
(BITSELECT $v1, $v2, $c)>;
820+
821+
// Same pattern with `c` negated, so `v1` and `v2` get swapped.
822+
foreach vec = IntVecs in
823+
def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
824+
(vnot (vec.vt V128:$c))),
825+
(vec.vt V128:$v2))),
826+
(BITSELECT $v2, $v1, $c)>;
827+
814828
// Also implement vselect in terms of bitselect
815829
foreach vec = AllVecs in
816830
def : Pat<(vec.vt (vselect

llvm/test/CodeGen/WebAssembly/simd-arith.ll

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,39 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
343343
ret <16 x i8> %a
344344
}
345345

346+
; CHECK-LABEL: bitselect_xor_v16i8:
347+
; NO-SIMD128-NOT: v128
348+
; SIMD128-NEXT: .functype bitselect_xor_v16i8 (v128, v128, v128) -> (v128){{$}}
349+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
350+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
351+
; SIMD128-FAST-NEXT: v128.xor
352+
; SIMD128-FAST-NEXT: v128.and
353+
; SIMD128-FAST-NEXT: v128.xor
354+
define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
355+
%xor1 = xor <16 x i8> %v1, %v2
356+
%and = and <16 x i8> %xor1, %c
357+
%a = xor <16 x i8> %and, %v2
358+
ret <16 x i8> %a
359+
}
360+
361+
; CHECK-LABEL: bitselect_xor_reversed_v16i8:
362+
; NO-SIMD128-NOT: v128
363+
; SIMD128-NEXT: .functype bitselect_xor_reversed_v16i8 (v128, v128, v128) -> (v128){{$}}
364+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
365+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
366+
; SIMD128-FAST-NEXT: v128.xor
367+
; SIMD128-FAST-NEXT: v128.not
368+
; SIMD128-FAST-NEXT: v128.and
369+
; SIMD128-FAST-NEXT: v128.xor
370+
define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
371+
%xor1 = xor <16 x i8> %v1, %v2
372+
%notc = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
373+
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
374+
%and = and <16 x i8> %xor1, %notc
375+
%a = xor <16 x i8> %and, %v2
376+
ret <16 x i8> %a
377+
}
378+
346379
; ==============================================================================
347380
; 8 x i16
348381
; ==============================================================================
@@ -659,6 +692,39 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
659692
ret <8 x i16> %a
660693
}
661694

695+
; CHECK-LABEL: bitselect_xor_v8i16:
696+
; NO-SIMD128-NOT: v128
697+
; SIMD128-NEXT: .functype bitselect_xor_v8i16 (v128, v128, v128) -> (v128){{$}}
698+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
699+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
700+
; SIMD128-FAST-NEXT: v128.xor
701+
; SIMD128-FAST-NEXT: v128.and
702+
; SIMD128-FAST-NEXT: v128.xor
703+
define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
704+
%xor1 = xor <8 x i16> %v1, %v2
705+
%and = and <8 x i16> %xor1, %c
706+
%a = xor <8 x i16> %and, %v2
707+
ret <8 x i16> %a
708+
}
709+
710+
; CHECK-LABEL: bitselect_xor_reversed_v8i16:
711+
; NO-SIMD128-NOT: v128
712+
; SIMD128-NEXT: .functype bitselect_xor_reversed_v8i16 (v128, v128, v128) -> (v128){{$}}
713+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
714+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
715+
; SIMD128-FAST-NEXT: v128.xor
716+
; SIMD128-FAST-NEXT: v128.not
717+
; SIMD128-FAST-NEXT: v128.and
718+
; SIMD128-FAST-NEXT: v128.xor
719+
define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
720+
%xor1 = xor <8 x i16> %v1, %v2
721+
%notc = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1,
722+
i16 -1, i16 -1, i16 -1, i16 -1>
723+
%and = and <8 x i16> %xor1, %notc
724+
%a = xor <8 x i16> %and, %v2
725+
ret <8 x i16> %a
726+
}
727+
662728
; CHECK-LABEL: extmul_low_s_v8i16:
663729
; NO-SIMD128-NOT: i16x8
664730
; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}}
@@ -998,6 +1064,38 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
9981064
ret <4 x i32> %a
9991065
}
10001066

1067+
; CHECK-LABEL: bitselect_xor_v4i32:
1068+
; NO-SIMD128-NOT: v128
1069+
; SIMD128-NEXT: .functype bitselect_xor_v4i32 (v128, v128, v128) -> (v128){{$}}
1070+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
1071+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1072+
; SIMD128-FAST-NEXT: v128.xor
1073+
; SIMD128-FAST-NEXT: v128.and
1074+
; SIMD128-FAST-NEXT: v128.xor
1075+
define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
1076+
%xor1 = xor <4 x i32> %v1, %v2
1077+
%and = and <4 x i32> %xor1, %c
1078+
%a = xor <4 x i32> %and, %v2
1079+
ret <4 x i32> %a
1080+
}
1081+
1082+
; CHECK-LABEL: bitselect_xor_reversed_v4i32:
1083+
; NO-SIMD128-NOT: v128
1084+
; SIMD128-NEXT: .functype bitselect_xor_reversed_v4i32 (v128, v128, v128) -> (v128){{$}}
1085+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
1086+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1087+
; SIMD128-FAST-NEXT: v128.xor
1088+
; SIMD128-FAST-NEXT: v128.not
1089+
; SIMD128-FAST-NEXT: v128.and
1090+
; SIMD128-FAST-NEXT: v128.xor
1091+
define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
1092+
%xor1 = xor <4 x i32> %v1, %v2
1093+
%notc = xor <4 x i32> %c, <i32 -1, i32 -1, i32 -1, i32 -1>
1094+
%and = and <4 x i32> %xor1, %notc
1095+
%a = xor <4 x i32> %and, %v2
1096+
ret <4 x i32> %a
1097+
}
1098+
10011099
; CHECK-LABEL: extmul_low_s_v4i32:
10021100
; NO-SIMD128-NOT: i32x4
10031101
; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}}
@@ -1390,6 +1488,38 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
13901488
ret <2 x i64> %a
13911489
}
13921490

1491+
; CHECK-LABEL: bitselect_xor_v2i64:
1492+
; NO-SIMD128-NOT: v128
1493+
; SIMD128-NEXT: .functype bitselect_xor_v2i64 (v128, v128, v128) -> (v128){{$}}
1494+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
1495+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1496+
; SIMD128-FAST-NEXT: v128.xor
1497+
; SIMD128-FAST-NEXT: v128.and
1498+
; SIMD128-FAST-NEXT: v128.xor
1499+
define <2 x i64> @bitselect_xor_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
1500+
%xor1 = xor <2 x i64> %v1, %v2
1501+
%and = and <2 x i64> %xor1, %c
1502+
%a = xor <2 x i64> %and, %v2
1503+
ret <2 x i64> %a
1504+
}
1505+
1506+
; CHECK-LABEL: bitselect_xor_reversed_v2i64:
1507+
; NO-SIMD128-NOT: v128
1508+
; SIMD128-NEXT: .functype bitselect_xor_reversed_v2i64 (v128, v128, v128) -> (v128){{$}}
1509+
; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
1510+
; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
1511+
; SIMD128-FAST-NEXT: v128.xor
1512+
; SIMD128-FAST-NEXT: v128.not
1513+
; SIMD128-FAST-NEXT: v128.and
1514+
; SIMD128-FAST-NEXT: v128.xor
1515+
define <2 x i64> @bitselect_xor_reversed_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
1516+
%xor1 = xor <2 x i64> %v1, %v2
1517+
%notc = xor <2 x i64> %c, <i64 -1, i64 -1>
1518+
%and = and <2 x i64> %xor1, %notc
1519+
%a = xor <2 x i64> %and, %v2
1520+
ret <2 x i64> %a
1521+
}
1522+
13931523
; CHECK-LABEL: extmul_low_s_v2i64:
13941524
; NO-SIMD128-NOT: i64x2
13951525
; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}

0 commit comments

Comments
 (0)