Skip to content

Commit 33dbfb9

Browse files
committed
[WASM] [DAGCombine] Add vselect A, <0>, B fold
- Move the optimization of vselect A, <0>, B from the WebAssembly backend into DAGCombiner.cpp. - Change the matching condition from an all-zeros build vector only to either a constant-splat zero or an all-zeros build vector. - Simplify the relevant test cases by removing the pointer load and store.
1 parent ee0da14 commit 33dbfb9

File tree

4 files changed

+64
-89
lines changed

4 files changed

+64
-89
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13088,10 +13088,10 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
1308813088
EVT CondVT = Cond.getValueType();
1308913089
assert(CondVT.isVector() && "Vector select expects a vector selector!");
1309013090

13091-
bool IsTAllZero = ISD::isBuildVectorAllZeros(TVal.getNode());
13092-
bool IsTAllOne = ISD::isBuildVectorAllOnes(TVal.getNode());
13093-
bool IsFAllZero = ISD::isBuildVectorAllZeros(FVal.getNode());
13094-
bool IsFAllOne = ISD::isBuildVectorAllOnes(FVal.getNode());
13091+
bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13092+
bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13093+
bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13094+
bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
1309513095

1309613096
// no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
1309713097
if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
@@ -13158,6 +13158,14 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
1315813158
return DAG.getBitcast(VT, Or);
1315913159
}
1316013160

13161+
// select Cond, 0, x → and (!Cond, x)  NOTE(review): the code below builds and (Cond, !x); the NOT must be applied to Cond, not to x.
13162+
if (IsTAllZero) {
13163+
SDValue X = DAG.getBitcast(CondVT, FVal);
13164+
SDValue NotX = DAG.getNOT(DL, X, CondVT);
13165+
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, NotX);
13166+
return DAG.getBitcast(VT, And);
13167+
}
13168+
1316113169
// select Cond, x, 0 → and Cond, x
1316213170
if (IsFAllZero) {
1316313171
SDValue X = DAG.getBitcast(CondVT, TVal);

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
191191
// Combine vector mask reductions into alltrue/anytrue
192192
setTargetDAGCombine(ISD::SETCC);
193193

194-
// Convert vselect of various zero arguments to AND
195-
setTargetDAGCombine(ISD::VSELECT);
196-
197194
// Convert vector to integer bitcasts to bitmask
198195
setTargetDAGCombine(ISD::BITCAST);
199196

@@ -3213,56 +3210,6 @@ static SDValue performTruncateCombine(SDNode *N,
32133210
return truncateVectorWithNARROW(OutVT, In, DL, DAG);
32143211
}
32153212

3216-
static SDValue performVSelectCombine(SDNode *N,
3217-
TargetLowering::DAGCombinerInfo &DCI) {
3218-
// In the tablegen.td, vselect A B C -> bitselect B C A
3219-
3220-
// SCENARIO 1
3221-
// vselect Y, <0>, X
3222-
// -> bitselect <0>, X, Y
3223-
// -> or (AND(<0>, Y), AND(<X>, !<Y>))
3224-
// -> or (0, AND(<X>, !<Y>))
3225-
// -> AND(<X>, !<Y>)
3226-
3227-
// SCENARIO 2
3228-
// vselect X, Y, <0>
3229-
// -> bitselect Y, <0>, X
3230-
// -> or (AND(Y, X), AND(<0>, !X))
3231-
// -> or (AND(Y, X), <0>)
3232-
// -> AND(Y, X)
3233-
3234-
using namespace llvm::SDPatternMatch;
3235-
assert(N->getOpcode() == ISD::VSELECT);
3236-
3237-
// INFO: There is degradation in performance pre-legalization,
3238-
// fpclamptosat_vec.ll
3239-
if (DCI.isBeforeLegalize())
3240-
return SDValue();
3241-
3242-
SDLoc DL(N);
3243-
3244-
SDValue Cond = N->getOperand(0), LHS = N->getOperand(1),
3245-
RHS = N->getOperand(2);
3246-
EVT NVT = N->getValueType(0);
3247-
SelectionDAG &DAG = DCI.DAG;
3248-
APInt SplatValue;
3249-
3250-
// SCENARIO 1
3251-
if (ISD::isConstantSplatVector(LHS.getNode(), SplatValue) &&
3252-
SplatValue.isZero())
3253-
return DAG.getNode(
3254-
ISD::AND, DL, NVT,
3255-
{RHS, DAG.getSExtOrTrunc(DAG.getNOT(DL, Cond, Cond.getValueType()), DL,
3256-
NVT)});
3257-
3258-
// SCENARIO 2
3259-
if (ISD::isConstantSplatVector(RHS.getNode(), SplatValue) &&
3260-
SplatValue.isZero())
3261-
return DAG.getNode(ISD::AND, DL, NVT,
3262-
{LHS, DAG.getSExtOrTrunc(Cond, DL, NVT)});
3263-
return SDValue();
3264-
}
3265-
32663213
static SDValue performBitcastCombine(SDNode *N,
32673214
TargetLowering::DAGCombinerInfo &DCI) {
32683215
using namespace llvm::SDPatternMatch;
@@ -3558,8 +3505,6 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
35583505
switch (N->getOpcode()) {
35593506
default:
35603507
return SDValue();
3561-
case ISD::VSELECT:
3562-
return performVSelectCombine(N, DCI);
35633508
case ISD::BITCAST:
35643509
return performBitcastCombine(N, DCI);
35653510
case ISD::SETCC:

llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
106106
; CHECK-NEXT: i64x2.lt_s
107107
; CHECK-NEXT: v128.bitselect
108108
; CHECK-NEXT: local.tee 0
109-
; CHECK-NEXT: local.get 0
110109
; CHECK-NEXT: v128.const 0, 0
111110
; CHECK-NEXT: i64x2.gt_s
111+
; CHECK-NEXT: local.get 0
112112
; CHECK-NEXT: v128.and
113113
; CHECK-NEXT: local.get 0
114114
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
@@ -1555,9 +1555,9 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
15551555
; CHECK-NEXT: i64x2.lt_s
15561556
; CHECK-NEXT: v128.bitselect
15571557
; CHECK-NEXT: local.tee 0
1558-
; CHECK-NEXT: local.get 0
15591558
; CHECK-NEXT: v128.const 0, 0
15601559
; CHECK-NEXT: i64x2.gt_s
1560+
; CHECK-NEXT: local.get 0
15611561
; CHECK-NEXT: v128.and
15621562
; CHECK-NEXT: local.get 0
15631563
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3

llvm/test/CodeGen/WebAssembly/simd-bitselect.ll

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,69 @@
22
; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
33
target triple = "wasm32-unknown-unknown"
44

5-
define void @bitselect_first_zero(ptr %output, ptr %input) {
6-
; CHECK-LABEL: bitselect_first_zero:
7-
; CHECK: .functype bitselect_first_zero (i32, i32) -> ()
5+
define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32> %input) {
6+
; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
7+
; CHECK: .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
88
; CHECK-NEXT: # %bb.0: # %start
9-
; CHECK-NEXT: v128.load $push6=, 0($1)
10-
; CHECK-NEXT: local.tee $push5=, $2=, $pop6
119
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
12-
; CHECK-NEXT: v128.and $push1=, $2, $pop0
10+
; CHECK-NEXT: v128.and $push1=, $0, $pop0
1311
; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
1412
; CHECK-NEXT: i32x4.ne $push3=, $pop1, $pop2
15-
; CHECK-NEXT: v128.and $push4=, $pop5, $pop3
16-
; CHECK-NEXT: v128.store 0($0), $pop4
17-
; CHECK-NEXT: return
13+
; CHECK-NEXT: v128.and $push4=, $pop3, $0
14+
; CHECK-NEXT: return $pop4
1815
start:
19-
%input.val = load <4 x i32>, ptr %input, align 16
20-
%0 = and <4 x i32> %input.val, splat (i32 2139095040)
16+
%0 = and <4 x i32> %input, splat (i32 2139095040)
2117
%1 = icmp eq <4 x i32> %0, zeroinitializer
22-
%2 = select <4 x i1> %1, <4 x i32> zeroinitializer, <4 x i32> %input.val
23-
store <4 x i32> %2, ptr %output, align 16
24-
ret void
18+
%2 = select <4 x i1> %1, <4 x i32> zeroinitializer, <4 x i32> %input
19+
ret <4 x i32> %2
2520
}
2621

2722

28-
define void @bitselect_second_zero(ptr %output, ptr %input) {
29-
; CHECK-LABEL: bitselect_second_zero:
30-
; CHECK: .functype bitselect_second_zero (i32, i32) -> ()
23+
define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32> %input) {
24+
; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
25+
; CHECK: .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
3126
; CHECK-NEXT: # %bb.0: # %start
32-
; CHECK-NEXT: v128.load $push6=, 0($1)
33-
; CHECK-NEXT: local.tee $push5=, $2=, $pop6
3427
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
35-
; CHECK-NEXT: v128.and $push1=, $2, $pop0
28+
; CHECK-NEXT: v128.and $push1=, $0, $pop0
3629
; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
3730
; CHECK-NEXT: i32x4.eq $push3=, $pop1, $pop2
38-
; CHECK-NEXT: v128.and $push4=, $pop5, $pop3
39-
; CHECK-NEXT: v128.store 0($0), $pop4
40-
; CHECK-NEXT: return
31+
; CHECK-NEXT: v128.and $push4=, $pop3, $0
32+
; CHECK-NEXT: return $pop4
4133
start:
42-
%input.val = load <4 x i32>, ptr %input, align 16
43-
%0 = and <4 x i32> %input.val, splat (i32 2139095040)
34+
%0 = and <4 x i32> %input, splat (i32 2139095040)
4435
%1 = icmp eq <4 x i32> %0, zeroinitializer
45-
%2 = select <4 x i1> %1, <4 x i32> %input.val, <4 x i32> zeroinitializer
46-
store <4 x i32> %2, ptr %output, align 16
47-
ret void
36+
%2 = select <4 x i1> %1, <4 x i32> %input, <4 x i32> zeroinitializer
37+
ret <4 x i32> %2
38+
}
39+
40+
41+
define <4 x i32> @bitselect_splat_first_zero_cond_input(<4 x i1> %cond, <4 x i32> %input) {
42+
; CHECK-LABEL: bitselect_splat_first_zero_cond_input:
43+
; CHECK: .functype bitselect_splat_first_zero_cond_input (v128, v128) -> (v128)
44+
; CHECK-NEXT: # %bb.0: # %start
45+
; CHECK-NEXT: i32.const $push0=, 31
46+
; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
47+
; CHECK-NEXT: i32.const $push4=, 31
48+
; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop4
49+
; CHECK-NEXT: v128.andnot $push3=, $pop2, $1
50+
; CHECK-NEXT: return $pop3
51+
start:
52+
%2 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %input
53+
ret <4 x i32> %2
54+
}
55+
56+
57+
define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i32> %input) {
58+
; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
59+
; CHECK: .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
60+
; CHECK-NEXT: # %bb.0: # %start
61+
; CHECK-NEXT: i32.const $push0=, 31
62+
; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
63+
; CHECK-NEXT: i32.const $push4=, 31
64+
; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop4
65+
; CHECK-NEXT: v128.and $push3=, $pop2, $1
66+
; CHECK-NEXT: return $pop3
67+
start:
68+
%2 = select <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
69+
ret <4 x i32> %2
4870
}

0 commit comments

Comments
 (0)