Skip to content

Commit 01bfe97

Browse files
committed
[ISEL] Canonicalize STEP_VECTOR to LHS if RHS is a splat.
This helps recognise patterns where we're trying to match STEP_VECTOR patterns to INDEX instructions that take a GPR for the Start/Step. STEP_VECTOR is canonicalised to the LHS because it will already be on the LHS if the RHS is a constant splat vector.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D118459
1 parent 78bf2e0 commit 01bfe97

File tree

4 files changed

+60
-45
lines changed

4 files changed

+60
-45
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,11 @@ class SelectionDAG {
14121412
/// Return an AssertAlignSDNode.
14131413
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A);
14141414

1415+
/// Swap N1 and N2 if Opcode is a commutative binary opcode
1416+
/// and the canonical form expects the opposite order.
1417+
void canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
1418+
SDValue &N2) const;
1419+
14151420
/// Return the specified value casted to
14161421
/// the target's desired shift amount type.
14171422
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5629,20 +5629,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
56295629
return getNode(Opcode, DL, VT, N1, N2, Flags);
56305630
}
56315631

5632+
/// Put the two operands of a commutative binary operation into canonical
/// order. N1 and N2 are taken by reference and may be exchanged in place;
/// nothing is returned. If the target lowering info reports that Opcode is
/// not a commutative binop, both operands are left untouched.
void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
5633+
SDValue &N2) const {
5634+
// Operand order is only interchangeable for commutative opcodes.
if (!TLI->isCommutativeBinOp(Opcode))
5635+
return;
5636+
5637+
// Canonicalize:
5638+
// binop(const, nonconst) -> binop(nonconst, const)
5639+
// Each operand is checked with both the integer and the FP constant helper.
bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
5640+
bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
5641+
bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
5642+
bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
5643+
// Swap only when N1 passes a constant check that N2 fails; if both operands
// pass the same check their order is kept.
if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
5644+
std::swap(N1, N2);
5645+
5646+
// Canonicalize:
5647+
// binop(splat(x), step_vector) -> binop(step_vector, splat(x))
5648+
// NOTE: this `else if` pairs with the `if` above (only comments separate
// them), so it is evaluated only when the constant swap did not fire and at
// most one swap happens per call.
else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
5649+
N2.getOpcode() == ISD::STEP_VECTOR)
5650+
std::swap(N1, N2);
5651+
}
5652+
56325653
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
56335654
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
56345655
assert(N1.getOpcode() != ISD::DELETED_NODE &&
56355656
N2.getOpcode() != ISD::DELETED_NODE &&
56365657
"Operand is DELETED_NODE!");
5637-
// Canonicalize constant to RHS if commutative.
5638-
if (TLI->isCommutativeBinOp(Opcode)) {
5639-
bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
5640-
bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
5641-
bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
5642-
bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
5643-
if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
5644-
std::swap(N1, N2);
5645-
}
5658+
5659+
canonicalizeCommutativeBinop(Opcode, N1, N2);
56465660

56475661
auto *N1C = dyn_cast<ConstantSDNode>(N1);
56485662
auto *N2C = dyn_cast<ConstantSDNode>(N2);

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ define <vscale x 16 x i1> @lane_mask_nxv16i1_i8(i8 %index, i8 %TC) {
8080
; CHECK: // %bb.0:
8181
; CHECK-NEXT: index z0.b, #0, #1
8282
; CHECK-NEXT: mov z1.b, w0
83-
; CHECK-NEXT: uqadd z0.b, z1.b, z0.b
83+
; CHECK-NEXT: uqadd z0.b, z0.b, z1.b
8484
; CHECK-NEXT: mov z1.b, w1
8585
; CHECK-NEXT: ptrue p0.b
8686
; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
@@ -96,7 +96,7 @@ define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
9696
; CHECK-NEXT: mov z1.h, w0
9797
; CHECK-NEXT: and z0.h, z0.h, #0xff
9898
; CHECK-NEXT: and z1.h, z1.h, #0xff
99-
; CHECK-NEXT: add z0.h, z1.h, z0.h
99+
; CHECK-NEXT: add z0.h, z0.h, z1.h
100100
; CHECK-NEXT: mov z1.h, w1
101101
; CHECK-NEXT: umin z0.h, z0.h, #255
102102
; CHECK-NEXT: and z1.h, z1.h, #0xff
@@ -115,7 +115,7 @@ define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
115115
; CHECK-NEXT: mov z1.s, w0
116116
; CHECK-NEXT: and z0.s, z0.s, #0xff
117117
; CHECK-NEXT: and z1.s, z1.s, #0xff
118-
; CHECK-NEXT: add z0.s, z1.s, z0.s
118+
; CHECK-NEXT: add z0.s, z0.s, z1.s
119119
; CHECK-NEXT: mov z1.s, w1
120120
; CHECK-NEXT: umin z0.s, z0.s, #255
121121
; CHECK-NEXT: and z1.s, z1.s, #0xff
@@ -135,7 +135,7 @@ define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
135135
; CHECK-NEXT: mov z1.d, x0
136136
; CHECK-NEXT: and z0.d, z0.d, #0xff
137137
; CHECK-NEXT: and z1.d, z1.d, #0xff
138-
; CHECK-NEXT: add z0.d, z1.d, z0.d
138+
; CHECK-NEXT: add z0.d, z0.d, z1.d
139139
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
140140
; CHECK-NEXT: mov z2.d, x1
141141
; CHECK-NEXT: umin z0.d, z0.d, #255
@@ -167,29 +167,29 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
167167
; CHECK-NEXT: mov z2.d, z0.d
168168
; CHECK-NEXT: mov z4.s, w1
169169
; CHECK-NEXT: incw z1.s
170-
; CHECK-NEXT: uqadd z5.s, z3.s, z0.s
170+
; CHECK-NEXT: uqadd z5.s, z0.s, z3.s
171171
; CHECK-NEXT: incw z2.s, all, mul #2
172172
; CHECK-NEXT: mov z6.d, z1.d
173173
; CHECK-NEXT: cmphi p1.s, p0/z, z4.s, z5.s
174-
; CHECK-NEXT: uqadd z5.s, z3.s, z1.s
174+
; CHECK-NEXT: uqadd z5.s, z1.s, z3.s
175175
; CHECK-NEXT: cmphi p2.s, p0/z, z4.s, z5.s
176-
; CHECK-NEXT: uqadd z5.s, z3.s, z2.s
176+
; CHECK-NEXT: uqadd z5.s, z2.s, z3.s
177177
; CHECK-NEXT: incw z6.s, all, mul #2
178178
; CHECK-NEXT: incw z0.s, all, mul #4
179179
; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z5.s
180-
; CHECK-NEXT: uqadd z5.s, z3.s, z6.s
180+
; CHECK-NEXT: uqadd z5.s, z6.s, z3.s
181181
; CHECK-NEXT: incw z1.s, all, mul #4
182182
; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z5.s
183-
; CHECK-NEXT: uqadd z0.s, z3.s, z0.s
184-
; CHECK-NEXT: uqadd z1.s, z3.s, z1.s
183+
; CHECK-NEXT: uqadd z0.s, z0.s, z3.s
184+
; CHECK-NEXT: uqadd z1.s, z1.s, z3.s
185185
; CHECK-NEXT: incw z2.s, all, mul #4
186186
; CHECK-NEXT: incw z6.s, all, mul #4
187187
; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
188188
; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h
189189
; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z0.s
190190
; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z1.s
191-
; CHECK-NEXT: uqadd z0.s, z3.s, z2.s
192-
; CHECK-NEXT: uqadd z1.s, z3.s, z6.s
191+
; CHECK-NEXT: uqadd z0.s, z2.s, z3.s
192+
; CHECK-NEXT: uqadd z1.s, z6.s, z3.s
193193
; CHECK-NEXT: cmphi p5.s, p0/z, z4.s, z0.s
194194
; CHECK-NEXT: cmphi p0.s, p0/z, z4.s, z1.s
195195
; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h
@@ -223,63 +223,63 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
223223
; CHECK-NEXT: mov z2.d, z0.d
224224
; CHECK-NEXT: mov z4.d, x1
225225
; CHECK-NEXT: incd z1.d
226-
; CHECK-NEXT: uqadd z5.d, z3.d, z0.d
227-
; CHECK-NEXT: uqadd z6.d, z3.d, z1.d
226+
; CHECK-NEXT: uqadd z5.d, z0.d, z3.d
227+
; CHECK-NEXT: uqadd z6.d, z1.d, z3.d
228228
; CHECK-NEXT: cmphi p1.d, p0/z, z4.d, z5.d
229229
; CHECK-NEXT: mov z5.d, z1.d
230230
; CHECK-NEXT: incd z2.d, all, mul #2
231231
; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d
232-
; CHECK-NEXT: uqadd z6.d, z3.d, z2.d
232+
; CHECK-NEXT: uqadd z6.d, z2.d, z3.d
233233
; CHECK-NEXT: mov z7.d, z0.d
234234
; CHECK-NEXT: incd z5.d, all, mul #2
235235
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
236236
; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d
237-
; CHECK-NEXT: uqadd z6.d, z3.d, z5.d
237+
; CHECK-NEXT: uqadd z6.d, z5.d, z3.d
238238
; CHECK-NEXT: mov z24.d, z1.d
239239
; CHECK-NEXT: incd z7.d, all, mul #4
240240
; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d
241-
; CHECK-NEXT: uqadd z6.d, z3.d, z7.d
241+
; CHECK-NEXT: uqadd z6.d, z7.d, z3.d
242242
; CHECK-NEXT: mov z25.d, z2.d
243243
; CHECK-NEXT: incd z24.d, all, mul #4
244244
; CHECK-NEXT: mov z26.d, z5.d
245245
; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z6.d
246-
; CHECK-NEXT: uqadd z6.d, z3.d, z24.d
246+
; CHECK-NEXT: uqadd z6.d, z24.d, z3.d
247247
; CHECK-NEXT: incd z25.d, all, mul #4
248248
; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z6.d
249-
; CHECK-NEXT: uqadd z6.d, z3.d, z25.d
249+
; CHECK-NEXT: uqadd z6.d, z25.d, z3.d
250250
; CHECK-NEXT: incd z26.d, all, mul #4
251251
; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z6.d
252-
; CHECK-NEXT: uqadd z6.d, z3.d, z26.d
252+
; CHECK-NEXT: uqadd z6.d, z26.d, z3.d
253253
; CHECK-NEXT: uzp1 p2.s, p2.s, p3.s
254254
; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d
255255
; CHECK-NEXT: incd z0.d, all, mul #8
256256
; CHECK-NEXT: incd z1.d, all, mul #8
257257
; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s
258258
; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s
259-
; CHECK-NEXT: uqadd z0.d, z3.d, z0.d
260-
; CHECK-NEXT: uqadd z1.d, z3.d, z1.d
259+
; CHECK-NEXT: uqadd z0.d, z0.d, z3.d
260+
; CHECK-NEXT: uqadd z1.d, z1.d, z3.d
261261
; CHECK-NEXT: incd z2.d, all, mul #8
262262
; CHECK-NEXT: incd z5.d, all, mul #8
263263
; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
264264
; CHECK-NEXT: uzp1 p2.h, p4.h, p3.h
265265
; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z0.d
266266
; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z1.d
267-
; CHECK-NEXT: uqadd z0.d, z3.d, z2.d
268-
; CHECK-NEXT: uqadd z1.d, z3.d, z5.d
267+
; CHECK-NEXT: uqadd z0.d, z2.d, z3.d
268+
; CHECK-NEXT: uqadd z1.d, z5.d, z3.d
269269
; CHECK-NEXT: incd z7.d, all, mul #8
270270
; CHECK-NEXT: incd z24.d, all, mul #8
271271
; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d
272272
; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d
273-
; CHECK-NEXT: uqadd z0.d, z3.d, z7.d
274-
; CHECK-NEXT: uqadd z1.d, z3.d, z24.d
273+
; CHECK-NEXT: uqadd z0.d, z7.d, z3.d
274+
; CHECK-NEXT: uqadd z1.d, z24.d, z3.d
275275
; CHECK-NEXT: incd z25.d, all, mul #8
276276
; CHECK-NEXT: incd z26.d, all, mul #8
277277
; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s
278278
; CHECK-NEXT: uzp1 p4.s, p5.s, p6.s
279279
; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d
280280
; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d
281-
; CHECK-NEXT: uqadd z0.d, z3.d, z25.d
282-
; CHECK-NEXT: uqadd z1.d, z3.d, z26.d
281+
; CHECK-NEXT: uqadd z0.d, z25.d, z3.d
282+
; CHECK-NEXT: uqadd z1.d, z26.d, z3.d
283283
; CHECK-NEXT: cmphi p7.d, p0/z, z4.d, z0.d
284284
; CHECK-NEXT: cmphi p0.d, p0/z, z4.d, z1.d
285285
; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s
@@ -308,9 +308,9 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i8(i8 %index, i8 %TC) {
308308
; CHECK-NEXT: mov z2.b, w0
309309
; CHECK-NEXT: add z1.b, z0.b, z1.b
310310
; CHECK-NEXT: mov z3.b, w1
311-
; CHECK-NEXT: uqadd z0.b, z2.b, z0.b
311+
; CHECK-NEXT: uqadd z0.b, z0.b, z2.b
312312
; CHECK-NEXT: ptrue p1.b
313-
; CHECK-NEXT: uqadd z1.b, z2.b, z1.b
313+
; CHECK-NEXT: uqadd z1.b, z1.b, z2.b
314314
; CHECK-NEXT: cmphi p0.b, p1/z, z3.b, z0.b
315315
; CHECK-NEXT: cmphi p1.b, p1/z, z3.b, z1.b
316316
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-stepvector.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ define <vscale x 2 x i64> @multiple_use_stepvector_nxv2i64_1(i64 %data) {
218218
; CHECK: // %bb.0: // %entry
219219
; CHECK-NEXT: index z0.d, #0, #1
220220
; CHECK-NEXT: mov z1.d, x0
221-
; CHECK-NEXT: add z1.d, z1.d, z0.d
221+
; CHECK-NEXT: add z1.d, z0.d, z1.d
222222
; CHECK-NEXT: ptrue p0.d
223223
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
224224
; CHECK-NEXT: ret
@@ -292,11 +292,7 @@ entry:
292292
define <vscale x 2 x i64> @mul_add_stepvector_nxv2i64_commutative(i64 %x, i64 %y) {
293293
; CHECK-LABEL: mul_add_stepvector_nxv2i64_commutative:
294294
; CHECK: // %bb.0: // %entry
295-
; CHECK-NEXT: index z1.d, #0, #1
296-
; CHECK-NEXT: mov z2.d, x1
297-
; CHECK-NEXT: ptrue p0.d
298-
; CHECK-NEXT: mov z0.d, x0
299-
; CHECK-NEXT: mla z0.d, p0/m, z2.d, z1.d
295+
; CHECK-NEXT: index z0.d, x0, x1
300296
; CHECK-NEXT: ret
301297
entry:
302298
%0 = insertelement <vscale x 2 x i64> poison, i64 %y, i32 0

0 commit comments

Comments
 (0)