Skip to content

Commit ba7d78a

Browse files
authored
[DAG] foldABSToABD - fall back to value tracking if the (ABS (SUB LHS, RHS)) operands aren't extended (#147053)
ISD::ABDS can be used if the signed subtraction is known not to overflow (this extends the existing fold to handle cases where the NSW flag has been lost). ISD::ABDU can be used if the sign bit of both operands is known to be zero. Fixes #147049.
1 parent 9f66ebe commit ba7d78a

File tree

4 files changed

+36
-45
lines changed

4 files changed

+36
-45
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11402,18 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
1140211402
SDValue AbsOp0 = N->getOperand(0);
1140311403
unsigned Opc0 = Op0.getOpcode();
1140411404

11405-
// Check if the operands of the sub are (zero|sign)-extended.
11406-
// TODO: Should we use ValueTracking instead?
11405+
// Check if the operands of the sub are (zero|sign)-extended, otherwise
11406+
// fallback to ValueTracking.
1140711407
if (Opc0 != Op1.getOpcode() ||
1140811408
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
1140911409
Opc0 != ISD::SIGN_EXTEND_INREG)) {
1141011410
// fold (abs (sub nsw x, y)) -> abds(x, y)
1141111411
// Don't fold this for unsupported types as we lose the NSW handling.
11412-
if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11413-
TLI.preferABDSToABSWithNSW(VT)) {
11412+
if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11413+
(AbsOp0->getFlags().hasNoSignedWrap() ||
11414+
DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
1141411415
SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
1141511416
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
1141611417
}
11418+
// fold (abs (sub x, y)) -> abdu(x, y)
11419+
if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11420+
DAG.SignBitIsZero(Op1)) {
11421+
SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11422+
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11423+
}
1141711424
return SDValue();
1141811425
}
1141911426

llvm/test/CodeGen/AArch64/abd-combine.ll

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
1818
; CHECK-LABEL: abdu_const:
1919
; CHECK: // %bb.0:
2020
; CHECK-NEXT: movi v1.4s, #1
21-
; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
22-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
23-
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
24-
; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
25-
; CHECK-NEXT: abs v1.4s, v1.4s
26-
; CHECK-NEXT: abs v0.4s, v0.4s
27-
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
21+
; CHECK-NEXT: ushll v2.4s, v0.4h, #0
22+
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
23+
; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
24+
; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
25+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
2826
; CHECK-NEXT: ret
2927
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
3028
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
3735
; CHECK-LABEL: abdu_const_lhs:
3836
; CHECK: // %bb.0:
3937
; CHECK-NEXT: movi v1.4s, #1
40-
; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
41-
; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
42-
; CHECK-NEXT: abs v0.4s, v0.4s
43-
; CHECK-NEXT: abs v1.4s, v2.4s
38+
; CHECK-NEXT: ushll v2.4s, v0.4h, #0
39+
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
40+
; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
41+
; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
4442
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
4543
; CHECK-NEXT: ret
4644
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
5351
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
5452
; CHECK-LABEL: abdu_const_zero:
5553
; CHECK: // %bb.0:
56-
; CHECK-NEXT: movi v1.2d, #0000000000000000
57-
; CHECK-NEXT: ushll v2.4s, v0.4h, #0
58-
; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
59-
; CHECK-NEXT: neg v1.4s, v2.4s
60-
; CHECK-NEXT: abs v0.4s, v0.4s
61-
; CHECK-NEXT: abs v1.4s, v1.4s
62-
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
6354
; CHECK-NEXT: ret
6455
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
6556
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
328319
; CHECK-LABEL: abds_const:
329320
; CHECK: // %bb.0:
330321
; CHECK-NEXT: movi v1.4s, #1
331-
; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
332-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
333-
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
334-
; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
335-
; CHECK-NEXT: abs v1.4s, v1.4s
336-
; CHECK-NEXT: abs v0.4s, v0.4s
337-
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
322+
; CHECK-NEXT: sshll v2.4s, v0.4h, #0
323+
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
324+
; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
325+
; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
326+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
338327
; CHECK-NEXT: ret
339328
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
340329
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
347336
; CHECK-LABEL: abds_const_lhs:
348337
; CHECK: // %bb.0:
349338
; CHECK-NEXT: movi v1.4s, #1
350-
; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
351-
; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
352-
; CHECK-NEXT: abs v0.4s, v0.4s
353-
; CHECK-NEXT: abs v1.4s, v2.4s
339+
; CHECK-NEXT: sshll v2.4s, v0.4h, #0
340+
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
341+
; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
342+
; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
354343
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
355344
; CHECK-NEXT: ret
356345
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
363352
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
364353
; CHECK-LABEL: abds_const_zero:
365354
; CHECK: // %bb.0:
366-
; CHECK-NEXT: movi v1.2d, #0000000000000000
367-
; CHECK-NEXT: sshll v2.4s, v0.4h, #0
368-
; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
369-
; CHECK-NEXT: neg v1.4s, v2.4s
355+
; CHECK-NEXT: sshll v1.4s, v0.4h, #0
356+
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
370357
; CHECK-NEXT: abs v0.4s, v0.4s
371358
; CHECK-NEXT: abs v1.4s, v1.4s
372359
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h

llvm/test/CodeGen/AArch64/sve-abd.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
283283
; CHECK-NEXT: ptrue p0.s
284284
; CHECK-NEXT: and z0.s, z0.s, #0xff
285285
; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
286-
; CHECK-NEXT: sub z0.s, z0.s, z1.s
287-
; CHECK-NEXT: abs z0.s, p0/m, z0.s
286+
; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
288287
; CHECK-NEXT: ret
289288
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
290289
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>

llvm/test/CodeGen/RISCV/rvv/abd.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,10 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
316316
; CHECK: # %bb.0:
317317
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
318318
; CHECK-NEXT: vzext.vf4 v10, v8
319-
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
320-
; CHECK-NEXT: vsext.vf2 v8, v9
321-
; CHECK-NEXT: vwsub.wv v10, v10, v8
322-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
323-
; CHECK-NEXT: vrsub.vi v8, v10, 0
324-
; CHECK-NEXT: vmax.vv v8, v10, v8
319+
; CHECK-NEXT: vsext.vf4 v12, v9
320+
; CHECK-NEXT: vmin.vv v8, v10, v12
321+
; CHECK-NEXT: vmax.vv v10, v10, v12
322+
; CHECK-NEXT: vsub.vv v8, v10, v8
325323
; CHECK-NEXT: ret
326324
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
327325
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>

0 commit comments

Comments
 (0)