Skip to content

Commit 0b6df40

Browse files
committed
[AArch64] Combine ISD::AND into AArch64ISD::ANDS
If we already have an AArch64ISD::ANDS node with identical operands, we can merge any ISD::AND into it, reducing the instruction count by calculating the value and the flags in a single operation. This code is taken from the X86 backend, and could also handle AArch64ISD::ADDS and AArch64ISD::SUBS, but I couldn't find any test cases where it came up. Differential Revision: https://reviews.llvm.org/D118584
1 parent d874091 commit 0b6df40

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17351,6 +17351,31 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
1735117351
return SDValue();
1735217352
}
1735317353

17354+
// Combine for the flag-setting ("S") form of a generic opcode; here the
17355+
// AArch64ISD::ANDS node N and its generic counterpart ISD::AND. NOTE: This
17356+
// could be used for ADDS and SUBS too, if we can find test cases.
//
// Two rewrites are attempted, in order:
//  1. If result 1 of N (the flag result) has no uses, a plain ISD::AND of the
//     same two operands is built and returned, merged with a constant 0 of
//     type i32 in place of result 1.
//  2. Otherwise, if an ISD::AND node with the same value type and the same
//     operands already exists in the DAG, it is combined into N through
//     DCI.CombineTo, re-using result 0 of N.
//
// Returns the merged values in case 1, and a default-constructed SDValue in
// every other case.
17357+
static SDValue performANDSCombine(SDNode *N,
17358+
TargetLowering::DAGCombinerInfo &DCI) {
17359+
SDLoc DL(N);
17360+
SDValue LHS = N->getOperand(0);
17361+
SDValue RHS = N->getOperand(1);
17362+
EVT VT = N->getValueType(0);
17363+
17364+
// If the flag result isn't used, convert back to a generic opcode.
17365+
if (!N->hasAnyUseOfValue(1)) {
17366+
SDValue Res = DCI.DAG.getNode(ISD::AND, DL, VT, LHS, RHS);
17367+
return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
17368+
DL);
// Result 1 has no users on this path, so a constant 0 of type i32 is supplied
// for it alongside the new ISD::AND value.
17369+
}
17370+
17371+
// Combine identical generic nodes into this node, re-using the result.
// NOTE(review): the local name GenericAddSub is presumably a leftover from the
// X86 code this was taken from; the node looked up here is an ISD::AND.
// NOTE(review): the lookup passes the operands as {LHS, RHS}; whether an
// ISD::AND with the operands swapped is also found cannot be told from here —
// confirm.
17372+
if (SDNode *GenericAddSub =
17373+
DCI.DAG.getNodeIfExists(ISD::AND, DCI.DAG.getVTList(VT), {LHS, RHS}))
17374+
DCI.CombineTo(GenericAddSub, SDValue(N, 0));
17375+
17376+
// Nothing is returned as a replacement for N itself on this path.
return SDValue();
17377+
}
17378+
1735417379
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
1735517380
// setcc_merge_zero pred
1735617381
// (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
@@ -18415,6 +18440,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1841518440
return performTBZCombine(N, DCI, DAG);
1841618441
case AArch64ISD::CSEL:
1841718442
return performCSELCombine(N, DCI, DAG);
18443+
case AArch64ISD::ANDS:
18444+
return performANDSCombine(N, DCI);
1841818445
case AArch64ISD::DUP:
1841918446
return performPostLD1Combine(N, DCI, false);
1842018447
case AArch64ISD::NVCAST:

llvm/test/CodeGen/AArch64/peephole-and-tst.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,7 @@ do.end: ; preds = %4
126126
define i64 @test_and1(i64 %x, i64 %y) {
127127
; CHECK-LABEL: test_and1:
128128
; CHECK: // %bb.0:
129-
; CHECK-NEXT: and x8, x0, #0x3
130-
; CHECK-NEXT: tst x0, #0x3
129+
; CHECK-NEXT: ands x8, x0, #0x3
131130
; CHECK-NEXT: csel x0, x8, x1, eq
132131
; CHECK-NEXT: ret
133132
%a = and i64 %x, 3
@@ -151,22 +150,20 @@ define i64 @test_and2(i64 %x, i64 %y) {
151150
define i64 @test_and3(i64 %x, i64 %y) {
152151
; CHECK-LABEL: test_and3:
153152
; CHECK: // %bb.0:
154-
; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
153+
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
155154
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
156155
; CHECK-NEXT: .cfi_def_cfa_offset 32
157156
; CHECK-NEXT: .cfi_offset w19, -8
158157
; CHECK-NEXT: .cfi_offset w20, -16
159-
; CHECK-NEXT: .cfi_offset w21, -24
160158
; CHECK-NEXT: .cfi_offset w30, -32
161159
; CHECK-NEXT: mov x20, x0
162-
; CHECK-NEXT: and x21, x0, #0x3
163160
; CHECK-NEXT: mov x0, xzr
164161
; CHECK-NEXT: mov x19, x1
165162
; CHECK-NEXT: bl callee
166-
; CHECK-NEXT: tst x20, #0x3
167-
; CHECK-NEXT: csel x0, x21, x19, eq
163+
; CHECK-NEXT: ands x8, x20, #0x3
164+
; CHECK-NEXT: csel x0, x8, x19, eq
168165
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
169-
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
166+
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
170167
; CHECK-NEXT: ret
171168
%a = and i64 %x, 3
172169
%b = call i64 @callee(i64 0)
@@ -185,8 +182,7 @@ define i64 @test_and_4(i64 %x, i64 %y) {
185182
; CHECK-NEXT: mov x19, x0
186183
; CHECK-NEXT: ands x0, x0, #0x3
187184
; CHECK-NEXT: bl callee
188-
; CHECK-NEXT: tst x19, #0x3
189-
; CHECK-NEXT: and x8, x19, #0x3
185+
; CHECK-NEXT: ands x8, x19, #0x3
190186
; CHECK-NEXT: csel x0, x8, x0, eq
191187
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
192188
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)