Skip to content

Commit 0edc98c

Browse files
authored
[ARM] Copy SMAX(lhs, 0) and SMIN(lhs, 0) patterns from AArch64 to ARM (#146565)
They work on ARM too.
1 parent 0736f33 commit 0edc98c

File tree

4 files changed

+204
-3
lines changed

4 files changed

+204
-3
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5519,6 +5519,24 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
55195519
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
55205520
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
55215521
}
5522+
5523+
// Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
5524+
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
5525+
// (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
5526+
// Both require fewer instructions than compare and conditional select.
5527+
if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC &&
5528+
RHSC->isZero() && CFVal && CFVal->isZero() &&
5529+
LHS.getValueType() == RHS.getValueType()) {
5530+
EVT VT = LHS.getValueType();
5531+
SDValue Shift =
5532+
DAG.getNode(ISD::SRA, dl, VT, LHS,
5533+
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
5534+
5535+
if (CC == ISD::SETGT)
5536+
Shift = DAG.getNOT(dl, Shift, VT);
5537+
5538+
return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
5539+
}
55225540
}
55235541

55245542
if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=armv7a < %s | FileCheck %s --check-prefix=ARM
3+
; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB
4+
; RUN: llc -mtriple=armv7m < %s | FileCheck %s --check-prefix=THUMB2
5+
; RUN: llc -mtriple=thumbv8.1m.main < %s | FileCheck %s --check-prefix=THUMBV8
6+
7+
declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone
8+
9+
define i8 @smaxi8_zero(i8 %a) {
10+
; ARM-LABEL: smaxi8_zero:
11+
; ARM: @ %bb.0:
12+
; ARM-NEXT: sxtb r0, r0
13+
; ARM-NEXT: bic r0, r0, r0, asr #31
14+
; ARM-NEXT: bx lr
15+
;
16+
; THUMB-LABEL: smaxi8_zero:
17+
; THUMB: @ %bb.0:
18+
; THUMB-NEXT: sxtb r0, r0
19+
; THUMB-NEXT: asrs r1, r0, #31
20+
; THUMB-NEXT: bics r0, r1
21+
; THUMB-NEXT: bx lr
22+
;
23+
; THUMB2-LABEL: smaxi8_zero:
24+
; THUMB2: @ %bb.0:
25+
; THUMB2-NEXT: sxtb r0, r0
26+
; THUMB2-NEXT: bic.w r0, r0, r0, asr #31
27+
; THUMB2-NEXT: bx lr
28+
;
29+
; THUMBV8-LABEL: smaxi8_zero:
30+
; THUMBV8: @ %bb.0:
31+
; THUMBV8-NEXT: sxtb r0, r0
32+
; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31
33+
; THUMBV8-NEXT: bx lr
34+
%c = call i8 @llvm.smax.i8(i8 %a, i8 0)
35+
ret i8 %c
36+
}
37+
38+
declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone
39+
40+
define i16 @smaxi16_zero(i16 %a) {
41+
; ARM-LABEL: smaxi16_zero:
42+
; ARM: @ %bb.0:
43+
; ARM-NEXT: sxth r0, r0
44+
; ARM-NEXT: bic r0, r0, r0, asr #31
45+
; ARM-NEXT: bx lr
46+
;
47+
; THUMB-LABEL: smaxi16_zero:
48+
; THUMB: @ %bb.0:
49+
; THUMB-NEXT: sxth r0, r0
50+
; THUMB-NEXT: asrs r1, r0, #31
51+
; THUMB-NEXT: bics r0, r1
52+
; THUMB-NEXT: bx lr
53+
;
54+
; THUMB2-LABEL: smaxi16_zero:
55+
; THUMB2: @ %bb.0:
56+
; THUMB2-NEXT: sxth r0, r0
57+
; THUMB2-NEXT: bic.w r0, r0, r0, asr #31
58+
; THUMB2-NEXT: bx lr
59+
;
60+
; THUMBV8-LABEL: smaxi16_zero:
61+
; THUMBV8: @ %bb.0:
62+
; THUMBV8-NEXT: sxth r0, r0
63+
; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31
64+
; THUMBV8-NEXT: bx lr
65+
%c = call i16 @llvm.smax.i16(i16 %a, i16 0)
66+
ret i16 %c
67+
}
68+
69+
declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone
70+
71+
define i32 @smaxi32_zero(i32 %a) {
72+
; ARM-LABEL: smaxi32_zero:
73+
; ARM: @ %bb.0:
74+
; ARM-NEXT: bic r0, r0, r0, asr #31
75+
; ARM-NEXT: bx lr
76+
;
77+
; THUMB-LABEL: smaxi32_zero:
78+
; THUMB: @ %bb.0:
79+
; THUMB-NEXT: asrs r1, r0, #31
80+
; THUMB-NEXT: bics r0, r1
81+
; THUMB-NEXT: bx lr
82+
;
83+
; THUMB2-LABEL: smaxi32_zero:
84+
; THUMB2: @ %bb.0:
85+
; THUMB2-NEXT: bic.w r0, r0, r0, asr #31
86+
; THUMB2-NEXT: bx lr
87+
;
88+
; THUMBV8-LABEL: smaxi32_zero:
89+
; THUMBV8: @ %bb.0:
90+
; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31
91+
; THUMBV8-NEXT: bx lr
92+
%c = call i32 @llvm.smax.i32(i32 %a, i32 0)
93+
ret i32 %c
94+
}
95+
96+
; SMIN
97+
98+
declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone
99+
100+
define i8 @smini8_zero(i8 %a) {
101+
; ARM-LABEL: smini8_zero:
102+
; ARM: @ %bb.0:
103+
; ARM-NEXT: sxtb r0, r0
104+
; ARM-NEXT: and r0, r0, r0, asr #31
105+
; ARM-NEXT: bx lr
106+
;
107+
; THUMB-LABEL: smini8_zero:
108+
; THUMB: @ %bb.0:
109+
; THUMB-NEXT: sxtb r1, r0
110+
; THUMB-NEXT: asrs r0, r1, #31
111+
; THUMB-NEXT: ands r0, r1
112+
; THUMB-NEXT: bx lr
113+
;
114+
; THUMB2-LABEL: smini8_zero:
115+
; THUMB2: @ %bb.0:
116+
; THUMB2-NEXT: sxtb r0, r0
117+
; THUMB2-NEXT: and.w r0, r0, r0, asr #31
118+
; THUMB2-NEXT: bx lr
119+
;
120+
; THUMBV8-LABEL: smini8_zero:
121+
; THUMBV8: @ %bb.0:
122+
; THUMBV8-NEXT: sxtb r0, r0
123+
; THUMBV8-NEXT: and.w r0, r0, r0, asr #31
124+
; THUMBV8-NEXT: bx lr
125+
%c = call i8 @llvm.smin.i8(i8 %a, i8 0)
126+
ret i8 %c
127+
}
128+
129+
declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone
130+
131+
define i16 @smini16_zero(i16 %a) {
132+
; ARM-LABEL: smini16_zero:
133+
; ARM: @ %bb.0:
134+
; ARM-NEXT: sxth r0, r0
135+
; ARM-NEXT: and r0, r0, r0, asr #31
136+
; ARM-NEXT: bx lr
137+
;
138+
; THUMB-LABEL: smini16_zero:
139+
; THUMB: @ %bb.0:
140+
; THUMB-NEXT: sxth r1, r0
141+
; THUMB-NEXT: asrs r0, r1, #31
142+
; THUMB-NEXT: ands r0, r1
143+
; THUMB-NEXT: bx lr
144+
;
145+
; THUMB2-LABEL: smini16_zero:
146+
; THUMB2: @ %bb.0:
147+
; THUMB2-NEXT: sxth r0, r0
148+
; THUMB2-NEXT: and.w r0, r0, r0, asr #31
149+
; THUMB2-NEXT: bx lr
150+
;
151+
; THUMBV8-LABEL: smini16_zero:
152+
; THUMBV8: @ %bb.0:
153+
; THUMBV8-NEXT: sxth r0, r0
154+
; THUMBV8-NEXT: and.w r0, r0, r0, asr #31
155+
; THUMBV8-NEXT: bx lr
156+
%c = call i16 @llvm.smin.i16(i16 %a, i16 0)
157+
ret i16 %c
158+
}
159+
160+
declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone
161+
162+
define i32 @smini32_zero(i32 %a) {
163+
; ARM-LABEL: smini32_zero:
164+
; ARM: @ %bb.0:
165+
; ARM-NEXT: and r0, r0, r0, asr #31
166+
; ARM-NEXT: bx lr
167+
;
168+
; THUMB-LABEL: smini32_zero:
169+
; THUMB: @ %bb.0:
170+
; THUMB-NEXT: asrs r1, r0, #31
171+
; THUMB-NEXT: ands r0, r1
172+
; THUMB-NEXT: bx lr
173+
;
174+
; THUMB2-LABEL: smini32_zero:
175+
; THUMB2: @ %bb.0:
176+
; THUMB2-NEXT: and.w r0, r0, r0, asr #31
177+
; THUMB2-NEXT: bx lr
178+
;
179+
; THUMBV8-LABEL: smini32_zero:
180+
; THUMBV8: @ %bb.0:
181+
; THUMBV8-NEXT: and.w r0, r0, r0, asr #31
182+
; THUMBV8-NEXT: bx lr
183+
%c = call i32 @llvm.smin.i32(i32 %a, i32 0)
184+
ret i32 %c
185+
}

llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %block
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: .save {r7, lr}
88
; CHECK-NEXT: push {r7, lr}
9-
; CHECK-NEXT: subs r2, r1, #4
10-
; CHECK-NEXT: movw r3, #0
9+
; CHECK-NEXT: movs r3, #0
1110
; CHECK-NEXT: movt r3, #65408
1211
; CHECK-NEXT: vdup.32 q0, r3
1312
; CHECK-NEXT: dlstp.32 lr, r1

llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 %blockSize)
55
; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve:
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: push {r7, lr}
8-
; CHECK-NEXT: subs.w r3, r2, #8
98
; CHECK-NEXT: dlstp.16 lr, r2
109
; CHECK-NEXT: .LBB0_1: @ %do.body
1110
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

0 commit comments

Comments
 (0)