Skip to content

Commit 514d845

Browse files
committed
Remove UnsafeFPMath in combineRepeatedFPDivisors
1 parent 4a9bbb4 commit 514d845

File tree

3 files changed

+55
-57
lines changed

3 files changed

+55
-57
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18209,9 +18209,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
1820918209
// TODO: Limit this transform based on optsize/minsize - it always creates at
1821018210
// least 1 extra instruction. But the perf win may be substantial enough
1821118211
// that only minsize should restrict this.
18212-
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
1821318212
const SDNodeFlags Flags = N->getFlags();
18214-
if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
18213+
if (LegalDAG || !Flags.hasAllowReciprocal())
1821518214
return SDValue();
1821618215

1821718216
// Skip if current node is a reciprocal/fneg-reciprocal.
@@ -18248,7 +18247,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
1824818247

1824918248
// This division is eligible for optimization only if it allows reciprocal
1825018249
// formation (i.e. it carries the 'arcp' fast-math flag).
18251-
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
18250+
if (U->getFlags().hasAllowReciprocal())
1825218251
Users.insert(U);
1825318252
}
1825418253
}

llvm/test/CodeGen/AArch64/fdiv-combine.ll

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
; a / D; b / D; c / D;
1212
; =>
1313
; recip = 1.0 / D; a * recip; b * recip; c * recip;
14-
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
14+
define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
1515
; CHECK-SD-LABEL: three_fdiv_float:
1616
; CHECK-SD: // %bb.0:
1717
; CHECK-SD-NEXT: fmov s4, #1.00000000
@@ -28,14 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
2828
; CHECK-GI-NEXT: fdiv s2, s3, s0
2929
; CHECK-GI-NEXT: fmov s0, s4
3030
; CHECK-GI-NEXT: b foo_3f
31-
%div = fdiv float %a, %D
32-
%div1 = fdiv float %b, %D
33-
%div2 = fdiv float %c, %D
31+
%div = fdiv arcp float %a, %D
32+
%div1 = fdiv arcp float %b, %D
33+
%div2 = fdiv arcp float %c, %D
3434
tail call void @foo_3f(float %div, float %div1, float %div2)
3535
ret void
3636
}
3737

38-
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
38+
define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
3939
; CHECK-SD-LABEL: three_fdiv_double:
4040
; CHECK-SD: // %bb.0:
4141
; CHECK-SD-NEXT: fmov d4, #1.00000000
@@ -52,14 +52,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
5252
; CHECK-GI-NEXT: fdiv d2, d3, d0
5353
; CHECK-GI-NEXT: fmov d0, d4
5454
; CHECK-GI-NEXT: b foo_3d
55-
%div = fdiv double %a, %D
56-
%div1 = fdiv double %b, %D
57-
%div2 = fdiv double %c, %D
55+
%div = fdiv arcp double %a, %D
56+
%div1 = fdiv arcp double %b, %D
57+
%div2 = fdiv arcp double %c, %D
5858
tail call void @foo_3d(double %div, double %div1, double %div2)
5959
ret void
6060
}
6161

62-
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
62+
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
6363
; CHECK-SD-LABEL: three_fdiv_4xfloat:
6464
; CHECK-SD: // %bb.0:
6565
; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
@@ -76,14 +76,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
7676
; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
7777
; CHECK-GI-NEXT: mov v0.16b, v4.16b
7878
; CHECK-GI-NEXT: b foo_3_4xf
79-
%div = fdiv <4 x float> %a, %D
80-
%div1 = fdiv <4 x float> %b, %D
81-
%div2 = fdiv <4 x float> %c, %D
79+
%div = fdiv arcp <4 x float> %a, %D
80+
%div1 = fdiv arcp <4 x float> %b, %D
81+
%div2 = fdiv arcp <4 x float> %c, %D
8282
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
8383
ret void
8484
}
8585

86-
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
86+
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
8787
; CHECK-SD-LABEL: three_fdiv_2xdouble:
8888
; CHECK-SD: // %bb.0:
8989
; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
@@ -100,42 +100,42 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double>
100100
; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
101101
; CHECK-GI-NEXT: mov v0.16b, v4.16b
102102
; CHECK-GI-NEXT: b foo_3_2xd
103-
%div = fdiv <2 x double> %a, %D
104-
%div1 = fdiv <2 x double> %b, %D
105-
%div2 = fdiv <2 x double> %c, %D
103+
%div = fdiv arcp <2 x double> %a, %D
104+
%div1 = fdiv arcp <2 x double> %b, %D
105+
%div2 = fdiv arcp <2 x double> %c, %D
106106
tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
107107
ret void
108108
}
109109

110110
; Following test cases check we never combine two FDIVs if neither of them
111111
; calculates a reciprocal.
112-
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
112+
define void @two_fdiv_float(float %D, float %a, float %b) {
113113
; CHECK-LABEL: two_fdiv_float:
114114
; CHECK: // %bb.0:
115115
; CHECK-NEXT: fdiv s3, s1, s0
116116
; CHECK-NEXT: fdiv s1, s2, s0
117117
; CHECK-NEXT: fmov s0, s3
118118
; CHECK-NEXT: b foo_2f
119-
%div = fdiv float %a, %D
120-
%div1 = fdiv float %b, %D
119+
%div = fdiv arcp float %a, %D
120+
%div1 = fdiv arcp float %b, %D
121121
tail call void @foo_2f(float %div, float %div1)
122122
ret void
123123
}
124124

125-
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
125+
define void @two_fdiv_double(double %D, double %a, double %b) {
126126
; CHECK-LABEL: two_fdiv_double:
127127
; CHECK: // %bb.0:
128128
; CHECK-NEXT: fdiv d3, d1, d0
129129
; CHECK-NEXT: fdiv d1, d2, d0
130130
; CHECK-NEXT: fmov d0, d3
131131
; CHECK-NEXT: b foo_2d
132-
%div = fdiv double %a, %D
133-
%div1 = fdiv double %b, %D
132+
%div = fdiv arcp double %a, %D
133+
%div1 = fdiv arcp double %b, %D
134134
tail call void @foo_2d(double %div, double %div1)
135135
ret void
136136
}
137137

138-
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
138+
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
139139
; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
140140
; CHECK-SD: // %bb.0:
141141
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -157,14 +157,14 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
157157
; CHECK-GI-NEXT: b foo_3_4xf
158158
%D.ins = insertelement <4 x float> poison, float %D, i64 0
159159
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
160-
%div = fdiv <4 x float> %a, %splat
161-
%div1 = fdiv <4 x float> %b, %splat
162-
%div2 = fdiv <4 x float> %c, %splat
160+
%div = fdiv arcp <4 x float> %a, %splat
161+
%div1 = fdiv arcp <4 x float> %b, %splat
162+
%div2 = fdiv arcp <4 x float> %c, %splat
163163
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
164164
ret void
165165
}
166166

167-
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
167+
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #0 {
168168
; CHECK-SD-LABEL: splat_fdiv_v4f32:
169169
; CHECK-SD: // %bb.0: // %entry
170170
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -183,11 +183,11 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
183183
entry:
184184
%D.ins = insertelement <4 x float> poison, float %D, i64 0
185185
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
186-
%div = fdiv <4 x float> %a, %splat
186+
%div = fdiv arcp <4 x float> %a, %splat
187187
ret <4 x float> %div
188188
}
189189

190-
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
190+
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #0 {
191191
; CHECK-LABEL: splat_fdiv_nxv4f32:
192192
; CHECK: // %bb.0: // %entry
193193
; CHECK-NEXT: fmov s2, #1.00000000
@@ -198,11 +198,11 @@ define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %
198198
entry:
199199
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
200200
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
201-
%div = fdiv <vscale x 4 x float> %a, %splat
201+
%div = fdiv arcp <vscale x 4 x float> %a, %splat
202202
ret <vscale x 4 x float> %div
203203
}
204204

205-
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
205+
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
206206
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
207207
; CHECK: // %bb.0: // %entry
208208
; CHECK-NEXT: fmov s4, #1.00000000
@@ -215,14 +215,14 @@ define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale
215215
entry:
216216
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
217217
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
218-
%div = fdiv <vscale x 4 x float> %a, %splat
219-
%div1 = fdiv <vscale x 4 x float> %b, %splat
220-
%div2 = fdiv <vscale x 4 x float> %c, %splat
218+
%div = fdiv arcp <vscale x 4 x float> %a, %splat
219+
%div1 = fdiv arcp <vscale x 4 x float> %b, %splat
220+
%div2 = fdiv arcp <vscale x 4 x float> %c, %splat
221221
tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
222222
ret void
223223
}
224224

225-
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
225+
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #0 {
226226
; CHECK-LABEL: splat_fdiv_nxv2f64:
227227
; CHECK: // %bb.0: // %entry
228228
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -237,7 +237,7 @@ entry:
237237
ret <vscale x 2 x double> %div
238238
}
239239

240-
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
240+
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
241241
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
242242
; CHECK: // %bb.0: // %entry
243243
; CHECK-NEXT: fmov d3, #1.00000000
@@ -249,8 +249,8 @@ define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale
249249
entry:
250250
%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
251251
%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
252-
%div = fdiv <vscale x 2 x double> %a, %splat
253-
%div1 = fdiv <vscale x 2 x double> %b, %splat
252+
%div = fdiv arcp <vscale x 2 x double> %a, %splat
253+
%div1 = fdiv arcp <vscale x 2 x double> %b, %splat
254254
tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
255255
ret void
256256
}
@@ -264,5 +264,4 @@ declare void @foo_2d(double, double)
264264
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
265265
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
266266

267-
attributes #0 = { "unsafe-fp-math"="true" }
268-
attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
267+
attributes #0 = { "target-features"="+sve" }

llvm/test/CodeGen/NVPTX/fast-math.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
395395
ret float %w
396396
}
397397

398-
define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
398+
define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
399399
; CHECK-LABEL: repeated_div_fast(
400400
; CHECK: {
401401
; CHECK-NEXT: .reg .pred %p<2>;
@@ -416,14 +416,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
416416
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
417417
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
418418
; CHECK-NEXT: ret;
419-
%x = fdiv float %a, %divisor
420-
%y = fdiv float %b, %divisor
421-
%z = fmul float %x, %y
419+
%x = fdiv arcp float %a, %divisor
420+
%y = fdiv contract arcp afn float %b, %divisor
421+
%z = fmul contract float %x, %y
422422
%w = select i1 %pred, float %z, float %y
423423
ret float %w
424424
}
425425

426-
define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 {
426+
define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) {
427427
; CHECK-LABEL: repeated_div_fast_sel(
428428
; CHECK: {
429429
; CHECK-NEXT: .reg .pred %p<2>;
@@ -441,13 +441,13 @@ define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor
441441
; CHECK-NEXT: div.approx.f32 %r5, %r3, %r4;
442442
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
443443
; CHECK-NEXT: ret;
444-
%x = fdiv float %a, %divisor
445-
%y = fdiv float %b, %divisor
444+
%x = fdiv afn float %a, %divisor
445+
%y = fdiv afn float %b, %divisor
446446
%w = select i1 %pred, float %x, float %y
447447
ret float %w
448448
}
449449

450-
define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
450+
define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
451451
; CHECK-LABEL: repeated_div_fast_ftz(
452452
; CHECK: {
453453
; CHECK-NEXT: .reg .pred %p<2>;
@@ -468,14 +468,14 @@ define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor
468468
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
469469
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
470470
; CHECK-NEXT: ret;
471-
%x = fdiv float %a, %divisor
472-
%y = fdiv float %b, %divisor
473-
%z = fmul float %x, %y
471+
%x = fdiv arcp float %a, %divisor
472+
%y = fdiv contract arcp afn float %b, %divisor
473+
%z = fmul contract float %x, %y
474474
%w = select i1 %pred, float %z, float %y
475475
ret float %w
476476
}
477477

478-
define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
478+
define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 {
479479
; CHECK-LABEL: repeated_div_fast_ftz_sel(
480480
; CHECK: {
481481
; CHECK-NEXT: .reg .pred %p<2>;
@@ -493,8 +493,8 @@ define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %div
493493
; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4;
494494
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
495495
; CHECK-NEXT: ret;
496-
%x = fdiv float %a, %divisor
497-
%y = fdiv float %b, %divisor
496+
%x = fdiv afn float %a, %divisor
497+
%y = fdiv afn float %b, %divisor
498498
%w = select i1 %pred, float %x, float %y
499499
ret float %w
500500
}

0 commit comments

Comments
 (0)