Skip to content

Commit 6d7be75

Browse files
committed
[AArch64][GISel] Handle small vector fadd reductions.
This adds some test cases for v2 and v3 half vector fadd reductions. In doing so, it became apparent that GlobalISel was having trouble lowering the smaller vector sizes. Add some basic handling by widening to the next power of 2, or scalarizing if necessary. Larger vectors still have problems, as FewerElements currently requires the number of elements to be an exact multiple of the vector length.
1 parent 38cd903 commit 6d7be75

File tree

3 files changed

+202
-18
lines changed

3 files changed

+202
-18
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
12881288
.clampMaxNumElements(1, s64, 2)
12891289
.clampMaxNumElements(1, s32, 4)
12901290
.clampMaxNumElements(1, s16, 8)
1291+
.moreElementsToNextPow2(1)
1292+
.scalarize(1)
12911293
.lower();
12921294

12931295
// For fmul reductions we need to split up into individual operations. We

llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,106 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
1919
ret float %r
2020
}
2121

22+
define half @add_v2HalfH(<2 x half> %bin.rdx) {
23+
; CHECK-SD-NOFP16-LABEL: add_v2HalfH:
24+
; CHECK-SD-NOFP16: // %bb.0:
25+
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
26+
; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
27+
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
28+
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
29+
; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
30+
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
31+
; CHECK-SD-NOFP16-NEXT: ret
32+
;
33+
; CHECK-SD-FP16-LABEL: add_v2HalfH:
34+
; CHECK-SD-FP16: // %bb.0:
35+
; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
36+
; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
37+
; CHECK-SD-FP16-NEXT: ret
38+
;
39+
; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
40+
; CHECK-GI-NOFP16: // %bb.0:
41+
; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
42+
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
43+
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
44+
; CHECK-GI-NOFP16-NEXT: fmov s1, w8
45+
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1]
46+
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
47+
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
48+
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
49+
; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
50+
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
51+
; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
52+
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
53+
; CHECK-GI-NOFP16-NEXT: ret
54+
;
55+
; CHECK-GI-FP16-LABEL: add_v2HalfH:
56+
; CHECK-GI-FP16: // %bb.0:
57+
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
58+
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
59+
; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
60+
; CHECK-GI-FP16-NEXT: ret
61+
%r = call half @llvm.vector.reduce.fadd.f16.v2f16(half -0.0, <2 x half> %bin.rdx)
62+
ret half %r
63+
}
64+
65+
define half @add_v3HalfH(<3 x half> %bin.rdx) {
66+
; CHECK-SD-NOFP16-LABEL: add_v3HalfH:
67+
; CHECK-SD-NOFP16: // %bb.0:
68+
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
69+
; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
70+
; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
71+
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[2]
72+
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
73+
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
74+
; CHECK-SD-NOFP16-NEXT: fadd s1, s2, s1
75+
; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
76+
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
77+
; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
78+
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
79+
; CHECK-SD-NOFP16-NEXT: ret
80+
;
81+
; CHECK-SD-FP16-LABEL: add_v3HalfH:
82+
; CHECK-SD-FP16: // %bb.0:
83+
; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
84+
; CHECK-SD-FP16-NEXT: mov h1, v0.h[2]
85+
; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
86+
; CHECK-SD-FP16-NEXT: fadd h0, h0, h1
87+
; CHECK-SD-FP16-NEXT: ret
88+
;
89+
; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
90+
; CHECK-GI-NOFP16: // %bb.0:
91+
; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
92+
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
93+
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
94+
; CHECK-GI-NOFP16-NEXT: fmov s1, w8
95+
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
96+
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
97+
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
98+
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[2]
99+
; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
100+
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
101+
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
102+
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
103+
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
104+
; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
105+
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
106+
; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
107+
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
108+
; CHECK-GI-NOFP16-NEXT: ret
109+
;
110+
; CHECK-GI-FP16-LABEL: add_v3HalfH:
111+
; CHECK-GI-FP16: // %bb.0:
112+
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
113+
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
114+
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
115+
; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
116+
; CHECK-GI-FP16-NEXT: fadd h0, h0, h2
117+
; CHECK-GI-FP16-NEXT: ret
118+
%r = call half @llvm.vector.reduce.fadd.f16.v3f16(half -0.0, <3 x half> %bin.rdx)
119+
ret half %r
120+
}
121+
22122
define half @add_HalfH(<4 x half> %bin.rdx) {
23123
; CHECK-SD-NOFP16-LABEL: add_HalfH:
24124
; CHECK-SD-NOFP16: // %bb.0:

llvm/test/CodeGen/AArch64/vecreduce-fadd.ll

Lines changed: 100 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,88 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
1313
ret float %r
1414
}
1515

16+
define half @add_v2HalfH(<2 x half> %bin.rdx) {
17+
; CHECK-SD-NOFP16-LABEL: add_v2HalfH:
18+
; CHECK-SD-NOFP16: // %bb.0:
19+
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
20+
; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
21+
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
22+
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
23+
; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
24+
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
25+
; CHECK-SD-NOFP16-NEXT: ret
26+
;
27+
; CHECK-SD-FP16-LABEL: add_v2HalfH:
28+
; CHECK-SD-FP16: // %bb.0:
29+
; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
30+
; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr
31+
; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
32+
; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
33+
; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
34+
; CHECK-SD-FP16-NEXT: ret
35+
;
36+
; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
37+
; CHECK-GI-NOFP16: // %bb.0:
38+
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
39+
; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
40+
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
41+
; CHECK-GI-NOFP16-NEXT: ret
42+
;
43+
; CHECK-GI-FP16-LABEL: add_v2HalfH:
44+
; CHECK-GI-FP16: // %bb.0:
45+
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
46+
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
47+
; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
48+
; CHECK-GI-FP16-NEXT: ret
49+
%r = call fast half @llvm.vector.reduce.fadd.f16.v2f16(half -0.0, <2 x half> %bin.rdx)
50+
ret half %r
51+
}
52+
53+
define half @add_v3HalfH(<3 x half> %bin.rdx) {
54+
; CHECK-SD-NOFP16-LABEL: add_v3HalfH:
55+
; CHECK-SD-NOFP16: // %bb.0:
56+
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
57+
; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
58+
; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
59+
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[2]
60+
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
61+
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
62+
; CHECK-SD-NOFP16-NEXT: fadd s1, s2, s1
63+
; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
64+
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
65+
; CHECK-SD-NOFP16-NEXT: ret
66+
;
67+
; CHECK-SD-FP16-LABEL: add_v3HalfH:
68+
; CHECK-SD-FP16: // %bb.0:
69+
; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
70+
; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
71+
; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
72+
; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
73+
; CHECK-SD-FP16-NEXT: ret
74+
;
75+
; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
76+
; CHECK-GI-NOFP16: // %bb.0:
77+
; CHECK-GI-NOFP16-NEXT: movi v1.2s, #128, lsl #24
78+
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
79+
; CHECK-GI-NOFP16-NEXT: mov v0.s[3], v1.s[0]
80+
; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
81+
; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
82+
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
83+
; CHECK-GI-NOFP16-NEXT: ret
84+
;
85+
; CHECK-GI-FP16-LABEL: add_v3HalfH:
86+
; CHECK-GI-FP16: // %bb.0:
87+
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
88+
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
89+
; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
90+
; CHECK-GI-FP16-NEXT: mov v0.h[3], v1.h[0]
91+
; CHECK-GI-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
92+
; CHECK-GI-FP16-NEXT: faddp h0, v0.2h
93+
; CHECK-GI-FP16-NEXT: ret
94+
%r = call fast half @llvm.vector.reduce.fadd.f16.v3f16(half -0.0, <3 x half> %bin.rdx)
95+
ret half %r
96+
}
97+
1698
define half @add_HalfH(<4 x half> %bin.rdx) {
1799
; CHECK-SD-NOFP16-LABEL: add_HalfH:
18100
; CHECK-SD-NOFP16: // %bb.0:
@@ -239,15 +321,15 @@ define float @fadd_reduction_v4f32_in_loop(ptr %ptr.start) {
239321
; CHECK: // %bb.0: // %entry
240322
; CHECK-NEXT: movi d0, #0000000000000000
241323
; CHECK-NEXT: mov x8, xzr
242-
; CHECK-NEXT: .LBB9_1: // %loop
324+
; CHECK-NEXT: .LBB11_1: // %loop
243325
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
244326
; CHECK-NEXT: ldr q1, [x0, x8]
245327
; CHECK-NEXT: add x8, x8, #16
246328
; CHECK-NEXT: cmp w8, #112
247329
; CHECK-NEXT: faddp v1.4s, v1.4s, v1.4s
248330
; CHECK-NEXT: faddp s1, v1.2s
249331
; CHECK-NEXT: fadd s0, s1, s0
250-
; CHECK-NEXT: b.ne .LBB9_1
332+
; CHECK-NEXT: b.ne .LBB11_1
251333
; CHECK-NEXT: // %bb.2: // %exit
252334
; CHECK-NEXT: ret
253335
entry:
@@ -276,7 +358,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
276358
; CHECK-SD-NOFP16: // %bb.0: // %entry
277359
; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
278360
; CHECK-SD-NOFP16-NEXT: mov x8, xzr
279-
; CHECK-SD-NOFP16-NEXT: .LBB10_1: // %loop
361+
; CHECK-SD-NOFP16-NEXT: .LBB12_1: // %loop
280362
; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
281363
; CHECK-SD-NOFP16-NEXT: ldr d1, [x0, x8]
282364
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
@@ -294,31 +376,31 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
294376
; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
295377
; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
296378
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
297-
; CHECK-SD-NOFP16-NEXT: b.ne .LBB10_1
379+
; CHECK-SD-NOFP16-NEXT: b.ne .LBB12_1
298380
; CHECK-SD-NOFP16-NEXT: // %bb.2: // %exit
299381
; CHECK-SD-NOFP16-NEXT: ret
300382
;
301383
; CHECK-SD-FP16-LABEL: fadd_reduction_v4f16_in_loop:
302384
; CHECK-SD-FP16: // %bb.0: // %entry
303385
; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
304386
; CHECK-SD-FP16-NEXT: mov x8, xzr
305-
; CHECK-SD-FP16-NEXT: .LBB10_1: // %loop
387+
; CHECK-SD-FP16-NEXT: .LBB12_1: // %loop
306388
; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
307389
; CHECK-SD-FP16-NEXT: ldr d1, [x0, x8]
308390
; CHECK-SD-FP16-NEXT: add x8, x8, #8
309391
; CHECK-SD-FP16-NEXT: cmp w8, #56
310392
; CHECK-SD-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
311393
; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
312394
; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
313-
; CHECK-SD-FP16-NEXT: b.ne .LBB10_1
395+
; CHECK-SD-FP16-NEXT: b.ne .LBB12_1
314396
; CHECK-SD-FP16-NEXT: // %bb.2: // %exit
315397
; CHECK-SD-FP16-NEXT: ret
316398
;
317399
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
318400
; CHECK-GI-NOFP16: // %bb.0: // %entry
319401
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
320402
; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
321-
; CHECK-GI-NOFP16-NEXT: .LBB10_1: // %loop
403+
; CHECK-GI-NOFP16-NEXT: .LBB12_1: // %loop
322404
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
323405
; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
324406
; CHECK-GI-NOFP16-NEXT: fmov s1, w9
@@ -333,7 +415,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
333415
; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
334416
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
335417
; CHECK-GI-NOFP16-NEXT: fmov w9, s0
336-
; CHECK-GI-NOFP16-NEXT: b.ne .LBB10_1
418+
; CHECK-GI-NOFP16-NEXT: b.ne .LBB12_1
337419
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
338420
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
339421
; CHECK-GI-NOFP16-NEXT: ret
@@ -342,15 +424,15 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
342424
; CHECK-GI-FP16: // %bb.0: // %entry
343425
; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
344426
; CHECK-GI-FP16-NEXT: mov x8, xzr
345-
; CHECK-GI-FP16-NEXT: .LBB10_1: // %loop
427+
; CHECK-GI-FP16-NEXT: .LBB12_1: // %loop
346428
; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
347429
; CHECK-GI-FP16-NEXT: ldr d1, [x0, x8]
348430
; CHECK-GI-FP16-NEXT: add x8, x8, #8
349431
; CHECK-GI-FP16-NEXT: cmp w8, #56
350432
; CHECK-GI-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
351433
; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
352434
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
353-
; CHECK-GI-FP16-NEXT: b.ne .LBB10_1
435+
; CHECK-GI-FP16-NEXT: b.ne .LBB12_1
354436
; CHECK-GI-FP16-NEXT: // %bb.2: // %exit
355437
; CHECK-GI-FP16-NEXT: ret
356438
entry:
@@ -379,7 +461,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
379461
; CHECK-SD-NOFP16: // %bb.0: // %entry
380462
; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
381463
; CHECK-SD-NOFP16-NEXT: mov x8, xzr
382-
; CHECK-SD-NOFP16-NEXT: .LBB11_1: // %loop
464+
; CHECK-SD-NOFP16-NEXT: .LBB13_1: // %loop
383465
; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
384466
; CHECK-SD-NOFP16-NEXT: ldr q1, [x0, x8]
385467
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
@@ -409,15 +491,15 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
409491
; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
410492
; CHECK-SD-NOFP16-NEXT: fadd s0, s2, s0
411493
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
412-
; CHECK-SD-NOFP16-NEXT: b.ne .LBB11_1
494+
; CHECK-SD-NOFP16-NEXT: b.ne .LBB13_1
413495
; CHECK-SD-NOFP16-NEXT: // %bb.2: // %exit
414496
; CHECK-SD-NOFP16-NEXT: ret
415497
;
416498
; CHECK-SD-FP16-LABEL: fadd_reduction_v8f16_in_loop:
417499
; CHECK-SD-FP16: // %bb.0: // %entry
418500
; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
419501
; CHECK-SD-FP16-NEXT: mov x8, xzr
420-
; CHECK-SD-FP16-NEXT: .LBB11_1: // %loop
502+
; CHECK-SD-FP16-NEXT: .LBB13_1: // %loop
421503
; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
422504
; CHECK-SD-FP16-NEXT: ldr q1, [x0, x8]
423505
; CHECK-SD-FP16-NEXT: add x8, x8, #8
@@ -426,15 +508,15 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
426508
; CHECK-SD-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
427509
; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
428510
; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
429-
; CHECK-SD-FP16-NEXT: b.ne .LBB11_1
511+
; CHECK-SD-FP16-NEXT: b.ne .LBB13_1
430512
; CHECK-SD-FP16-NEXT: // %bb.2: // %exit
431513
; CHECK-SD-FP16-NEXT: ret
432514
;
433515
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
434516
; CHECK-GI-NOFP16: // %bb.0: // %entry
435517
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
436518
; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
437-
; CHECK-GI-NOFP16-NEXT: .LBB11_1: // %loop
519+
; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
438520
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
439521
; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
440522
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
@@ -451,7 +533,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
451533
; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
452534
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
453535
; CHECK-GI-NOFP16-NEXT: fmov w9, s0
454-
; CHECK-GI-NOFP16-NEXT: b.ne .LBB11_1
536+
; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
455537
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
456538
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
457539
; CHECK-GI-NOFP16-NEXT: ret
@@ -460,7 +542,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
460542
; CHECK-GI-FP16: // %bb.0: // %entry
461543
; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
462544
; CHECK-GI-FP16-NEXT: mov x8, xzr
463-
; CHECK-GI-FP16-NEXT: .LBB11_1: // %loop
545+
; CHECK-GI-FP16-NEXT: .LBB13_1: // %loop
464546
; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
465547
; CHECK-GI-FP16-NEXT: ldr q1, [x0, x8]
466548
; CHECK-GI-FP16-NEXT: add x8, x8, #8
@@ -469,7 +551,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
469551
; CHECK-GI-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
470552
; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
471553
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
472-
; CHECK-GI-FP16-NEXT: b.ne .LBB11_1
554+
; CHECK-GI-FP16-NEXT: b.ne .LBB13_1
473555
; CHECK-GI-FP16-NEXT: // %bb.2: // %exit
474556
; CHECK-GI-FP16-NEXT: ret
475557
entry:

0 commit comments

Comments
 (0)