@@ -13,6 +13,88 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
13
13
ret float %r
14
14
}
15
15
16
+ define half @add_v2HalfH(<2 x half> %bin.rdx) {
17
+ ; CHECK-SD-NOFP16-LABEL: add_v2HalfH:
18
+ ; CHECK-SD-NOFP16: // %bb.0:
19
+ ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
20
+ ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
21
+ ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
22
+ ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
23
+ ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
24
+ ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
25
+ ; CHECK-SD-NOFP16-NEXT: ret
26
+ ;
27
+ ; CHECK-SD-FP16-LABEL: add_v2HalfH:
28
+ ; CHECK-SD-FP16: // %bb.0:
29
+ ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
30
+ ; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr
31
+ ; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
32
+ ; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
33
+ ; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
34
+ ; CHECK-SD-FP16-NEXT: ret
35
+ ;
36
+ ; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
37
+ ; CHECK-GI-NOFP16: // %bb.0:
38
+ ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
39
+ ; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
40
+ ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
41
+ ; CHECK-GI-NOFP16-NEXT: ret
42
+ ;
43
+ ; CHECK-GI-FP16-LABEL: add_v2HalfH:
44
+ ; CHECK-GI-FP16: // %bb.0:
45
+ ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
46
+ ; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
47
+ ; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
48
+ ; CHECK-GI-FP16-NEXT: ret
49
+ %r = call fast half @llvm.vector.reduce.fadd.f16.v2f16(half -0.0, <2 x half> %bin.rdx) ; fast-math fadd reduction over both half lanes; the -0.0 start value is the additive identity
50
+ ret half %r
51
+ }
52
+
53
+ define half @add_v3HalfH(<3 x half> %bin.rdx) {
54
+ ; CHECK-SD-NOFP16-LABEL: add_v3HalfH:
55
+ ; CHECK-SD-NOFP16: // %bb.0:
56
+ ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
57
+ ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
58
+ ; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
59
+ ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[2]
60
+ ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
61
+ ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
62
+ ; CHECK-SD-NOFP16-NEXT: fadd s1, s2, s1
63
+ ; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
64
+ ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
65
+ ; CHECK-SD-NOFP16-NEXT: ret
66
+ ;
67
+ ; CHECK-SD-FP16-LABEL: add_v3HalfH:
68
+ ; CHECK-SD-FP16: // %bb.0:
69
+ ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
70
+ ; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
71
+ ; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
72
+ ; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
73
+ ; CHECK-SD-FP16-NEXT: ret
74
+ ;
75
+ ; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
76
+ ; CHECK-GI-NOFP16: // %bb.0:
77
+ ; CHECK-GI-NOFP16-NEXT: movi v1.2s, #128, lsl #24
78
+ ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
79
+ ; CHECK-GI-NOFP16-NEXT: mov v0.s[3], v1.s[0]
80
+ ; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
81
+ ; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
82
+ ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
83
+ ; CHECK-GI-NOFP16-NEXT: ret
84
+ ;
85
+ ; CHECK-GI-FP16-LABEL: add_v3HalfH:
86
+ ; CHECK-GI-FP16: // %bb.0:
87
+ ; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
88
+ ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
89
+ ; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
90
+ ; CHECK-GI-FP16-NEXT: mov v0.h[3], v1.h[0]
91
+ ; CHECK-GI-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
92
+ ; CHECK-GI-FP16-NEXT: faddp h0, v0.2h
93
+ ; CHECK-GI-FP16-NEXT: ret
94
+ %r = call fast half @llvm.vector.reduce.fadd.f16.v3f16(half -0.0, <3 x half> %bin.rdx) ; fast-math fadd reduction over three half lanes (non-power-of-two width); the -0.0 start value is the additive identity
95
+ ret half %r
96
+ }
97
+
16
98
define half @add_HalfH (<4 x half > %bin.rdx ) {
17
99
; CHECK-SD-NOFP16-LABEL: add_HalfH:
18
100
; CHECK-SD-NOFP16: // %bb.0:
@@ -239,15 +321,15 @@ define float @fadd_reduction_v4f32_in_loop(ptr %ptr.start) {
239
321
; CHECK: // %bb.0: // %entry
240
322
; CHECK-NEXT: movi d0, #0000000000000000
241
323
; CHECK-NEXT: mov x8, xzr
242
- ; CHECK-NEXT: .LBB9_1: // %loop
324
+ ; CHECK-NEXT: .LBB11_1: // %loop
243
325
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
244
326
; CHECK-NEXT: ldr q1, [x0, x8]
245
327
; CHECK-NEXT: add x8, x8, #16
246
328
; CHECK-NEXT: cmp w8, #112
247
329
; CHECK-NEXT: faddp v1.4s, v1.4s, v1.4s
248
330
; CHECK-NEXT: faddp s1, v1.2s
249
331
; CHECK-NEXT: fadd s0, s1, s0
250
- ; CHECK-NEXT: b.ne .LBB9_1
332
+ ; CHECK-NEXT: b.ne .LBB11_1
251
333
; CHECK-NEXT: // %bb.2: // %exit
252
334
; CHECK-NEXT: ret
253
335
entry:
@@ -276,7 +358,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
276
358
; CHECK-SD-NOFP16: // %bb.0: // %entry
277
359
; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
278
360
; CHECK-SD-NOFP16-NEXT: mov x8, xzr
279
- ; CHECK-SD-NOFP16-NEXT: .LBB10_1: // %loop
361
+ ; CHECK-SD-NOFP16-NEXT: .LBB12_1: // %loop
280
362
; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
281
363
; CHECK-SD-NOFP16-NEXT: ldr d1, [x0, x8]
282
364
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
@@ -294,31 +376,31 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
294
376
; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
295
377
; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
296
378
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
297
- ; CHECK-SD-NOFP16-NEXT: b.ne .LBB10_1
379
+ ; CHECK-SD-NOFP16-NEXT: b.ne .LBB12_1
298
380
; CHECK-SD-NOFP16-NEXT: // %bb.2: // %exit
299
381
; CHECK-SD-NOFP16-NEXT: ret
300
382
;
301
383
; CHECK-SD-FP16-LABEL: fadd_reduction_v4f16_in_loop:
302
384
; CHECK-SD-FP16: // %bb.0: // %entry
303
385
; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
304
386
; CHECK-SD-FP16-NEXT: mov x8, xzr
305
- ; CHECK-SD-FP16-NEXT: .LBB10_1: // %loop
387
+ ; CHECK-SD-FP16-NEXT: .LBB12_1: // %loop
306
388
; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
307
389
; CHECK-SD-FP16-NEXT: ldr d1, [x0, x8]
308
390
; CHECK-SD-FP16-NEXT: add x8, x8, #8
309
391
; CHECK-SD-FP16-NEXT: cmp w8, #56
310
392
; CHECK-SD-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
311
393
; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
312
394
; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
313
- ; CHECK-SD-FP16-NEXT: b.ne .LBB10_1
395
+ ; CHECK-SD-FP16-NEXT: b.ne .LBB12_1
314
396
; CHECK-SD-FP16-NEXT: // %bb.2: // %exit
315
397
; CHECK-SD-FP16-NEXT: ret
316
398
;
317
399
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
318
400
; CHECK-GI-NOFP16: // %bb.0: // %entry
319
401
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
320
402
; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
321
- ; CHECK-GI-NOFP16-NEXT: .LBB10_1: // %loop
403
+ ; CHECK-GI-NOFP16-NEXT: .LBB12_1: // %loop
322
404
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
323
405
; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
324
406
; CHECK-GI-NOFP16-NEXT: fmov s1, w9
@@ -333,7 +415,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
333
415
; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
334
416
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
335
417
; CHECK-GI-NOFP16-NEXT: fmov w9, s0
336
- ; CHECK-GI-NOFP16-NEXT: b.ne .LBB10_1
418
+ ; CHECK-GI-NOFP16-NEXT: b.ne .LBB12_1
337
419
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
338
420
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
339
421
; CHECK-GI-NOFP16-NEXT: ret
@@ -342,15 +424,15 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
342
424
; CHECK-GI-FP16: // %bb.0: // %entry
343
425
; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
344
426
; CHECK-GI-FP16-NEXT: mov x8, xzr
345
- ; CHECK-GI-FP16-NEXT: .LBB10_1: // %loop
427
+ ; CHECK-GI-FP16-NEXT: .LBB12_1: // %loop
346
428
; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
347
429
; CHECK-GI-FP16-NEXT: ldr d1, [x0, x8]
348
430
; CHECK-GI-FP16-NEXT: add x8, x8, #8
349
431
; CHECK-GI-FP16-NEXT: cmp w8, #56
350
432
; CHECK-GI-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
351
433
; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
352
434
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
353
- ; CHECK-GI-FP16-NEXT: b.ne .LBB10_1
435
+ ; CHECK-GI-FP16-NEXT: b.ne .LBB12_1
354
436
; CHECK-GI-FP16-NEXT: // %bb.2: // %exit
355
437
; CHECK-GI-FP16-NEXT: ret
356
438
entry:
@@ -379,7 +461,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
379
461
; CHECK-SD-NOFP16: // %bb.0: // %entry
380
462
; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
381
463
; CHECK-SD-NOFP16-NEXT: mov x8, xzr
382
- ; CHECK-SD-NOFP16-NEXT: .LBB11_1: // %loop
464
+ ; CHECK-SD-NOFP16-NEXT: .LBB13_1: // %loop
383
465
; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
384
466
; CHECK-SD-NOFP16-NEXT: ldr q1, [x0, x8]
385
467
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
@@ -409,15 +491,15 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
409
491
; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
410
492
; CHECK-SD-NOFP16-NEXT: fadd s0, s2, s0
411
493
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
412
- ; CHECK-SD-NOFP16-NEXT: b.ne .LBB11_1
494
+ ; CHECK-SD-NOFP16-NEXT: b.ne .LBB13_1
413
495
; CHECK-SD-NOFP16-NEXT: // %bb.2: // %exit
414
496
; CHECK-SD-NOFP16-NEXT: ret
415
497
;
416
498
; CHECK-SD-FP16-LABEL: fadd_reduction_v8f16_in_loop:
417
499
; CHECK-SD-FP16: // %bb.0: // %entry
418
500
; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
419
501
; CHECK-SD-FP16-NEXT: mov x8, xzr
420
- ; CHECK-SD-FP16-NEXT: .LBB11_1: // %loop
502
+ ; CHECK-SD-FP16-NEXT: .LBB13_1: // %loop
421
503
; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
422
504
; CHECK-SD-FP16-NEXT: ldr q1, [x0, x8]
423
505
; CHECK-SD-FP16-NEXT: add x8, x8, #8
@@ -426,15 +508,15 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
426
508
; CHECK-SD-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
427
509
; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
428
510
; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
429
- ; CHECK-SD-FP16-NEXT: b.ne .LBB11_1
511
+ ; CHECK-SD-FP16-NEXT: b.ne .LBB13_1
430
512
; CHECK-SD-FP16-NEXT: // %bb.2: // %exit
431
513
; CHECK-SD-FP16-NEXT: ret
432
514
;
433
515
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
434
516
; CHECK-GI-NOFP16: // %bb.0: // %entry
435
517
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
436
518
; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
437
- ; CHECK-GI-NOFP16-NEXT: .LBB11_1: // %loop
519
+ ; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
438
520
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
439
521
; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
440
522
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
@@ -451,7 +533,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
451
533
; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
452
534
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
453
535
; CHECK-GI-NOFP16-NEXT: fmov w9, s0
454
- ; CHECK-GI-NOFP16-NEXT: b.ne .LBB11_1
536
+ ; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
455
537
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
456
538
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
457
539
; CHECK-GI-NOFP16-NEXT: ret
@@ -460,7 +542,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
460
542
; CHECK-GI-FP16: // %bb.0: // %entry
461
543
; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
462
544
; CHECK-GI-FP16-NEXT: mov x8, xzr
463
- ; CHECK-GI-FP16-NEXT: .LBB11_1: // %loop
545
+ ; CHECK-GI-FP16-NEXT: .LBB13_1: // %loop
464
546
; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
465
547
; CHECK-GI-FP16-NEXT: ldr q1, [x0, x8]
466
548
; CHECK-GI-FP16-NEXT: add x8, x8, #8
@@ -469,7 +551,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
469
551
; CHECK-GI-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
470
552
; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
471
553
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
472
- ; CHECK-GI-FP16-NEXT: b.ne .LBB11_1
554
+ ; CHECK-GI-FP16-NEXT: b.ne .LBB13_1
473
555
; CHECK-GI-FP16-NEXT: // %bb.2: // %exit
474
556
; CHECK-GI-FP16-NEXT: ret
475
557
entry:
0 commit comments