@@ -510,6 +510,103 @@ define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
510
510
ret bfloat %trunc
511
511
}
512
512
513
; fold_from_half: extends a half argument to float (fpext) and truncates the
; float to bfloat (fptrunc). The expected output under every prefix below
; still contains both steps -- a half->float conversion (vcvtsh2ss, vcvtph2ps,
; or a call to __extendhfsf2) followed by a float->bfloat conversion
; (vcvtneps2bf16 or a call to __truncsfbf2).
; NOTE(review): presumably guards against folding the ext/trunc pair away,
; which would be wrong because half and bfloat are different formats -- confirm
; against the commit description.
+ define bfloat @fold_from_half (half %a ) nounwind {
514
+ ; X86-LABEL: fold_from_half:
515
+ ; X86: # %bb.0:
516
+ ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
517
+ ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
518
+ ; X86-NEXT: vcvtneps2bf16 %xmm0, %xmm0
519
+ ; X86-NEXT: retl
520
+ ;
521
+ ; SSE2-LABEL: fold_from_half:
522
+ ; SSE2: # %bb.0:
523
+ ; SSE2-NEXT: pushq %rax
524
+ ; SSE2-NEXT: callq __extendhfsf2@PLT
525
+ ; SSE2-NEXT: callq __truncsfbf2@PLT
526
+ ; SSE2-NEXT: popq %rax
527
+ ; SSE2-NEXT: retq
528
+ ;
529
+ ; FP16-LABEL: fold_from_half:
530
+ ; FP16: # %bb.0:
531
+ ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
532
+ ; FP16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
533
+ ; FP16-NEXT: retq
534
+ ;
535
+ ; AVXNC-LABEL: fold_from_half:
536
+ ; AVXNC: # %bb.0:
537
+ ; AVXNC-NEXT: vcvtph2ps %xmm0, %xmm0
538
+ ; AVXNC-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
539
+ ; AVXNC-NEXT: retq
540
+ %ext = fpext half %a to float
541
+ %trunc = fptrunc float %ext to bfloat
542
+ ret bfloat %trunc
543
+ }
544
+
545
; fold_to_half: extends a bfloat argument to float (fpext) and truncates the
; float to half (fptrunc). Under every prefix below the expected output first
; moves the 16-bit value into a GPR and shifts it left by 16 (shll $16) before
; placing it in xmm0, then performs a float->half conversion (vcvtss2sh,
; vcvtps2ph $4, or a call to __truncsfhf2).
; NOTE(review): presumably guards against folding the ext/trunc pair away,
; which would be wrong because bfloat and half are different formats -- confirm
; against the commit description.
+ define half @fold_to_half (bfloat %a ) nounwind {
546
+ ; X86-LABEL: fold_to_half:
547
+ ; X86: # %bb.0:
548
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
549
+ ; X86-NEXT: shll $16, %eax
550
+ ; X86-NEXT: vmovd %eax, %xmm0
551
+ ; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
552
+ ; X86-NEXT: retl
553
+ ;
554
+ ; SSE2-LABEL: fold_to_half:
555
+ ; SSE2: # %bb.0:
556
+ ; SSE2-NEXT: pushq %rax
557
+ ; SSE2-NEXT: pextrw $0, %xmm0, %eax
558
+ ; SSE2-NEXT: shll $16, %eax
559
+ ; SSE2-NEXT: movd %eax, %xmm0
560
+ ; SSE2-NEXT: callq __truncsfhf2@PLT
561
+ ; SSE2-NEXT: popq %rax
562
+ ; SSE2-NEXT: retq
563
+ ;
564
+ ; BF16-LABEL: fold_to_half:
565
+ ; BF16: # %bb.0:
566
+ ; BF16-NEXT: vpextrw $0, %xmm0, %eax
567
+ ; BF16-NEXT: shll $16, %eax
568
+ ; BF16-NEXT: vmovd %eax, %xmm0
569
+ ; BF16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
570
+ ; BF16-NEXT: retq
571
+ ;
572
+ ; FP16-LABEL: fold_to_half:
573
+ ; FP16: # %bb.0:
574
+ ; FP16-NEXT: vmovw %xmm0, %eax
575
+ ; FP16-NEXT: shll $16, %eax
576
+ ; FP16-NEXT: vmovd %eax, %xmm0
577
+ ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
578
+ ; FP16-NEXT: retq
579
+ %ext = fpext bfloat %a to float
580
+ %trunc = fptrunc float %ext to half
581
+ ret half %trunc
582
+ }
583
+
584
; bitcast_from_half: reinterprets a half argument as bfloat with a plain
; bitcast and returns it. The expected output contains no conversion
; instruction: the X86 prefix only loads the value from memory with vmovsh
; before returning, and the shared CHECK prefix is a bare retq.
+ define bfloat @bitcast_from_half (half %a ) nounwind {
585
+ ; X86-LABEL: bitcast_from_half:
586
+ ; X86: # %bb.0:
587
+ ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
588
+ ; X86-NEXT: retl
589
+ ;
590
+ ; CHECK-LABEL: bitcast_from_half:
591
+ ; CHECK: # %bb.0:
592
+ ; CHECK-NEXT: retq
593
+ %bc = bitcast half %a to bfloat
594
+ ret bfloat %bc
595
+ }
596
+
597
; bitcast_to_half: reinterprets a bfloat argument as half with a plain bitcast
; and returns it. The expected output contains no conversion instruction: the
; X86 prefix only loads the value from memory with vmovsh before returning,
; and the shared CHECK prefix is a bare retq.
+ define half @bitcast_to_half (bfloat %a ) nounwind {
598
+ ; X86-LABEL: bitcast_to_half:
599
+ ; X86: # %bb.0:
600
+ ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
601
+ ; X86-NEXT: retl
602
+ ;
603
+ ; CHECK-LABEL: bitcast_to_half:
604
+ ; CHECK: # %bb.0:
605
+ ; CHECK-NEXT: retq
606
+ %bc = bitcast bfloat %a to half
607
+ ret half %bc
608
+ }
609
+
513
610
define <8 x bfloat> @addv (<8 x bfloat> %a , <8 x bfloat> %b ) nounwind {
514
611
; X86-LABEL: addv:
515
612
; X86: # %bb.0:
@@ -747,15 +844,15 @@ define <32 x bfloat> @pr63017_2() nounwind {
747
844
; SSE2: # %bb.0:
748
845
; SSE2-NEXT: xorl %eax, %eax
749
846
; SSE2-NEXT: testb %al, %al
750
- ; SSE2-NEXT: jne .LBB12_1
847
+ ; SSE2-NEXT: jne .LBB16_1
751
848
; SSE2-NEXT: # %bb.2: # %cond.load
752
849
; SSE2-NEXT: movzwl (%rax), %eax
753
850
; SSE2-NEXT: shll $16, %eax
754
851
; SSE2-NEXT: movd %eax, %xmm0
755
- ; SSE2-NEXT: jmp .LBB12_3
756
- ; SSE2-NEXT: .LBB12_1 :
852
+ ; SSE2-NEXT: jmp .LBB16_3
853
+ ; SSE2-NEXT: .LBB16_1 :
757
854
; SSE2-NEXT: movd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
758
- ; SSE2-NEXT: .LBB12_3 :
855
+ ; SSE2-NEXT: .LBB16_3 :
759
856
; SSE2-NEXT: pushq %r14
760
857
; SSE2-NEXT: pushq %rbx
761
858
; SSE2-NEXT: subq $88, %rsp
@@ -992,10 +1089,10 @@ define <32 x bfloat> @pr63017_2() nounwind {
992
1089
; AVXNC-NEXT: vbroadcastss {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
993
1090
; AVXNC-NEXT: xorl %eax, %eax
994
1091
; AVXNC-NEXT: testb %al, %al
995
- ; AVXNC-NEXT: jne .LBB12_2
1092
+ ; AVXNC-NEXT: jne .LBB16_2
996
1093
; AVXNC-NEXT: # %bb.1: # %cond.load
997
1094
; AVXNC-NEXT: vmovups (%rax), %ymm0
998
- ; AVXNC-NEXT: .LBB12_2 :
1095
+ ; AVXNC-NEXT: .LBB16_2 :
999
1096
; AVXNC-NEXT: vmovaps %ymm0, %ymm1
1000
1097
; AVXNC-NEXT: retq
1001
1098
%1 = call <32 x bfloat> @llvm.masked.load.v32bf16.p0 (ptr poison, i32 2 , <32 x i1 > poison, <32 x bfloat> <bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80>)
0 commit comments