Skip to content

Commit bfcec19

Browse files
committed
[X86] Add test coverage for bfloat <-> half conversion
Inspired by #90738 (although that issue is itself a clang codegen issue)
1 parent 6682753 commit bfcec19

File tree

1 file changed

+103
-6
lines changed

1 file changed

+103
-6
lines changed

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 103 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,103 @@ define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
510510
ret bfloat %trunc
511511
}
512512

513+
; Test: convert a half argument to bfloat by going through float
; (fpext half -> float, then fptrunc float -> bfloat).
; The expected assembly is pinned separately for the X86, SSE2, FP16 and
; AVXNC check prefixes below.
; NOTE(review): the RUN lines that map these prefixes to llc configurations
; are outside this view - confirm the mapping there.
define bfloat @fold_from_half(half %a) nounwind {
514+
; X86-LABEL: fold_from_half:
515+
; X86: # %bb.0:
516+
; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
517+
; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
518+
; X86-NEXT: vcvtneps2bf16 %xmm0, %xmm0
519+
; X86-NEXT: retl
520+
;
521+
; SSE2-LABEL: fold_from_half:
522+
; SSE2: # %bb.0:
523+
; SSE2-NEXT: pushq %rax
524+
; SSE2-NEXT: callq __extendhfsf2@PLT
525+
; SSE2-NEXT: callq __truncsfbf2@PLT
526+
; SSE2-NEXT: popq %rax
527+
; SSE2-NEXT: retq
528+
;
529+
; FP16-LABEL: fold_from_half:
530+
; FP16: # %bb.0:
531+
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
532+
; FP16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
533+
; FP16-NEXT: retq
534+
;
535+
; AVXNC-LABEL: fold_from_half:
536+
; AVXNC: # %bb.0:
537+
; AVXNC-NEXT: vcvtph2ps %xmm0, %xmm0
538+
; AVXNC-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
539+
; AVXNC-NEXT: retq
540+
; IR under test: widen to float, then narrow to bfloat. Every expected
; sequence above still performs both steps (two conversions or two libcalls).
%ext = fpext half %a to float
541+
%trunc = fptrunc float %ext to bfloat
542+
ret bfloat %trunc
543+
}
544+
545+
; Test: convert a bfloat argument to half by going through float
; (fpext bfloat -> float, then fptrunc float -> half).
; The expected assembly is pinned separately for the X86, SSE2, BF16 and
; FP16 check prefixes below. In each of them the 16-bit input is shifted left
; by 16 and moved into xmm0 before the float -> half step.
; NOTE(review): the RUN lines that map these prefixes to llc configurations
; are outside this view - confirm the mapping there.
define half @fold_to_half(bfloat %a) nounwind {
546+
; X86-LABEL: fold_to_half:
547+
; X86: # %bb.0:
548+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
549+
; X86-NEXT: shll $16, %eax
550+
; X86-NEXT: vmovd %eax, %xmm0
551+
; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
552+
; X86-NEXT: retl
553+
;
554+
; SSE2-LABEL: fold_to_half:
555+
; SSE2: # %bb.0:
556+
; SSE2-NEXT: pushq %rax
557+
; SSE2-NEXT: pextrw $0, %xmm0, %eax
558+
; SSE2-NEXT: shll $16, %eax
559+
; SSE2-NEXT: movd %eax, %xmm0
560+
; SSE2-NEXT: callq __truncsfhf2@PLT
561+
; SSE2-NEXT: popq %rax
562+
; SSE2-NEXT: retq
563+
;
564+
; BF16-LABEL: fold_to_half:
565+
; BF16: # %bb.0:
566+
; BF16-NEXT: vpextrw $0, %xmm0, %eax
567+
; BF16-NEXT: shll $16, %eax
568+
; BF16-NEXT: vmovd %eax, %xmm0
569+
; BF16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
570+
; BF16-NEXT: retq
571+
;
572+
; FP16-LABEL: fold_to_half:
573+
; FP16: # %bb.0:
574+
; FP16-NEXT: vmovw %xmm0, %eax
575+
; FP16-NEXT: shll $16, %eax
576+
; FP16-NEXT: vmovd %eax, %xmm0
577+
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
578+
; FP16-NEXT: retq
579+
; IR under test: widen to float, then narrow to half.
%ext = fpext bfloat %a to float
580+
%trunc = fptrunc float %ext to half
581+
ret half %trunc
582+
}
583+
584+
; Test: reinterpret the bits of a half argument as bfloat (bitcast, no value
; conversion). Under the X86 prefix the expected code is a single vmovsh load
; followed by the return; under the generic CHECK prefix it is the return only.
; NOTE(review): the RUN lines that select these prefixes are outside this
; view - confirm which configurations share the generic prefix.
define bfloat @bitcast_from_half(half %a) nounwind {
585+
; X86-LABEL: bitcast_from_half:
586+
; X86: # %bb.0:
587+
; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
588+
; X86-NEXT: retl
589+
;
590+
; CHECK-LABEL: bitcast_from_half:
591+
; CHECK: # %bb.0:
592+
; CHECK-NEXT: retq
593+
; IR under test: same-width bit reinterpretation, half -> bfloat.
%bc = bitcast half %a to bfloat
594+
ret bfloat %bc
595+
}
596+
597+
; Test: reinterpret the bits of a bfloat argument as half (bitcast, no value
; conversion). Under the X86 prefix the expected code is a single vmovsh load
; followed by the return; under the generic CHECK prefix it is the return only.
; NOTE(review): the RUN lines that select these prefixes are outside this
; view - confirm which configurations share the generic prefix.
define half @bitcast_to_half(bfloat %a) nounwind {
598+
; X86-LABEL: bitcast_to_half:
599+
; X86: # %bb.0:
600+
; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
601+
; X86-NEXT: retl
602+
;
603+
; CHECK-LABEL: bitcast_to_half:
604+
; CHECK: # %bb.0:
605+
; CHECK-NEXT: retq
606+
; IR under test: same-width bit reinterpretation, bfloat -> half.
%bc = bitcast bfloat %a to half
607+
ret half %bc
608+
}
609+
513610
define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
514611
; X86-LABEL: addv:
515612
; X86: # %bb.0:
@@ -747,15 +844,15 @@ define <32 x bfloat> @pr63017_2() nounwind {
747844
; SSE2: # %bb.0:
748845
; SSE2-NEXT: xorl %eax, %eax
749846
; SSE2-NEXT: testb %al, %al
750-
; SSE2-NEXT: jne .LBB12_1
847+
; SSE2-NEXT: jne .LBB16_1
751848
; SSE2-NEXT: # %bb.2: # %cond.load
752849
; SSE2-NEXT: movzwl (%rax), %eax
753850
; SSE2-NEXT: shll $16, %eax
754851
; SSE2-NEXT: movd %eax, %xmm0
755-
; SSE2-NEXT: jmp .LBB12_3
756-
; SSE2-NEXT: .LBB12_1:
852+
; SSE2-NEXT: jmp .LBB16_3
853+
; SSE2-NEXT: .LBB16_1:
757854
; SSE2-NEXT: movd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
758-
; SSE2-NEXT: .LBB12_3:
855+
; SSE2-NEXT: .LBB16_3:
759856
; SSE2-NEXT: pushq %r14
760857
; SSE2-NEXT: pushq %rbx
761858
; SSE2-NEXT: subq $88, %rsp
@@ -992,10 +1089,10 @@ define <32 x bfloat> @pr63017_2() nounwind {
9921089
; AVXNC-NEXT: vbroadcastss {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
9931090
; AVXNC-NEXT: xorl %eax, %eax
9941091
; AVXNC-NEXT: testb %al, %al
995-
; AVXNC-NEXT: jne .LBB12_2
1092+
; AVXNC-NEXT: jne .LBB16_2
9961093
; AVXNC-NEXT: # %bb.1: # %cond.load
9971094
; AVXNC-NEXT: vmovups (%rax), %ymm0
998-
; AVXNC-NEXT: .LBB12_2:
1095+
; AVXNC-NEXT: .LBB16_2:
9991096
; AVXNC-NEXT: vmovaps %ymm0, %ymm1
10001097
; AVXNC-NEXT: retq
10011098
%1 = call <32 x bfloat> @llvm.masked.load.v32bf16.p0(ptr poison, i32 2, <32 x i1> poison, <32 x bfloat> <bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80>)

0 commit comments

Comments
 (0)