[X86] Add test coverage for bfloat <-> half conversion

RKSimon · RKSimon · commit bfcec1999c7e · 2025-02-18T16:49:51.000Z
Inspired by #90738 (although that is a clang codegen issue)
diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll
@@ -510,6 +510,103 @@ define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
   ret bfloat %trunc
 }
 
+define bfloat @fold_from_half(half %a) nounwind {
+; X86-LABEL: fold_from_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; SSE2-LABEL: fold_from_half:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pushq %rax
+; SSE2-NEXT:    callq __extendhfsf2@PLT
+; SSE2-NEXT:    callq __truncsfbf2@PLT
+; SSE2-NEXT:    popq %rax
+; SSE2-NEXT:    retq
+;
+; FP16-LABEL: fold_from_half:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; FP16-NEXT:    retq
+;
+; AVXNC-LABEL: fold_from_half:
+; AVXNC:       # %bb.0:
+; AVXNC-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVXNC-NEXT:    {vex} vcvtneps2bf16 %xmm0, %xmm0
+; AVXNC-NEXT:    retq
+  %ext = fpext half %a to float ; step 1 of half to bfloat: widen the half argument to float
+  %trunc = fptrunc float %ext to bfloat ; step 2: narrow that float to bfloat; every assertion group above expects both steps to remain
+  ret bfloat %trunc
+}
+
+define half @fold_to_half(bfloat %a) nounwind {
+; X86-LABEL: fold_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    vmovd %eax, %xmm0
+; X86-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; SSE2-LABEL: fold_to_half:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pushq %rax
+; SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; SSE2-NEXT:    shll $16, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    callq __truncsfhf2@PLT
+; SSE2-NEXT:    popq %rax
+; SSE2-NEXT:    retq
+;
+; BF16-LABEL: fold_to_half:
+; BF16:       # %bb.0:
+; BF16-NEXT:    vpextrw $0, %xmm0, %eax
+; BF16-NEXT:    shll $16, %eax
+; BF16-NEXT:    vmovd %eax, %xmm0
+; BF16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; BF16-NEXT:    retq
+;
+; FP16-LABEL: fold_to_half:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vmovw %xmm0, %eax
+; FP16-NEXT:    shll $16, %eax
+; FP16-NEXT:    vmovd %eax, %xmm0
+; FP16-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    retq
+  %ext = fpext bfloat %a to float ; step 1 of bfloat to half: widen to float; the assertions above expect this as a shll by 16 of the raw bits
+  %trunc = fptrunc float %ext to half ; step 2: narrow the float to half (a conversion instruction or a __truncsfhf2 call in the assertions above)
+  ret half %trunc
+}
+
+define bfloat @bitcast_from_half(half %a) nounwind {
+; X86-LABEL: bitcast_from_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    retl
+;
+; CHECK-LABEL: bitcast_from_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    retq
+  %bc = bitcast half %a to bfloat ; reinterpret the half bits as bfloat, no value conversion; the assertions above expect at most a load before the return
+  ret bfloat %bc
+}
+
+define half @bitcast_to_half(bfloat %a) nounwind {
+; X86-LABEL: bitcast_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    retl
+;
+; CHECK-LABEL: bitcast_to_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    retq
+  %bc = bitcast bfloat %a to half ; reinterpret the bfloat bits as half, no value conversion; the assertions above expect at most a load before the return
+  ret half %bc
+}
+
 define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
 ; X86-LABEL: addv:
 ; X86:       # %bb.0:
@@ -747,15 +844,15 @@ define <32 x bfloat> @pr63017_2() nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    xorl %eax, %eax
 ; SSE2-NEXT:    testb %al, %al
-; SSE2-NEXT:    jne .LBB12_1
+; SSE2-NEXT:    jne .LBB16_1
 ; SSE2-NEXT:  # %bb.2: # %cond.load
 ; SSE2-NEXT:    movzwl (%rax), %eax
 ; SSE2-NEXT:    shll $16, %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    jmp .LBB12_3
-; SSE2-NEXT:  .LBB12_1:
+; SSE2-NEXT:    jmp .LBB16_3
+; SSE2-NEXT:  .LBB16_1:
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; SSE2-NEXT:  .LBB12_3:
+; SSE2-NEXT:  .LBB16_3:
 ; SSE2-NEXT:    pushq %r14
 ; SSE2-NEXT:    pushq %rbx
 ; SSE2-NEXT:    subq $88, %rsp
@@ -992,10 +1089,10 @@ define <32 x bfloat> @pr63017_2() nounwind {
 ; AVXNC-NEXT:    vbroadcastss {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
 ; AVXNC-NEXT:    xorl %eax, %eax
 ; AVXNC-NEXT:    testb %al, %al
-; AVXNC-NEXT:    jne .LBB12_2
+; AVXNC-NEXT:    jne .LBB16_2
 ; AVXNC-NEXT:  # %bb.1: # %cond.load
 ; AVXNC-NEXT:    vmovups (%rax), %ymm0
-; AVXNC-NEXT:  .LBB12_2:
+; AVXNC-NEXT:  .LBB16_2:
 ; AVXNC-NEXT:    vmovaps %ymm0, %ymm1
 ; AVXNC-NEXT:    retq
   %1 = call <32 x bfloat> @llvm.masked.load.v32bf16.p0(ptr poison, i32 2, <32 x i1> poison, <32 x bfloat> <bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80, bfloat 0xRBF80>)