@@ -365,6 +365,85 @@ pub unsafe fn _mm512_maskz_dpbf16_ps(
365
365
transmute ( simd_select_bitmask ( k, rst, zero) )
366
366
}
367
367
368
+ #[ inline]
369
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
370
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
371
+ pub unsafe fn _mm512_cvtpbh_ps ( a : __m256bh ) -> __m512 {
372
+ _mm512_castsi512_ps ( _mm512_slli_epi32 :: < 16 > ( _mm512_cvtepi16_epi32 ( transmute ( a) ) ) )
373
+ }
374
+
375
+ #[ inline]
376
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
377
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
378
+ pub unsafe fn _mm512_mask_cvtpbh_ps ( src : __m512 , k : __mmask16 , a : __m256bh ) -> __m512 {
379
+ let cvt = _mm512_cvtpbh_ps ( a) ;
380
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x16 ( ) , src. as_f32x16 ( ) ) )
381
+ }
382
+
383
+ #[ inline]
384
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
385
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
386
+ pub unsafe fn _mm512_maskz_cvtpbh_ps ( k : __mmask16 , a : __m256bh ) -> __m512 {
387
+ let cvt = _mm512_cvtpbh_ps ( a) ;
388
+ let zero = _mm512_setzero_ps ( ) ;
389
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x16 ( ) , zero. as_f32x16 ( ) ) )
390
+ }
391
+
392
+ #[ inline]
393
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
394
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
395
+ pub unsafe fn _mm256_cvtpbh_ps ( a : __m128bh ) -> __m256 {
396
+ _mm256_castsi256_ps ( _mm256_slli_epi32 :: < 16 > ( _mm256_cvtepi16_epi32 ( transmute ( a) ) ) )
397
+ }
398
+
399
+ #[ inline]
400
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
401
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
402
+ pub unsafe fn _mm256_mask_cvtpbh_ps ( src : __m256 , k : __mmask8 , a : __m128bh ) -> __m256 {
403
+ let cvt = _mm256_cvtpbh_ps ( a) ;
404
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x8 ( ) , src. as_f32x8 ( ) ) )
405
+ }
406
+
407
+ #[ inline]
408
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
409
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
410
+ pub unsafe fn _mm256_maskz_cvtpbh_ps ( k : __mmask8 , a : __m128bh ) -> __m256 {
411
+ let cvt = _mm256_cvtpbh_ps ( a) ;
412
+ let zero = _mm256_setzero_ps ( ) ;
413
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x8 ( ) , zero. as_f32x8 ( ) ) )
414
+ }
415
+
416
+ #[ inline]
417
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
418
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
419
+ pub unsafe fn _mm_cvtpbh_ps ( a : __m128bh ) -> __m128 {
420
+ _mm_castsi128_ps ( _mm_slli_epi32 :: < 16 > ( _mm_cvtepi16_epi32 ( transmute ( a) ) ) )
421
+ }
422
+
423
+ #[ inline]
424
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
425
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
426
+ pub unsafe fn _mm_mask_cvtpbh_ps ( src : __m128 , k : __mmask8 , a : __m128bh ) -> __m128 {
427
+ let cvt = _mm_cvtpbh_ps ( a) ;
428
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x4 ( ) , src. as_f32x4 ( ) ) )
429
+ }
430
+
431
+ #[ inline]
432
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
433
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
434
+ pub unsafe fn _mm_maskz_cvtpbh_ps ( k : __mmask8 , a : __m128bh ) -> __m128 {
435
+ let cvt = _mm_cvtpbh_ps ( a) ;
436
+ let zero = _mm_setzero_ps ( ) ;
437
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x4 ( ) , zero. as_f32x4 ( ) ) )
438
+ }
439
+
440
+ #[ inline]
441
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
442
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
443
+ pub unsafe fn _mm_cvtsbh_ss ( a : u16 ) -> f32 {
444
+ f32:: from_bits ( ( a as u32 ) << 16 )
445
+ }
446
+
368
447
#[ cfg( test) ]
369
448
mod tests {
370
449
use crate :: { core_arch:: x86:: * , mem:: transmute} ;
@@ -1592,4 +1671,123 @@ mod tests {
1592
1671
] ;
1593
1672
assert_eq ! ( result, expected_result) ;
1594
1673
}
1674
+
1675
+ const BF16_ONE : u16 = 0b0_01111111_0000000 ;
1676
+ const BF16_TWO : u16 = 0b0_10000000_0000000 ;
1677
+ const BF16_THREE : u16 = 0b0_10000000_1000000 ;
1678
+ const BF16_FOUR : u16 = 0b0_10000001_0000000 ;
1679
+ const BF16_FIVE : u16 = 0b0_10000001_0100000 ;
1680
+ const BF16_SIX : u16 = 0b0_10000001_1000000 ;
1681
+ const BF16_SEVEN : u16 = 0b0_10000001_1100000 ;
1682
+ const BF16_EIGHT : u16 = 0b0_10000010_0000000 ;
1683
+
1684
+ #[ simd_test( enable = "avx512bf16" ) ]
1685
+ unsafe fn test_mm512_cvtpbh_ps ( ) {
1686
+ let a = __m256bh (
1687
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1688
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1689
+ ) ;
1690
+ let r = _mm512_cvtpbh_ps ( a) ;
1691
+ let e = _mm512_setr_ps (
1692
+ 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 ,
1693
+ ) ;
1694
+ assert_eq_m512 ( r, e) ;
1695
+ }
1696
+
1697
+ #[ simd_test( enable = "avx512bf16" ) ]
1698
+ unsafe fn test_mm512_mask_cvtpbh_ps ( ) {
1699
+ let a = __m256bh (
1700
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1701
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1702
+ ) ;
1703
+ let src = _mm512_setr_ps (
1704
+ 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. , 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. ,
1705
+ ) ;
1706
+ let k = 0b1010_1010_1010_1010 ;
1707
+ let r = _mm512_mask_cvtpbh_ps ( src, k, a) ;
1708
+ let e = _mm512_setr_ps (
1709
+ 9. , 2. , 11. , 4. , 13. , 6. , 15. , 8. , 9. , 2. , 11. , 4. , 13. , 6. , 15. , 8. ,
1710
+ ) ;
1711
+ assert_eq_m512 ( r, e) ;
1712
+ }
1713
+
1714
+ #[ simd_test( enable = "avx512bf16" ) ]
1715
+ unsafe fn test_mm512_maskz_cvtpbh_ps ( ) {
1716
+ let a = __m256bh (
1717
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1718
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1719
+ ) ;
1720
+ let k = 0b1010_1010_1010_1010 ;
1721
+ let r = _mm512_maskz_cvtpbh_ps ( k, a) ;
1722
+ let e = _mm512_setr_ps (
1723
+ 0. , 2. , 0. , 4. , 0. , 6. , 0. , 8. , 0. , 2. , 0. , 4. , 0. , 6. , 0. , 8. ,
1724
+ ) ;
1725
+ assert_eq_m512 ( r, e) ;
1726
+ }
1727
+
1728
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1729
+ unsafe fn test_mm256_cvtpbh_ps ( ) {
1730
+ let a = __m128bh (
1731
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1732
+ ) ;
1733
+ let r = _mm256_cvtpbh_ps ( a) ;
1734
+ let e = _mm256_setr_ps ( 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 ) ;
1735
+ assert_eq_m256 ( r, e) ;
1736
+ }
1737
+
1738
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1739
+ unsafe fn test_mm256_mask_cvtpbh_ps ( ) {
1740
+ let a = __m128bh (
1741
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1742
+ ) ;
1743
+ let src = _mm256_setr_ps ( 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. ) ;
1744
+ let k = 0b1010_1010 ;
1745
+ let r = _mm256_mask_cvtpbh_ps ( src, k, a) ;
1746
+ let e = _mm256_setr_ps ( 9. , 2. , 11. , 4. , 13. , 6. , 15. , 8. ) ;
1747
+ assert_eq_m256 ( r, e) ;
1748
+ }
1749
+
1750
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1751
+ unsafe fn test_mm256_maskz_cvtpbh_ps ( ) {
1752
+ let a = __m128bh (
1753
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1754
+ ) ;
1755
+ let k = 0b1010_1010 ;
1756
+ let r = _mm256_maskz_cvtpbh_ps ( k, a) ;
1757
+ let e = _mm256_setr_ps ( 0. , 2. , 0. , 4. , 0. , 6. , 0. , 8. ) ;
1758
+ assert_eq_m256 ( r, e) ;
1759
+ }
1760
+
1761
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1762
+ unsafe fn test_mm_cvtpbh_ps ( ) {
1763
+ let a = __m128bh ( BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , 0 , 0 , 0 , 0 ) ;
1764
+ let r = _mm_cvtpbh_ps ( a) ;
1765
+ let e = _mm_setr_ps ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
1766
+ assert_eq_m128 ( r, e) ;
1767
+ }
1768
+
1769
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1770
+ unsafe fn test_mm_mask_cvtpbh_ps ( ) {
1771
+ let a = __m128bh ( BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , 0 , 0 , 0 , 0 ) ;
1772
+ let src = _mm_setr_ps ( 9. , 10. , 11. , 12. ) ;
1773
+ let k = 0b1010 ;
1774
+ let r = _mm_mask_cvtpbh_ps ( src, k, a) ;
1775
+ let e = _mm_setr_ps ( 9. , 2. , 11. , 4. ) ;
1776
+ assert_eq_m128 ( r, e) ;
1777
+ }
1778
+
1779
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1780
+ unsafe fn test_mm_maskz_cvtpbh_ps ( ) {
1781
+ let a = __m128bh ( BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , 0 , 0 , 0 , 0 ) ;
1782
+ let k = 0b1010 ;
1783
+ let r = _mm_maskz_cvtpbh_ps ( k, a) ;
1784
+ let e = _mm_setr_ps ( 0. , 2. , 0. , 4. ) ;
1785
+ assert_eq_m128 ( r, e) ;
1786
+ }
1787
+
1788
+ #[ simd_test( enable = "avx512bf16" ) ]
1789
+ unsafe fn test_mm_cvtsbh_ss ( ) {
1790
+ let r = _mm_cvtsbh_ss ( BF16_ONE ) ;
1791
+ assert_eq ! ( r, 1. ) ;
1792
+ }
1595
1793
}
0 commit comments