Use simd_fma where possible

bjorn3 · gnzlbg · commit 7c2cab40317b · 2019-12-18T17:41:21.000+01:00
diff --git a/crates/core_arch/src/x86/fma.rs b/crates/core_arch/src/x86/fma.rs
@@ -19,6 +19,7 @@
 //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
 
 use crate::core_arch::x86::*;
+use crate::core_arch::simd_llvm::simd_fma;
 
 #[cfg(test)]
 use stdarch_test::assert_instr;
@@ -32,7 +33,7 @@ use stdarch_test::assert_instr;
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    vfmaddpd(a, b, c)
+    simd_fma(a, b, c)
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -44,7 +45,7 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    vfmaddpd256(a, b, c)
+    simd_fma(a, b, c)
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -56,7 +57,7 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    vfmaddps(a, b, c)
+    simd_fma(a, b, c)
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -68,7 +69,7 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    vfmaddps256(a, b, c)
+    simd_fma(a, b, c)
 }
 
 /// Multiplies the lower double-precision (64-bit) floating-point elements in