diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 0363004674..e878602036 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7,6 +7,8 @@ use crate::{ #[cfg(test)] use stdarch_test::assert_instr; +use super::avx512f::{vpl, vps}; + /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30) @@ -4237,11 +4239,11 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) { pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i { let mut dst: __m512i = src; asm!( - "vmovdqu16 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu16 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4256,11 +4258,11 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i { let mut dst: __m512i; asm!( - "vmovdqu16 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4275,11 +4277,11 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i { let mut dst: __m512i = src; asm!( - "vmovdqu8 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu8 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4294,11 +4296,11 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i { let mut dst: __m512i; asm!( - "vmovdqu8 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4313,11 +4315,11 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5 pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i { let mut dst: __m256i = src; asm!( - "vmovdqu16 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu16 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4332,11 +4334,11 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i { let mut dst: __m256i; asm!( - "vmovdqu16 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4351,11 +4353,11 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i { let mut dst: __m256i = src; asm!( - "vmovdqu8 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu8 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4370,11 +4372,11 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i { let mut dst: __m256i; asm!( - "vmovdqu8 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4389,11 +4391,11 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2 pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i { let mut dst: __m128i = src; asm!( - "vmovdqu16 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu16 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4408,11 +4410,11 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i { let mut dst: __m128i; asm!( - "vmovdqu16 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4427,11 +4429,11 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128 pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i { let mut dst: __m128i = src; asm!( - "vmovdqu8 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu8 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4446,11 +4448,11 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i { let mut dst: __m128i; asm!( - "vmovdqu8 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -4463,11 +4465,11 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i #[target_feature(enable = "avx512f,avx512bw")] pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) { asm!( - "vmovdqu16 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovdqu16", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -4479,11 +4481,11 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _ #[target_feature(enable = "avx512f,avx512bw")] pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) { asm!( - "vmovdqu8 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovdqu8", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -4495,11 +4497,11 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m #[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")] pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) { asm!( - "vmovdqu16 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovdqu16", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -4511,11 +4513,11 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _ #[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")] pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) { asm!( - "vmovdqu8 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovdqu8", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -4527,11 +4529,11 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m #[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")] pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) { asm!( - "vmovdqu16 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovdqu16", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -4543,11 +4545,11 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12 #[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")] pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) { asm!( - "vmovdqu8 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovdqu8", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7633442aa6..f735559fed 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -4,6 +4,37 @@ use crate::{ ptr, }; +// x86-32 wants to use a 32-bit address size, but asm! defaults to using the full +// register name (e.g. rax). We have to explicitly override the placeholder to +// use the 32-bit register name in that case. + +#[cfg(target_pointer_width = "32")] +macro_rules! vpl { + ($inst:expr) => { + concat!($inst, ", [{p:e}]") + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! vpl { + ($inst:expr) => { + concat!($inst, ", [{p}]") + }; +} +#[cfg(target_pointer_width = "32")] +macro_rules! vps { + ($inst1:expr, $inst2:expr) => { + concat!($inst1, " [{p:e}]", $inst2) + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! vps { + ($inst1:expr, $inst2:expr) => { + concat!($inst1, " [{p}]", $inst2) + }; +} + +pub(crate) use {vpl, vps}; + #[cfg(test)] use stdarch_test::assert_instr; @@ -30333,11 +30364,11 @@ pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) { pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i { let mut dst: __m512i = src; asm!( - "vmovdqu32 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu32 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30352,11 +30383,11 @@ pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *con pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i { let mut dst: __m512i; asm!( - "vmovdqu32 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu32 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30371,11 +30402,11 @@ pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __ pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i { let mut dst: __m512i = src; asm!( - "vmovdqu64 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu64 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30390,11 +30421,11 @@ pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *cons pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i { let mut dst: __m512i; asm!( - "vmovdqu64 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu64 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30409,11 +30440,11 @@ pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 { let mut dst: __m512 = src; asm!( - "vmovups {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovups {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30428,11 +30459,11 @@ pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 { let mut dst: __m512; asm!( - "vmovups {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovups {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30447,11 +30478,11 @@ pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m51 pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d { let mut dst: __m512d = src; asm!( - "vmovupd {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovupd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30466,11 +30497,11 @@ pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d { let mut dst: __m512d; asm!( - "vmovupd {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovupd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30485,11 +30516,11 @@ pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512 pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i { let mut dst: __m256i = src; asm!( - "vmovdqu32 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu32 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30504,11 +30535,11 @@ pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *cons pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i { let mut dst: __m256i; asm!( - "vmovdqu32 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu32 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30523,11 +30554,11 @@ pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i { let mut dst: __m256i = src; asm!( - "vmovdqu64 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu64 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30542,11 +30573,11 @@ pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *cons pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i { let mut dst: __m256i; asm!( - "vmovdqu64 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu64 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30561,11 +30592,11 @@ pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 { let mut dst: __m256 = src; asm!( - "vmovups {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovups {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30580,11 +30611,11 @@ pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f3 pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 { let mut dst: __m256; asm!( - "vmovups {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovups {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30599,11 +30630,11 @@ pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d { let mut dst: __m256d = src; asm!( - "vmovupd {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovupd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30618,11 +30649,11 @@ pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d { let mut dst: __m256d; asm!( - "vmovupd {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovupd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30637,11 +30668,11 @@ pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256 pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i { let mut dst: __m128i = src; asm!( - "vmovdqu32 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu32 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30656,11 +30687,11 @@ pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i { let mut dst: __m128i; asm!( - "vmovdqu32 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu32 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30675,11 +30706,11 @@ pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128 pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i { let mut dst: __m128i = src; asm!( - "vmovdqu64 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu64 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30694,11 +30725,11 @@ pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i { let mut dst: __m128i; asm!( - "vmovdqu64 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqu64 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30713,11 +30744,11 @@ pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128 pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 { let mut dst: __m128 = src; asm!( - "vmovups {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovups {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30732,11 +30763,11 @@ pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { let mut dst: __m128; asm!( - "vmovups {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovups {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30751,11 +30782,11 @@ pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d { let mut dst: __m128d = src; asm!( - "vmovupd {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovupd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30770,11 +30801,11 @@ pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d { let mut dst: __m128d; asm!( - "vmovupd {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovupd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30789,11 +30820,11 @@ pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d { pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i { let mut dst: __m512i = src; asm!( - "vmovdqa32 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa32 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30808,11 +30839,11 @@ pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *cons pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i { let mut dst: __m512i; asm!( - "vmovdqa32 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30827,11 +30858,11 @@ pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i { let mut dst: __m512i = src; asm!( - "vmovdqa64 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa64 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30846,11 +30877,11 @@ pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i { let mut dst: __m512i; asm!( - "vmovdqa64 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30865,11 +30896,11 @@ pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m5 pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 { let mut dst: __m512 = src; asm!( - "vmovaps {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovaps {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30884,11 +30915,11 @@ pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f3 pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 { let mut dst: __m512; asm!( - "vmovaps {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovaps {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30903,11 +30934,11 @@ pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d { let mut dst: __m512d = src; asm!( - "vmovapd {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovapd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30922,11 +30953,11 @@ pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f6 pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d { let mut dst: __m512d; asm!( - "vmovapd {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(zmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovapd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(zmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30941,11 +30972,11 @@ pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i { let mut dst: __m256i = src; asm!( - "vmovdqa32 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa32 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30960,11 +30991,11 @@ pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i { let mut dst: __m256i; asm!( - "vmovdqa32 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30979,11 +31010,11 @@ pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m2 pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i { let mut dst: __m256i = src; asm!( - "vmovdqa64 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa64 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -30998,11 +31029,11 @@ pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i { let mut dst: __m256i; asm!( - "vmovdqa64 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31017,11 +31048,11 @@ pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m2 pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 { let mut dst: __m256 = src; asm!( - "vmovaps {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovaps {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31036,11 +31067,11 @@ pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32 pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 { let mut dst: __m256; asm!( - "vmovaps {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovaps {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31055,11 +31086,11 @@ pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d { let mut dst: __m256d = src; asm!( - "vmovapd {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovapd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31074,11 +31105,11 @@ pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f6 pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d { let mut dst: __m256d; asm!( - "vmovapd {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(ymm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovapd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(ymm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31093,11 +31124,11 @@ pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i { let mut dst: __m128i = src; asm!( - "vmovdqa32 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa32 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31112,11 +31143,11 @@ pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i3 pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i { let mut dst: __m128i; asm!( - "vmovdqa32 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31131,11 +31162,11 @@ pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i { let mut dst: __m128i = src; asm!( - "vmovdqa64 {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa64 {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31150,11 +31181,11 @@ pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i6 pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i { let mut dst: __m128i; asm!( - "vmovdqa64 {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31169,11 +31200,11 @@ pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 { let mut dst: __m128 = src; asm!( - "vmovaps {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovaps {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31188,11 +31219,11 @@ pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) - pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { let mut dst: __m128; asm!( - "vmovaps {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovaps {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31207,11 +31238,11 @@ pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d { let mut dst: __m128d = src; asm!( - "vmovapd {2}{{{1}}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - inout(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovapd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31226,11 +31257,11 @@ pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d { let mut dst: __m128d; asm!( - "vmovapd {2}{{{1}}} {{z}}, [{0}]", - in(reg) mem_addr, - in(kreg) k, - out(xmm_reg) dst, - options(pure, readonly, nostack) + vpl!("vmovapd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack) ); dst } @@ -31243,11 +31274,11 @@ pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d { #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) { asm!( - "vmovdqu32 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovdqu32", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31259,11 +31290,11 @@ pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: _ #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) { asm!( - "vmovdqu64 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovdqu64", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31275,11 +31306,11 @@ pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __ #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) { asm!( - "vmovups [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovups", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31291,11 +31322,11 @@ pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m5 #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) { asm!( - "vmovupd [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovupd", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31307,11 +31338,11 @@ pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m51 #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) { asm!( - "vmovdqu32 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovdqu32", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31323,11 +31354,11 @@ pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __ #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) { asm!( - "vmovdqu64 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovdqu64", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31339,11 +31370,11 @@ pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __ #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) { asm!( - "vmovups [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovups", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31355,11 +31386,11 @@ pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m25 #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) { asm!( - "vmovupd [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovupd", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31371,11 +31402,11 @@ pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m25 #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) { asm!( - "vmovdqu32 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovdqu32", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31387,11 +31418,11 @@ pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m12 #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) { asm!( - "vmovdqu64 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovdqu64", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31403,11 +31434,11 @@ pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m12 #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) { asm!( - "vmovups [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovups", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31419,11 +31450,11 @@ pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) { asm!( - "vmovupd [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovupd", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31435,11 +31466,11 @@ pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) { asm!( - "vmovdqa32 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovdqa32", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31451,11 +31482,11 @@ pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __ #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) { asm!( - "vmovdqa64 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovdqa64", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31467,11 +31498,11 @@ pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) { asm!( - "vmovaps [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovaps", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31483,11 +31514,11 @@ pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m51 #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) { asm!( - "vmovapd [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(zmm_reg) a, - options(nostack) + vps!("vmovapd", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(zmm_reg) a, + options(nostack) ); } @@ -31499,11 +31530,11 @@ pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512 #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) { asm!( - "vmovdqa32 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovdqa32", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31515,11 +31546,11 @@ pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) { asm!( - "vmovdqa64 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovdqa64", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31531,11 +31562,11 @@ pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) { asm!( - "vmovaps [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovaps", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31547,11 +31578,11 @@ pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256 #[target_feature(enable = "avx512f,avx512vl,avx")] pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) { asm!( - "vmovapd [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(ymm_reg) a, - options(nostack) + vps!("vmovapd", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(ymm_reg) a, + options(nostack) ); } @@ -31563,11 +31594,11 @@ pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256 #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) { asm!( - "vmovdqa32 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovdqa32", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31579,11 +31610,11 @@ pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128 #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) { asm!( - "vmovdqa64 [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovdqa64", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31595,11 +31626,11 @@ pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128 #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) { asm!( - "vmovaps [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovaps", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } @@ -31611,11 +31642,11 @@ pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) { #[target_feature(enable = "avx512f,avx512vl,avx,sse")] pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) { asm!( - "vmovapd [{0}]{{{1}}}, {2}", - in(reg) mem_addr, - in(kreg) mask, - in(xmm_reg) a, - options(nostack) + vps!("vmovapd", "{{{mask}}}, {a}"), + p = in(reg) mem_addr, + mask = in(kreg) mask, + a = in(xmm_reg) a, + options(nostack) ); } diff --git a/crates/core_arch/src/x86/avx512gfni.rs b/crates/core_arch/src/x86/avx512gfni.rs index 7b6d796237..d8ac5c29cc 100644 --- a/crates/core_arch/src/x86/avx512gfni.rs +++ b/crates/core_arch/src/x86/avx512gfni.rs @@ -819,7 +819,7 @@ mod tests { for i in 0..NUM_TEST_ENTRIES { left[i] = (i % 256) as u8; - right[i] = left[i] * 101; + right[i] = left[i].wrapping_mul(101); result[i] = mulbyte(left[i], right[i]); }