Skip to content

Commit be5b04b

Browse files
bjorn3 authored and gnzlbg committed
Use simd_* in x86/avx2.rs where possible
1 parent 806cc71 commit be5b04b

File tree

1 file changed

+8
-24
lines changed

1 file changed

+8
-24
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 8 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
111111
#[cfg_attr(test, assert_instr(vpaddsb))]
112112
#[stable(feature = "simd_x86", since = "1.27.0")]
113113
pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
114-
transmute(paddsb(a.as_i8x32(), b.as_i8x32()))
114+
transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32()))
115115
}
116116

117117
/// Adds packed 16-bit integers in `a` and `b` using saturation.
@@ -122,7 +122,7 @@ pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
122122
#[cfg_attr(test, assert_instr(vpaddsw))]
123123
#[stable(feature = "simd_x86", since = "1.27.0")]
124124
pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
125-
transmute(paddsw(a.as_i16x16(), b.as_i16x16()))
125+
transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16()))
126126
}
127127

128128
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
@@ -133,7 +133,7 @@ pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
133133
#[cfg_attr(test, assert_instr(vpaddusb))]
134134
#[stable(feature = "simd_x86", since = "1.27.0")]
135135
pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
136-
transmute(paddusb(a.as_u8x32(), b.as_u8x32()))
136+
transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32()))
137137
}
138138

139139
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
@@ -144,7 +144,7 @@ pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
144144
#[cfg_attr(test, assert_instr(vpaddusw))]
145145
#[stable(feature = "simd_x86", since = "1.27.0")]
146146
pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
147-
transmute(paddusw(a.as_u16x16(), b.as_u16x16()))
147+
transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16()))
148148
}
149149

150150
/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
@@ -3331,7 +3331,7 @@ pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
33313331
#[cfg_attr(test, assert_instr(vpsubsw))]
33323332
#[stable(feature = "simd_x86", since = "1.27.0")]
33333333
pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3334-
transmute(psubsw(a.as_i16x16(), b.as_i16x16()))
3334+
transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16()))
33353335
}
33363336

33373337
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
@@ -3343,7 +3343,7 @@ pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
33433343
#[cfg_attr(test, assert_instr(vpsubsb))]
33443344
#[stable(feature = "simd_x86", since = "1.27.0")]
33453345
pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3346-
transmute(psubsb(a.as_i8x32(), b.as_i8x32()))
3346+
transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32()))
33473347
}
33483348

33493349
/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
@@ -3355,7 +3355,7 @@ pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
33553355
#[cfg_attr(test, assert_instr(vpsubusw))]
33563356
#[stable(feature = "simd_x86", since = "1.27.0")]
33573357
pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3358-
transmute(psubusw(a.as_u16x16(), b.as_u16x16()))
3358+
transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16()))
33593359
}
33603360

33613361
/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
@@ -3367,7 +3367,7 @@ pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
33673367
#[cfg_attr(test, assert_instr(vpsubusb))]
33683368
#[stable(feature = "simd_x86", since = "1.27.0")]
33693369
pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3370-
transmute(psubusb(a.as_u8x32(), b.as_u8x32()))
3370+
transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32()))
33713371
}
33723372

33733373
/// Unpacks and interleaves 8-bit integers from the high half of each
@@ -3807,14 +3807,6 @@ extern "C" {
38073807
fn pabsw(a: i16x16) -> u16x16;
38083808
#[link_name = "llvm.x86.avx2.pabs.d"]
38093809
fn pabsd(a: i32x8) -> u32x8;
3810-
#[link_name = "llvm.x86.avx2.padds.b"]
3811-
fn paddsb(a: i8x32, b: i8x32) -> i8x32;
3812-
#[link_name = "llvm.x86.avx2.padds.w"]
3813-
fn paddsw(a: i16x16, b: i16x16) -> i16x16;
3814-
#[link_name = "llvm.x86.avx2.paddus.b"]
3815-
fn paddusb(a: u8x32, b: u8x32) -> u8x32;
3816-
#[link_name = "llvm.x86.avx2.paddus.w"]
3817-
fn paddusw(a: u16x16, b: u16x16) -> u16x16;
38183810
#[link_name = "llvm.x86.avx2.pavg.b"]
38193811
fn pavgb(a: u8x32, b: u8x32) -> u8x32;
38203812
#[link_name = "llvm.x86.avx2.pavg.w"]
@@ -3959,14 +3951,6 @@ extern "C" {
39593951
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
39603952
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
39613953
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3962-
#[link_name = "llvm.x86.avx2.psubs.b"]
3963-
fn psubsb(a: i8x32, b: i8x32) -> i8x32;
3964-
#[link_name = "llvm.x86.avx2.psubs.w"]
3965-
fn psubsw(a: i16x16, b: i16x16) -> i16x16;
3966-
#[link_name = "llvm.x86.avx2.psubus.b"]
3967-
fn psubusb(a: u8x32, b: u8x32) -> u8x32;
3968-
#[link_name = "llvm.x86.avx2.psubus.w"]
3969-
fn psubusw(a: u16x16, b: u16x16) -> u16x16;
39703954
#[link_name = "llvm.x86.avx2.pshuf.b"]
39713955
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
39723956
#[link_name = "llvm.x86.avx2.permd"]

0 commit comments

Comments
 (0)