Skip to content

Commit 49f88c3

Browse files
satiscugcatAmanieu
authored andcommitted
Changed implementation of _mm{256,512}_alignr_epi8 to match that of _mm_alignr_epi8 in ssse3.rs, also removed the import of the unreachable unchecked hint as it was no longer necessary
1 parent a182113 commit 49f88c3

File tree

2 files changed

+149
-350
lines changed

2 files changed

+149
-350
lines changed

stdarch/crates/core_arch/src/x86/avx2.rs

Lines changed: 68 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
1919
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
2020
21-
use core::hint::unreachable_unchecked;
21+
2222

2323
use crate::core_arch::{simd::*, x86::*};
2424
use crate::intrinsics::simd::*;
@@ -170,158 +170,74 @@ pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
170170
#[stable(feature = "simd_x86", since = "1.27.0")]
171171
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
172172
static_assert_uimm_bits!(IMM8, 8);
173+
174+
175+
// If palignr is shifting the pair of vectors more than the size of two
176+
// lanes, emit zero.
177+
if IMM8 >= 32 {
178+
return _mm256_setzero_si256();
179+
}
180+
// If palignr is shifting the pair of input vectors more than one lane,
181+
// but less than two lanes, convert to shifting in zeroes.
182+
let (a, b) = if IMM8 > 16 {
183+
(_mm256_setzero_si256(), a)
184+
} else {
185+
(a, b)
186+
};
173187
unsafe {
174-
// If palignr is shifting the pair of vectors more than the size of two
175-
// lanes, emit zero.
176-
if IMM8 >= 32 {
177-
return _mm256_setzero_si256();
178-
}
179-
// If palignr is shifting the pair of input vectors more than one lane,
180-
// but less than two lanes, convert to shifting in zeroes.
181-
let (a, b) = if IMM8 > 16 {
182-
(_mm256_setzero_si256(), a)
183-
} else {
184-
(a, b)
185-
};
186-
187-
let a = a.as_i8x32();
188-
let b = b.as_i8x32();
189-
190-
if IMM8 == 16 {
191-
return transmute(a);
192-
}
193-
194-
let r: i8x32 = match IMM8 % 16 {
195-
0 => simd_shuffle!(
196-
b,
197-
a,
198-
[
199-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
200-
22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
201-
],
202-
),
203-
1 => simd_shuffle!(
204-
b,
205-
a,
206-
[
207-
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
208-
23, 24, 25, 26, 27, 28, 29, 30, 31, 48,
209-
],
210-
),
211-
2 => simd_shuffle!(
212-
b,
213-
a,
214-
[
215-
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23,
216-
24, 25, 26, 27, 28, 29, 30, 31, 48, 49,
217-
],
218-
),
219-
3 => simd_shuffle!(
220-
b,
221-
a,
222-
[
223-
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23,
224-
24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
225-
],
226-
),
227-
4 => simd_shuffle!(
228-
b,
229-
a,
230-
[
231-
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24,
232-
25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
233-
],
234-
),
235-
5 => simd_shuffle!(
236-
b,
237-
a,
238-
[
239-
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25,
240-
26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
241-
],
242-
),
243-
6 => simd_shuffle!(
244-
b,
245-
a,
246-
[
247-
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26,
248-
27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
249-
],
250-
),
251-
7 => simd_shuffle!(
252-
b,
253-
a,
254-
[
255-
7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26,
256-
27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
257-
],
258-
),
259-
8 => simd_shuffle!(
260-
b,
261-
a,
262-
[
263-
8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27,
264-
28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
265-
],
266-
),
267-
9 => simd_shuffle!(
268-
b,
269-
a,
270-
[
271-
9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28,
272-
29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
273-
],
274-
),
275-
10 => simd_shuffle!(
276-
b,
277-
a,
278-
[
279-
10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29,
280-
30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
281-
],
282-
),
283-
11 => simd_shuffle!(
284-
b,
285-
a,
286-
[
287-
11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30,
288-
31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
289-
],
290-
),
291-
12 => simd_shuffle!(
292-
b,
293-
a,
294-
[
295-
12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31,
296-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
297-
],
298-
),
299-
13 => simd_shuffle!(
300-
b,
301-
a,
302-
[
303-
13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48,
304-
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
305-
],
306-
),
307-
14 => simd_shuffle!(
308-
b,
309-
a,
310-
[
311-
14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49,
312-
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
313-
],
314-
),
315-
15 => simd_shuffle!(
316-
b,
317-
a,
318-
[
319-
15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50,
320-
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
321-
],
322-
),
323-
_ => unreachable_unchecked(),
324-
};
188+
if IMM8 == 16 {
189+
return transmute(a)
190+
}
191+
}
192+
const fn mask(shift: u32, i: u32) -> u32 {
193+
let shift = shift % 16;
194+
let mod_i = i%16;
195+
if mod_i < (16 - shift) {
196+
i + shift
197+
} else {
198+
i + 16 + shift
199+
}
200+
}
201+
202+
unsafe {
203+
let r: i8x32 = simd_shuffle!(
204+
b.as_i8x32(),
205+
a.as_i8x32(),
206+
[
207+
mask(IMM8 as u32, 0),
208+
mask(IMM8 as u32, 1),
209+
mask(IMM8 as u32, 2),
210+
mask(IMM8 as u32, 3),
211+
mask(IMM8 as u32, 4),
212+
mask(IMM8 as u32, 5),
213+
mask(IMM8 as u32, 6),
214+
mask(IMM8 as u32, 7),
215+
mask(IMM8 as u32, 8),
216+
mask(IMM8 as u32, 9),
217+
mask(IMM8 as u32, 10),
218+
mask(IMM8 as u32, 11),
219+
mask(IMM8 as u32, 12),
220+
mask(IMM8 as u32, 13),
221+
mask(IMM8 as u32, 14),
222+
mask(IMM8 as u32, 15),
223+
mask(IMM8 as u32, 16),
224+
mask(IMM8 as u32, 17),
225+
mask(IMM8 as u32, 18),
226+
mask(IMM8 as u32, 19),
227+
mask(IMM8 as u32, 20),
228+
mask(IMM8 as u32, 21),
229+
mask(IMM8 as u32, 22),
230+
mask(IMM8 as u32, 23),
231+
mask(IMM8 as u32, 24),
232+
mask(IMM8 as u32, 25),
233+
mask(IMM8 as u32, 26),
234+
mask(IMM8 as u32, 27),
235+
mask(IMM8 as u32, 28),
236+
mask(IMM8 as u32, 29),
237+
mask(IMM8 as u32, 30),
238+
mask(IMM8 as u32, 31),
239+
],
240+
);
325241
transmute(r)
326242
}
327243
}

0 commit comments

Comments
 (0)