Skip to content

Commit 10168fb

Browse files
calebzulawski authored and workingjubilee committed
Add new swizzle API
Expand swizzle API and migrate existing functions. Add rotate_left, rotate_right. Hide implementation details. Add simd_shuffle macro.
1 parent a16b481 commit 10168fb

File tree

8 files changed

+491
-261
lines changed

8 files changed

+491
-261
lines changed

crates/core_simd/examples/matrix_inversion.rs

Lines changed: 59 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
// Code ported from the `packed_simd` crate
33
// Run this code with `cargo test --example matrix_inversion`
44
#![feature(array_chunks, portable_simd)]
5+
use core_simd::Which::*;
56
use core_simd::*;
67

78
// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
@@ -163,86 +164,81 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
163164
let m_2 = f32x4::from_array(m[2]);
164165
let m_3 = f32x4::from_array(m[3]);
165166

166-
// 2 argument shuffle, returns an f32x4
167-
// the first f32x4 is indexes 0..=3
168-
// the second f32x4 is indexed 4..=7
169-
let tmp1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_0, m_1);
170-
let row1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_2, m_3);
167+
const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
168+
const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
169+
const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
170+
const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
171171

172-
let row0 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row1);
173-
let row1 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row1, tmp1);
172+
let tmp = simd_shuffle!(m_0, m_1, SHUFFLE01);
173+
let row1 = simd_shuffle!(m_2, m_3, SHUFFLE01);
174174

175-
let tmp1 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_0, m_1);
176-
let row3 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_2, m_3);
177-
let row2 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row3);
178-
let row3 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row3, tmp1);
175+
let row0 = simd_shuffle!(tmp, row1, SHUFFLE02);
176+
let row1 = simd_shuffle!(row1, tmp, SHUFFLE13);
179177

180-
let tmp1 = row2 * row3;
181-
// there's no syntax for a 1 arg shuffle yet,
182-
// so we just pass the same f32x4 twice
183-
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
178+
let tmp = simd_shuffle!(m_0, m_1, SHUFFLE23);
179+
let row3 = simd_shuffle!(m_2, m_3, SHUFFLE23);
180+
let row2 = simd_shuffle!(tmp, row3, SHUFFLE02);
181+
let row3 = simd_shuffle!(row3, tmp, SHUFFLE13);
184182

185-
let minor0 = row1 * tmp1;
186-
let minor1 = row0 * tmp1;
187-
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
188-
let minor0 = (row1 * tmp1) - minor0;
189-
let minor1 = (row0 * tmp1) - minor1;
190-
let minor1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor1, minor1);
183+
let tmp = (row2 * row3).reverse().rotate_right::<2>();
184+
let minor0 = row1 * tmp;
185+
let minor1 = row0 * tmp;
186+
let tmp = tmp.rotate_right::<2>();
187+
let minor0 = (row1 * tmp) - minor0;
188+
let minor1 = (row0 * tmp) - minor1;
189+
let minor1 = minor1.rotate_right::<2>();
191190

192-
let tmp1 = row1 * row2;
193-
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
194-
let minor0 = (row3 * tmp1) + minor0;
195-
let minor3 = row0 * tmp1;
196-
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
191+
let tmp = (row1 * row2).reverse().rotate_right::<2>();
192+
let minor0 = (row3 * tmp) + minor0;
193+
let minor3 = row0 * tmp;
194+
let tmp = tmp.rotate_right::<2>();
197195

198-
let minor0 = minor0 - row3 * tmp1;
199-
let minor3 = row0 * tmp1 - minor3;
200-
let minor3 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor3, minor3);
196+
let minor0 = minor0 - row3 * tmp;
197+
let minor3 = row0 * tmp - minor3;
198+
let minor3 = minor3.rotate_right::<2>();
201199

202-
let tmp1 = row3 * f32x4::shuffle::<{ [2, 3, 0, 1] }>(row1, row1);
203-
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
204-
let row2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(row2, row2);
205-
let minor0 = row2 * tmp1 + minor0;
206-
let minor2 = row0 * tmp1;
207-
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
208-
let minor0 = minor0 - row2 * tmp1;
209-
let minor2 = row0 * tmp1 - minor2;
210-
let minor2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor2, minor2);
200+
let tmp = (row3 * row1.rotate_right::<2>())
201+
.reverse()
202+
.rotate_right::<2>();
203+
let row2 = row2.rotate_right::<2>();
204+
let minor0 = row2 * tmp + minor0;
205+
let minor2 = row0 * tmp;
206+
let tmp = tmp.rotate_right::<2>();
207+
let minor0 = minor0 - row2 * tmp;
208+
let minor2 = row0 * tmp - minor2;
209+
let minor2 = minor2.rotate_right::<2>();
211210

212-
let tmp1 = row0 * row1;
213-
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
214-
let minor2 = minor2 + row3 * tmp1;
215-
let minor3 = row2 * tmp1 - minor3;
216-
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
217-
let minor2 = row3 * tmp1 - minor2;
218-
let minor3 = minor3 - row2 * tmp1;
211+
let tmp = (row0 * row1).reverse().rotate_right::<2>();
212+
let minor2 = minor2 + row3 * tmp;
213+
let minor3 = row2 * tmp - minor3;
214+
let tmp = tmp.rotate_right::<2>();
215+
let minor2 = row3 * tmp - minor2;
216+
let minor3 = minor3 - row2 * tmp;
219217

220-
let tmp1 = row0 * row3;
221-
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
222-
let minor1 = minor1 - row2 * tmp1;
223-
let minor2 = row1 * tmp1 + minor2;
224-
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
225-
let minor1 = row2 * tmp1 + minor1;
226-
let minor2 = minor2 - row1 * tmp1;
218+
let tmp = (row0 * row3).reverse().rotate_right::<2>();
219+
let minor1 = minor1 - row2 * tmp;
220+
let minor2 = row1 * tmp + minor2;
221+
let tmp = tmp.rotate_right::<2>();
222+
let minor1 = row2 * tmp + minor1;
223+
let minor2 = minor2 - row1 * tmp;
227224

228-
let tmp1 = row0 * row2;
229-
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
230-
let minor1 = row3 * tmp1 + minor1;
231-
let minor3 = minor3 - row1 * tmp1;
232-
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
233-
let minor1 = minor1 - row3 * tmp1;
234-
let minor3 = row1 * tmp1 + minor3;
225+
let tmp = (row0 * row2).reverse().rotate_right::<2>();
226+
let minor1 = row3 * tmp + minor1;
227+
let minor3 = minor3 - row1 * tmp;
228+
let tmp = tmp.rotate_right::<2>();
229+
let minor1 = minor1 - row3 * tmp;
230+
let minor3 = row1 * tmp + minor3;
235231

236232
let det = row0 * minor0;
237-
let det = f32x4::shuffle::<{ [2, 3, 0, 1] }>(det, det) + det;
238-
let det = f32x4::shuffle::<{ [1, 0, 3, 2] }>(det, det) + det;
233+
let det = det.rotate_right::<2>() + det;
234+
let det = det.reverse().rotate_right::<2>() + det;
239235

240236
if det.horizontal_sum() == 0. {
241237
return None;
242238
}
243239
// calculate the reciprocal
244-
let tmp1 = f32x4::splat(1.0) / det;
245-
let det = tmp1 + tmp1 - det * tmp1 * tmp1;
240+
let tmp = f32x4::splat(1.0) / det;
241+
let det = tmp + tmp - det * tmp * tmp;
246242

247243
let res0 = minor0 * det;
248244
let res1 = minor1 * det;

crates/core_simd/src/intrinsics.rs

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,11 +54,7 @@ extern "platform-intrinsic" {
5454
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
5555

5656
// shufflevector
57-
pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
58-
pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
59-
pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
60-
pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
61-
pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
57+
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
6258

6359
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
6460
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);

crates/core_simd/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
#![feature(
44
adt_const_params,
55
const_fn_trait_bound,
6+
const_panic,
67
platform_intrinsics,
78
repr_simd,
89
simd_ffi,

crates/core_simd/src/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
#[macro_use]
2-
mod permute;
3-
#[macro_use]
42
mod reduction;
53

4+
#[macro_use]
5+
mod swizzle;
6+
67
pub(crate) mod intrinsics;
78

89
#[cfg(feature = "generic_const_exprs")]
@@ -27,5 +28,6 @@ pub mod simd {
2728
pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
2829
pub use crate::core_simd::masks::*;
2930
pub use crate::core_simd::select::Select;
31+
pub use crate::core_simd::swizzle::*;
3032
pub use crate::core_simd::vector::*;
3133
}

crates/core_simd/src/permute.rs

Lines changed: 0 additions & 154 deletions
This file was deleted.

0 commit comments

Comments
 (0)