|
2 | 2 | // Code ported from the `packed_simd` crate
|
3 | 3 | // Run this code with `cargo test --example matrix_inversion`
|
4 | 4 | #![feature(array_chunks, portable_simd)]
|
| 5 | +use core_simd::Which::*; |
5 | 6 | use core_simd::*;
|
6 | 7 |
|
7 | 8 | // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
|
@@ -163,86 +164,81 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
|
163 | 164 | let m_2 = f32x4::from_array(m[2]);
|
164 | 165 | let m_3 = f32x4::from_array(m[3]);
|
165 | 166 |
|
166 |
| - // 2 argument shuffle, returns an f32x4 |
167 |
| - // the first f32x4 is indexes 0..=3 |
168 |
| - // the second f32x4 is indexed 4..=7 |
169 |
| - let tmp1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_0, m_1); |
170 |
| - let row1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_2, m_3); |
| 167 | + const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)]; |
| 168 | + const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)]; |
| 169 | + const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)]; |
| 170 | + const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)]; |
171 | 171 |
|
172 |
| - let row0 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row1); |
173 |
| - let row1 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row1, tmp1); |
| 172 | + let tmp = simd_shuffle!(m_0, m_1, SHUFFLE01); |
| 173 | + let row1 = simd_shuffle!(m_2, m_3, SHUFFLE01); |
174 | 174 |
|
175 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_0, m_1); |
176 |
| - let row3 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_2, m_3); |
177 |
| - let row2 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row3); |
178 |
| - let row3 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row3, tmp1); |
| 175 | + let row0 = simd_shuffle!(tmp, row1, SHUFFLE02); |
| 176 | + let row1 = simd_shuffle!(row1, tmp, SHUFFLE13); |
179 | 177 |
|
180 |
| - let tmp1 = row2 * row3; |
181 |
| - // there's no syntax for a 1 arg shuffle yet, |
182 |
| - // so we just pass the same f32x4 twice |
183 |
| - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); |
| 178 | + let tmp = simd_shuffle!(m_0, m_1, SHUFFLE23); |
| 179 | + let row3 = simd_shuffle!(m_2, m_3, SHUFFLE23); |
| 180 | + let row2 = simd_shuffle!(tmp, row3, SHUFFLE02); |
| 181 | + let row3 = simd_shuffle!(row3, tmp, SHUFFLE13); |
184 | 182 |
|
185 |
| - let minor0 = row1 * tmp1; |
186 |
| - let minor1 = row0 * tmp1; |
187 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); |
188 |
| - let minor0 = (row1 * tmp1) - minor0; |
189 |
| - let minor1 = (row0 * tmp1) - minor1; |
190 |
| - let minor1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor1, minor1); |
| 183 | + let tmp = (row2 * row3).reverse().rotate_right::<2>(); |
| 184 | + let minor0 = row1 * tmp; |
| 185 | + let minor1 = row0 * tmp; |
| 186 | + let tmp = tmp.rotate_right::<2>(); |
| 187 | + let minor0 = (row1 * tmp) - minor0; |
| 188 | + let minor1 = (row0 * tmp) - minor1; |
| 189 | + let minor1 = minor1.rotate_right::<2>(); |
191 | 190 |
|
192 |
| - let tmp1 = row1 * row2; |
193 |
| - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); |
194 |
| - let minor0 = (row3 * tmp1) + minor0; |
195 |
| - let minor3 = row0 * tmp1; |
196 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); |
| 191 | + let tmp = (row1 * row2).reverse().rotate_right::<2>(); |
| 192 | + let minor0 = (row3 * tmp) + minor0; |
| 193 | + let minor3 = row0 * tmp; |
| 194 | + let tmp = tmp.rotate_right::<2>(); |
197 | 195 |
|
198 |
| - let minor0 = minor0 - row3 * tmp1; |
199 |
| - let minor3 = row0 * tmp1 - minor3; |
200 |
| - let minor3 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor3, minor3); |
| 196 | + let minor0 = minor0 - row3 * tmp; |
| 197 | + let minor3 = row0 * tmp - minor3; |
| 198 | + let minor3 = minor3.rotate_right::<2>(); |
201 | 199 |
|
202 |
| - let tmp1 = row3 * f32x4::shuffle::<{ [2, 3, 0, 1] }>(row1, row1); |
203 |
| - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); |
204 |
| - let row2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(row2, row2); |
205 |
| - let minor0 = row2 * tmp1 + minor0; |
206 |
| - let minor2 = row0 * tmp1; |
207 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); |
208 |
| - let minor0 = minor0 - row2 * tmp1; |
209 |
| - let minor2 = row0 * tmp1 - minor2; |
210 |
| - let minor2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor2, minor2); |
| 200 | + let tmp = (row3 * row1.rotate_right::<2>()) |
| 201 | + .reverse() |
| 202 | + .rotate_right::<2>(); |
| 203 | + let row2 = row2.rotate_right::<2>(); |
| 204 | + let minor0 = row2 * tmp + minor0; |
| 205 | + let minor2 = row0 * tmp; |
| 206 | + let tmp = tmp.rotate_right::<2>(); |
| 207 | + let minor0 = minor0 - row2 * tmp; |
| 208 | + let minor2 = row0 * tmp - minor2; |
| 209 | + let minor2 = minor2.rotate_right::<2>(); |
211 | 210 |
|
212 |
| - let tmp1 = row0 * row1; |
213 |
| - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); |
214 |
| - let minor2 = minor2 + row3 * tmp1; |
215 |
| - let minor3 = row2 * tmp1 - minor3; |
216 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); |
217 |
| - let minor2 = row3 * tmp1 - minor2; |
218 |
| - let minor3 = minor3 - row2 * tmp1; |
| 211 | + let tmp = (row0 * row1).reverse().rotate_right::<2>(); |
| 212 | + let minor2 = minor2 + row3 * tmp; |
| 213 | + let minor3 = row2 * tmp - minor3; |
| 214 | + let tmp = tmp.rotate_right::<2>(); |
| 215 | + let minor2 = row3 * tmp - minor2; |
| 216 | + let minor3 = minor3 - row2 * tmp; |
219 | 217 |
|
220 |
| - let tmp1 = row0 * row3; |
221 |
| - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); |
222 |
| - let minor1 = minor1 - row2 * tmp1; |
223 |
| - let minor2 = row1 * tmp1 + minor2; |
224 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); |
225 |
| - let minor1 = row2 * tmp1 + minor1; |
226 |
| - let minor2 = minor2 - row1 * tmp1; |
| 218 | + let tmp = (row0 * row3).reverse().rotate_right::<2>(); |
| 219 | + let minor1 = minor1 - row2 * tmp; |
| 220 | + let minor2 = row1 * tmp + minor2; |
| 221 | + let tmp = tmp.rotate_right::<2>(); |
| 222 | + let minor1 = row2 * tmp + minor1; |
| 223 | + let minor2 = minor2 - row1 * tmp; |
227 | 224 |
|
228 |
| - let tmp1 = row0 * row2; |
229 |
| - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); |
230 |
| - let minor1 = row3 * tmp1 + minor1; |
231 |
| - let minor3 = minor3 - row1 * tmp1; |
232 |
| - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); |
233 |
| - let minor1 = minor1 - row3 * tmp1; |
234 |
| - let minor3 = row1 * tmp1 + minor3; |
| 225 | + let tmp = (row0 * row2).reverse().rotate_right::<2>(); |
| 226 | + let minor1 = row3 * tmp + minor1; |
| 227 | + let minor3 = minor3 - row1 * tmp; |
| 228 | + let tmp = tmp.rotate_right::<2>(); |
| 229 | + let minor1 = minor1 - row3 * tmp; |
| 230 | + let minor3 = row1 * tmp + minor3; |
235 | 231 |
|
236 | 232 | let det = row0 * minor0;
|
237 |
| - let det = f32x4::shuffle::<{ [2, 3, 0, 1] }>(det, det) + det; |
238 |
| - let det = f32x4::shuffle::<{ [1, 0, 3, 2] }>(det, det) + det; |
| 233 | + let det = det.rotate_right::<2>() + det; |
| 234 | + let det = det.reverse().rotate_right::<2>() + det; |
239 | 235 |
|
240 | 236 | if det.horizontal_sum() == 0. {
|
241 | 237 | return None;
|
242 | 238 | }
|
243 | 239 | // calculate the reciprocal of `det`, then refine it with one Newton-Raphson step (x' = 2x - det * x * x)
|
244 |
| - let tmp1 = f32x4::splat(1.0) / det; |
245 |
| - let det = tmp1 + tmp1 - det * tmp1 * tmp1; |
| 240 | + let tmp = f32x4::splat(1.0) / det; |
| 241 | + let det = tmp + tmp - det * tmp * tmp; |
246 | 242 |
|
247 | 243 | let res0 = minor0 * det;
|
248 | 244 | let res1 = minor1 * det;
|
|
0 commit comments