Skip to content

Commit 6d23662

Browse files
Add {gather,scatter}_select_unchecked
This unsafe variant allows the thinnest API, in case LLVM cannot perform loop-invariant code motion on a hot loop when the safe form is used. An unchecked variant could be added to other forms, but doesn't seem likely to improve anything, since it would just add heavier codegen.
1 parent 436ca7f commit 6d23662

File tree

1 file changed

+66
-6
lines changed

1 file changed

+66
-6
lines changed

crates/core_simd/src/vector.rs

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,42 @@ where
119119
idxs: Simd<usize, LANES>,
120120
or: Self,
121121
) -> Self {
122-
let mask = (mask & idxs.lanes_lt(Simd::splat(slice.len()))).to_int();
122+
let mask: Mask<isize, LANES> = mask & idxs.lanes_lt(Simd::splat(slice.len()));
123+
// SAFETY: We have masked-off out-of-bounds lanes.
124+
unsafe { Self::gather_select_unchecked(slice, mask, idxs, or) }
125+
}
126+
127+
/// Unsafe SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
128+
/// Masked indices instead select the value from the "or" vector.
129+
/// Calling `gather_select_unchecked` is undefined behavior if any unmasked index is out of bounds of the slice.
130+
/// ```
131+
/// # #![feature(portable_simd)]
132+
/// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
133+
/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
134+
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
135+
/// let idxs = Simd::from_array([9, 3, 0, 5]);
136+
/// let alt = Simd::from_array([-5, -4, -3, -2]);
137+
/// let mask = Mask::from_array([true, true, true, false]); // Note the final mask lane.
138+
/// // If this mask were used to gather, it would be unsound: lane 0 is enabled, but index 9 is out of bounds. Let's fix that.
139+
/// let mask = mask & idxs.lanes_lt(Simd::splat(vec.len()));
140+
///
141+
/// // We have masked the OOB lane, so it's safe to gather now.
142+
/// let result = unsafe { Simd::gather_select_unchecked(&vec, mask, idxs, alt) };
143+
/// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
144+
/// ```
145+
#[must_use]
146+
#[inline]
147+
pub unsafe fn gather_select_unchecked(
148+
slice: &[T],
149+
mask: Mask<isize, LANES>,
150+
idxs: Simd<usize, LANES>,
151+
or: Self,
152+
) -> Self {
123153
let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
124154
// Ferris forgive me, I have done pointer arithmetic here.
125155
let ptrs = base_ptr.wrapping_add(idxs);
126156
// SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
127-
unsafe { intrinsics::simd_gather(or, ptrs, mask) }
157+
unsafe { intrinsics::simd_gather(or, ptrs, mask.to_int()) }
128158
}
129159

130160
/// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
@@ -168,12 +198,42 @@ where
168198
mask: Mask<isize, LANES>,
169199
idxs: Simd<usize, LANES>,
170200
) {
171-
// We must construct our scatter mask before we derive a pointer!
172-
let mask = (mask & idxs.lanes_lt(Simd::splat(slice.len()))).to_int();
201+
let mask: Mask<isize, LANES> = mask & idxs.lanes_lt(Simd::splat(slice.len()));
202+
// SAFETY: We have masked-off out-of-bounds lanes.
203+
unsafe { self.scatter_select_unchecked(slice, mask, idxs) }
204+
}
205+
206+
/// Unsafe SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
207+
/// Out-of-bounds or masked indices are not written.
208+
/// Calling `scatter_select_unchecked` is undefined behavior if any unmasked index is out of bounds of the slice.
209+
/// `scatter_select_unchecked` writes "in order", so if the same index receives two writes, only the last is guaranteed.
210+
/// ```
211+
/// # #![feature(portable_simd)]
212+
/// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
213+
/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
214+
/// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
215+
/// let idxs = Simd::from_array([9, 3, 0, 0]);
216+
/// let vals = Simd::from_array([-27, 82, -41, 124]);
217+
/// let mask = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
218+
/// // If this mask were used to scatter, it would be unsound: lane 0 is enabled, but index 9 is out of bounds. Let's fix that.
219+
/// let mask = mask & idxs.lanes_lt(Simd::splat(vec.len()));
220+
///
221+
/// // We have masked the OOB lane, so it's safe to scatter now.
222+
/// unsafe { vals.scatter_select_unchecked(&mut vec, mask, idxs); }
223+
/// // index 0's second write is masked, thus was omitted.
224+
/// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
225+
/// ```
226+
#[inline]
227+
pub unsafe fn scatter_select_unchecked(
228+
self,
229+
slice: &mut [T],
230+
mask: Mask<isize, LANES>,
231+
idxs: Simd<usize, LANES>,
232+
) {
173233
// SAFETY: This block works with *mut T derived from &mut 'a [T],
174234
// which means it is delicate in Rust's borrowing model, circa 2021:
175235
// &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
176-
// Even though this block is largely safe methods, it must be almost exactly this way
236+
// Even though this block is largely safe methods, it must be exactly this way
177237
// to prevent invalidating the raw ptrs while they're live.
178238
// Thus, entering this block requires every value it uses to be ready beforehand:
179239
// 0. idxs we want to write to, which are used to construct the mask.
@@ -186,7 +246,7 @@ where
186246
// Ferris forgive me, I have done pointer arithmetic here.
187247
let ptrs = base_ptr.wrapping_add(idxs);
188248
// The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
189-
intrinsics::simd_scatter(self, ptrs, mask)
249+
intrinsics::simd_scatter(self, ptrs, mask.to_int())
190250
// Cleared ☢️ *mut T Zone
191251
}
192252
}

0 commit comments

Comments
 (0)