Skip to content

Commit c41dcae

Browse files
committed
Auto merge of rust-lang#76971 - bugadani:issue-75659, r=Amanieu
Refactor memchr to allow optimization Closes rust-lang#75659 The implementation already uses naive search if the slice if short enough, but the case is complicated enough to not be optimized away. This PR refactors memchr so that it exists early when the slice is short enough. Codegen-wise, as shown in rust-lang#75659, memchr was not inlined previously so the only way I could find to test this is to check if there is no memchr call. Let me know if there is a more robust solution here.
2 parents 99ad0be + f9cf45b commit c41dcae

File tree

3 files changed

+27
-16
lines changed

3 files changed

+27
-16
lines changed

core/src/slice/cmp.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,14 @@ where
268268
}
269269

270270
impl SliceContains for u8 {
271+
#[inline]
271272
fn slice_contains(&self, x: &[Self]) -> bool {
272273
memchr::memchr(*self, x).is_some()
273274
}
274275
}
275276

276277
impl SliceContains for i8 {
278+
#[inline]
277279
fn slice_contains(&self, x: &[Self]) -> bool {
278280
let byte = *self as u8;
279281
// SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`

core/src/slice/memchr.rs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
1212
// Use truncation.
1313
const LO_USIZE: usize = LO_U64 as usize;
1414
const HI_USIZE: usize = HI_U64 as usize;
15+
const USIZE_BYTES: usize = mem::size_of::<usize>();
1516

1617
/// Returns `true` if `x` contains any zero byte.
1718
///
@@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
3839
}
3940

4041
/// Returns the first index matching the byte `x` in `text`.
42+
#[inline]
4143
pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
44+
// Fast path for small slices
45+
if text.len() < 2 * USIZE_BYTES {
46+
return text.iter().position(|elt| *elt == x);
47+
}
48+
49+
memchr_general_case(x, text)
50+
}
51+
52+
fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
4253
// Scan for a single byte value by reading two `usize` words at a time.
4354
//
4455
// Split `text` in three parts
4556
// - unaligned initial part, before the first word aligned address in text
4657
// - body, scan by 2 words at a time
4758
// - the last remaining part, < 2 word size
59+
60+
// search up to an aligned boundary
4861
let len = text.len();
4962
let ptr = text.as_ptr();
50-
let usize_bytes = mem::size_of::<usize>();
63+
let mut offset = ptr.align_offset(USIZE_BYTES);
5164

52-
// search up to an aligned boundary
53-
let mut offset = ptr.align_offset(usize_bytes);
5465
if offset > 0 {
5566
offset = cmp::min(offset, len);
5667
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
6071

6172
// search the body of the text
6273
let repeated_x = repeat_byte(x);
74+
while offset <= len - 2 * USIZE_BYTES {
75+
unsafe {
76+
let u = *(ptr.add(offset) as *const usize);
77+
let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);
6378

64-
if len >= 2 * usize_bytes {
65-
while offset <= len - 2 * usize_bytes {
66-
unsafe {
67-
let u = *(ptr.add(offset) as *const usize);
68-
let v = *(ptr.add(offset + usize_bytes) as *const usize);
69-
70-
// break if there is a matching byte
71-
let zu = contains_zero_byte(u ^ repeated_x);
72-
let zv = contains_zero_byte(v ^ repeated_x);
73-
if zu || zv {
74-
break;
75-
}
79+
// break if there is a matching byte
80+
let zu = contains_zero_byte(u ^ repeated_x);
81+
let zv = contains_zero_byte(v ^ repeated_x);
82+
if zu || zv {
83+
break;
7684
}
77-
offset += usize_bytes * 2;
7885
}
86+
offset += USIZE_BYTES * 2;
7987
}
8088

8189
// Find the byte after the point the body loop stopped.

core/src/slice/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,7 @@ impl<T> [T] {
16361636
/// assert!(!v.iter().any(|e| e == "hi"));
16371637
/// ```
16381638
#[stable(feature = "rust1", since = "1.0.0")]
1639+
#[inline]
16391640
pub fn contains(&self, x: &T) -> bool
16401641
where
16411642
T: PartialEq,

0 commit comments

Comments
 (0)