Skip to content

Commit cba098b

Browse files
Commit message: Allow optimizing out `panic_bounds_check` in Unicode checks.
1 parent: bb10e81 · commit: cba098b

File tree

1 file changed

+34
-39
lines changed

1 file changed

+34
-39
lines changed

core/src/unicode/unicode_data.rs

Lines changed: 34 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -55,24 +55,31 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5555
(short_offset_run_header >> 21) as usize
5656
}
5757

58+
/// # Safety
59+
///
60+
/// The last element of `short_offset_runs` must be greater than `std::char::MAX` (compared on its low 21 bits, which are what the `<< 11` binary-search key retains).
5861
#[inline(always)]
59-
fn skip_search<const SOR: usize, const OFFSETS: usize>(
60-
needle: u32,
62+
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
63+
needle: char,
6164
short_offset_runs: &[u32; SOR],
6265
offsets: &[u8; OFFSETS],
6366
) -> bool {
64-
// Note that this *cannot* be past the end of the array, as the last
65-
// element is greater than std::char::MAX (the largest possible needle).
66-
//
67-
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
68-
// location cannot be past it, so Err(idx) != length either.
69-
//
70-
// This means that we can avoid bounds checking for the accesses below, too.
67+
let needle = needle as u32;
68+
7169
let last_idx =
7270
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
7371
Ok(idx) => idx + 1,
7472
Err(idx) => idx,
7573
};
74+
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
75+
// element is greater than `std::char::MAX` (the largest possible needle)
76+
// as guaranteed by the caller.
77+
//
78+
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
79+
// correct location cannot be past it, so `Err(idx) => idx != length` either.
80+
//
81+
// This means that we can avoid bounds checking for the accesses below, too.
82+
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
7683

7784
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
7885
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
@@ -169,11 +176,9 @@ pub mod alphabetic {
169176
0, 0, 0, 0, 5, 0, 0,
170177
];
171178
pub fn lookup(c: char) -> bool {
172-
super::skip_search(
173-
c as u32,
174-
&SHORT_OFFSET_RUNS,
175-
&OFFSETS,
176-
)
179+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
180+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
181+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
177182
}
178183
}
179184

@@ -222,11 +227,9 @@ pub mod case_ignorable {
222227
1, 61, 4, 0, 5, 254, 2, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
223228
];
224229
pub fn lookup(c: char) -> bool {
225-
super::skip_search(
226-
c as u32,
227-
&SHORT_OFFSET_RUNS,
228-
&OFFSETS,
229-
)
230+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
231+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
232+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
230233
}
231234
}
232235

@@ -252,11 +255,9 @@ pub mod cased {
252255
8, 0, 10, 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
253256
];
254257
pub fn lookup(c: char) -> bool {
255-
super::skip_search(
256-
c as u32,
257-
&SHORT_OFFSET_RUNS,
258-
&OFFSETS,
259-
)
258+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
259+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
260+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
260261
}
261262
}
262263

@@ -269,11 +270,9 @@ pub mod cc {
269270
0, 32, 95, 33, 0,
270271
];
271272
pub fn lookup(c: char) -> bool {
272-
super::skip_search(
273-
c as u32,
274-
&SHORT_OFFSET_RUNS,
275-
&OFFSETS,
276-
)
273+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
274+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
275+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
277276
}
278277
}
279278

@@ -320,11 +319,9 @@ pub mod grapheme_extend {
320319
(c as u32) >= 0x300 && lookup_slow(c)
321320
}
322321
fn lookup_slow(c: char) -> bool {
323-
super::skip_search(
324-
c as u32,
325-
&SHORT_OFFSET_RUNS,
326-
&OFFSETS,
327-
)
322+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
323+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
324+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
328325
}
329326
}
330327

@@ -459,11 +456,9 @@ pub mod n {
459456
10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
460457
];
461458
pub fn lookup(c: char) -> bool {
462-
super::skip_search(
463-
c as u32,
464-
&SHORT_OFFSET_RUNS,
465-
&OFFSETS,
466-
)
459+
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
460+
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
461+
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
467462
}
468463
}
469464

0 commit comments

Comments
 (0)