Skip to content

Commit 9284fe0

Browse files
committed
Add a lower bound check to unicode-table-generator output
This adds a dedicated check for the lower bound (if it is outside of the ASCII range) to the output of the `unicode-table-generator` tool. This generalizes the ASCII-only fast-path, but only for the `Grapheme_Extend` property for now, as that is the only one with a lower bound outside of ASCII.
1 parent 0c8dc98 commit 9284fe0

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

core/src/char/methods.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,7 @@ impl char {
927927
#[must_use]
928928
#[inline]
929929
pub(crate) fn is_grapheme_extended(self) -> bool {
930-
self > '\x7f' && unicode::Grapheme_Extend(self)
930+
unicode::Grapheme_Extend(self)
931931
}
932932

933933
/// Returns `true` if this `char` has one of the general categories for numbers.

core/src/unicode/unicode_data.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,11 @@ pub mod grapheme_extend {
315315
15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, 4, 0, 4, 0, 7, 109, 7, 0, 96,
316316
128, 240, 0,
317317
];
318+
#[inline]
318319
pub fn lookup(c: char) -> bool {
320+
(c as u32) >= 0x300 && lookup_slow(c)
321+
}
322+
fn lookup_slow(c: char) -> bool {
319323
super::skip_search(
320324
c as u32,
321325
&SHORT_OFFSET_RUNS,

0 commit comments

Comments
 (0)