From 82ff02b031e70b9ce692f4793f8225da9111286c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= Date: Mon, 21 Sep 2020 20:25:52 +0300 Subject: [PATCH 1/3] Make char::is_ascii_whitespace branchless on 64-bit --- library/core/src/char/methods.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 2603ecf428c7d..afb4b8995225f 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1544,9 +1544,18 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_whitespace(&self) -> bool { - match *self { - '\t' | '\n' | '\x0C' | '\r' | ' ' => true, - _ => false, + #[cfg(target_pointer_width = "64")] + { + // Inspired from https://pdimov.github.io/blog/2020/07/19/llvm-and-memchr/ + const MASK: u64 = 1 << b'\t' | 1 << b'\n' | 1 << b'\x0C' | 1 << b'\r' | 1 << b' '; + *self <= ' ' && 1u64 << (*self as u8) & MASK != 0 + } + #[cfg(not(target_pointer_width = "64"))] + { + match *self { + '\t' | '\n' | '\x0C' | '\r' | ' ' => true, + _ => false, + } } } From 960c03920dae153a95bc4ec1b8b2a9caf24d2a8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= Date: Mon, 21 Sep 2020 21:18:23 +0300 Subject: [PATCH 2/3] Update mailmap --- .mailmap | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index fa0728bd79461..69b12834b4bf0 100644 --- a/.mailmap +++ b/.mailmap @@ -160,7 +160,8 @@ Kyle J Strand Kyle J Strand Kyle J Strand Kyle J Strand -Laurențiu Nicola +Laurențiu Nicola Laurentiu Nicola +Laurențiu Nicola Lee Jeffery Lee Jeffery Lee Wondong Lennart Kudling From f1e749519cc967408a4b59b9305ee1cc0eafb2f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= Date: Tue, 22 Sep 2020 12:13:51 +0300 Subject: [PATCH 3/3] Use a 32-bit implementation --- library/core/src/char/methods.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index afb4b8995225f..e83669cc476b8 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1544,13 +1544,18 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_whitespace(&self) -> bool { - #[cfg(target_pointer_width = "64")] + #[cfg(not(target_pointer_width = "16"))] { // Inspired from https://pdimov.github.io/blog/2020/07/19/llvm-and-memchr/ - const MASK: u64 = 1 << b'\t' | 1 << b'\n' | 1 << b'\x0C' | 1 << b'\r' | 1 << b' '; - *self <= ' ' && 1u64 << (*self as u8) & MASK != 0 + const MASK: u32 = 1 << (b'\t' - 1) + | 1 << (b'\n' - 1) + | 1 << (b'\x0C' - 1) + | 1 << (b'\r' - 1) + | 1 << (b' ' - 1); + let ch = (*self as u32).wrapping_sub(1); + ch < (' ' as u32) && 1 << (ch as u8) & MASK != 0 } - #[cfg(not(target_pointer_width = "64"))] + #[cfg(target_pointer_width = "16")] { match *self { '\t' | '\n' | '\x0C' | '\r' | ' ' => true,