Skip to content

Commit 7c7c2db

Browse files
tormolThomas Bahn
authored andcommitted
Improve AsciiChar compatibility with u8 and char
* Change is_whitespace() to also return true for AsciiChar::VT and ::FF. * Rename a bunch of ctype methods and make them take self by reference: * is_digit() => is_ascii_digit() * is_hex() => is_ascii_hexdigit() + is_control() => is_ascii_control() + is_graph() => is_ascii_graphic() + is_blank() => is_ascii_blank() + is_print() => is_ascii_printable() * is_punctuation() => is_ascii_punctuation() * Add identical _ascii methods when char also has methods without _ascii, except that the _ascii methods take self by reference: * is_ascii_alphabetic() = is_alphabetic() * is_ascii_alphanumeric() = is_alphanumeric() * is_ascii_uppercase() = is_uppercase() * is_ascii_lowercase() = is_lowercase() * Add is_digit() which takes base as parameter. * Add is_ascii_whitespace() which returns true for FF but not VT.
1 parent fc44e75 commit 7c7c2db

File tree

1 file changed

+161
-50
lines changed

1 file changed

+161
-50
lines changed

src/ascii_char.rs

Lines changed: 161 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,12 @@ impl AsciiChar {
383383
self as u8 as char
384384
}
385385

386-
// the following methods are like ctype, and the implementation is inspired by musl
386+
// the following methods are like ctype, and the implementation is inspired by musl.
387+
// The ascii_ methods take self by reference for maximum compatibility
388+
// with the corresponding methods on u8 and char.
389+
// It is bad for both usability and performance, but marking those
390+
// that don't have a non-ascii sibling #[inline] should
391+
// make the compiler optimize away the indirection.
387392

388393
/// Turns uppercase into lowercase, but also modifies '@' and '<'..='_'
389394
const fn to_not_upper(self) -> u8 {
@@ -396,56 +401,122 @@ impl AsciiChar {
396401
(self.to_not_upper() >= b'a') & (self.to_not_upper() <= b'z')
397402
}
398403

404+
/// Check if the character is a letter (a-z, A-Z).
405+
///
406+
/// This method is identical to [`is_alphabetic()`](#method.is_alphabetic)
407+
pub fn is_ascii_alphabetic(&self) -> bool {
408+
self.is_alphabetic()
409+
}
410+
411+
/// Check if the character is a digit in the given radix.
412+
///
413+
/// If the radix is always 10 or 16,
414+
/// [`is_ascii_digit()`](#method.is_ascii_digit) and
415+
/// [`is_ascii_hexdigit()`](#method.is_ascii_hexdigit) will be faster.
416+
///
417+
/// # Panics
418+
///
419+
/// Radixes greater than 36 are not supported and will result in a panic.
420+
pub fn is_digit(self, radix: u32) -> bool {
421+
match (self as u8, radix) {
422+
(b'0'..=b'9', 0..=36) => u32::from(self as u8 - b'0') < radix,
423+
(b'a'..=b'z', 11..=36) => u32::from(self as u8 - b'a') < radix - 10,
424+
(b'A'..=b'Z', 11..=36) => u32::from(self as u8 - b'A') < radix - 10,
425+
(_, 0..=36) => false,
426+
(_, _) => panic!("radixes greater than 36 are not supported"),
427+
}
428+
}
429+
399430
/// Check if the character is a number (0-9)
431+
///
432+
/// # Examples
433+
/// ```
434+
/// # use ascii::AsciiChar;
435+
/// assert_eq!(AsciiChar::new('0').is_ascii_digit(), true);
436+
/// assert_eq!(AsciiChar::new('9').is_ascii_digit(), true);
437+
/// assert_eq!(AsciiChar::new('a').is_ascii_digit(), false);
438+
/// assert_eq!(AsciiChar::new('A').is_ascii_digit(), false);
439+
/// assert_eq!(AsciiChar::new('/').is_ascii_digit(), false);
440+
/// ```
400441
#[inline]
401-
pub const fn is_digit(self) -> bool {
402-
(self as u8 >= b'0') & (self as u8 <= b'9')
442+
pub const fn is_ascii_digit(&self) -> bool {
443+
(*self as u8 >= b'0') & (*self as u8 <= b'9')
403444
}
404445

405446
/// Check if the character is a letter or number
406447
#[inline]
407448
pub const fn is_alphanumeric(self) -> bool {
408-
self.is_alphabetic() | self.is_digit()
449+
self.is_alphabetic() | self.is_ascii_digit()
450+
}
451+
452+
/// Check if the character is a letter or number
453+
///
454+
/// This method is identical to [`is_alphanumeric()`](#method.is_alphanumeric)
455+
pub fn is_ascii_alphanumeric(&self) -> bool {
456+
self.is_alphanumeric()
409457
}
410458

411459
/// Check if the character is a space or horizontal tab
460+
///
461+
/// # Examples
462+
/// ```
463+
/// # use ascii::AsciiChar;
464+
/// assert!(AsciiChar::Space.is_ascii_blank());
465+
/// assert!(AsciiChar::Tab.is_ascii_blank());
466+
/// assert!(!AsciiChar::VT.is_ascii_blank());
467+
/// assert!(!AsciiChar::LineFeed.is_ascii_blank());
468+
/// assert!(!AsciiChar::CarriageReturn.is_ascii_blank());
469+
/// assert!(!AsciiChar::FF.is_ascii_blank());
470+
/// ```
412471
#[inline]
413-
pub const fn is_blank(self) -> bool {
472+
pub const fn is_ascii_blank(self) -> bool {
414473
(self as u8 == b' ') | (self as u8 == b'\t')
415474
}
416475

417-
/// Check if the character is a ' ', '\t', '\n' or '\r'
476+
/// Check if the character is one of ' ', '\t', '\n', '\r',
477+
/// '\x0b' (vertical tab) or '\x0c' (form feed).
418478
#[inline]
419479
pub const fn is_whitespace(self) -> bool {
420-
self.is_blank() | (self as u8 == b'\n') | (self as u8 == b'\r')
480+
let b = self as u8;
481+
self.is_ascii_blank() | (b == b'\n') | (b == b'\r') | (b == 0x0b) | (b == 0x0c)
482+
}
483+
484+
/// Check if the character is a ' ', '\t', '\n', '\r' or '\x0c' (form feed).
485+
///
486+
/// This method is NOT identical to `is_whitespace()`.
487+
#[inline]
488+
pub const fn is_ascii_whitespace(self) -> bool {
489+
self.is_ascii_blank() | (self as u8 == b'\n') | (self as u8 == b'\r') | (self as u8 == 0x0c)
421490
}
422491

423492
/// Check if the character is a control character
424493
///
425494
/// # Examples
426495
/// ```
427496
/// # use ascii::AsciiChar;
428-
/// assert_eq!(AsciiChar::new('\0').is_control(), true);
429-
/// assert_eq!(AsciiChar::new('n').is_control(), false);
430-
/// assert_eq!(AsciiChar::new(' ').is_control(), false);
431-
/// assert_eq!(AsciiChar::new('\n').is_control(), true);
497+
/// assert_eq!(AsciiChar::new('\0').is_ascii_control(), true);
498+
/// assert_eq!(AsciiChar::new('n').is_ascii_control(), false);
499+
/// assert_eq!(AsciiChar::new(' ').is_ascii_control(), false);
500+
/// assert_eq!(AsciiChar::new('\n').is_ascii_control(), true);
501+
/// assert_eq!(AsciiChar::new('\t').is_ascii_control(), true);
502+
/// assert_eq!(AsciiChar::EOT.is_ascii_control(), true);
432503
/// ```
433504
#[inline]
434-
pub const fn is_control(self) -> bool {
435-
((self as u8) < b' ') | (self as u8 == 127)
505+
pub const fn is_ascii_control(&self) -> bool {
506+
((*self as u8) < b' ') | (*self as u8 == 127)
436507
}
437508

438509
/// Checks if the character is printable (except space)
439510
///
440511
/// # Examples
441512
/// ```
442513
/// # use ascii::AsciiChar;
443-
/// assert_eq!(AsciiChar::new('n').is_graph(), true);
444-
/// assert_eq!(AsciiChar::new(' ').is_graph(), false);
445-
/// assert_eq!(AsciiChar::new('\n').is_graph(), false);
514+
/// assert_eq!(AsciiChar::new('n').is_ascii_graphic(), true);
515+
/// assert_eq!(AsciiChar::new(' ').is_ascii_graphic(), false);
516+
/// assert_eq!(AsciiChar::new('\n').is_ascii_graphic(), false);
446517
/// ```
447518
#[inline]
448-
pub const fn is_graph(self) -> bool {
519+
pub const fn is_ascii_graphic(&self) -> bool {
449520
self.as_byte().wrapping_sub(b' ' + 1) < 0x5E
450521
}
451522

@@ -454,16 +525,16 @@ impl AsciiChar {
454525
/// # Examples
455526
/// ```
456527
/// # use ascii::AsciiChar;
457-
/// assert_eq!(AsciiChar::new('n').is_print(), true);
458-
/// assert_eq!(AsciiChar::new(' ').is_print(), true);
459-
/// assert_eq!(AsciiChar::new('\n').is_print(), false);
528+
/// assert_eq!(AsciiChar::new('n').is_ascii_printable(), true);
529+
/// assert_eq!(AsciiChar::new(' ').is_ascii_printable(), true);
530+
/// assert_eq!(AsciiChar::new('\n').is_ascii_printable(), false);
460531
/// ```
461532
#[inline]
462-
pub const fn is_print(self) -> bool {
533+
pub const fn is_ascii_printable(&self) -> bool {
463534
self.as_byte().wrapping_sub(b' ') < 0x5F
464535
}
465536

466-
/// Checks if the character is alphabetic and lowercase
537+
/// Checks if the character is alphabetic and lowercase (a-z).
467538
///
468539
/// # Examples
469540
/// ```
@@ -477,7 +548,14 @@ impl AsciiChar {
477548
self.as_byte().wrapping_sub(b'a') < 26
478549
}
479550

480-
/// Checks if the character is alphabetic and uppercase
551+
/// Checks if the character is alphabetic and lowercase (a-z).
552+
///
553+
/// This method is identical to [`is_lowercase()`](#method.is_lowercase)
554+
pub fn is_ascii_lowercase(&self) -> bool {
555+
self.is_lowercase()
556+
}
557+
558+
/// Checks if the character is alphabetic and uppercase (A-Z).
481559
///
482560
/// # Examples
483561
/// ```
@@ -491,35 +569,42 @@ impl AsciiChar {
491569
self.as_byte().wrapping_sub(b'A') < 26
492570
}
493571

572+
/// Checks if the character is alphabetic and uppercase (A-Z).
573+
///
574+
/// This method is identical to [`is_uppercase()`](#method.is_uppercase)
575+
pub fn is_ascii_uppercase(&self) -> bool {
576+
self.is_uppercase()
577+
}
578+
494579
/// Checks if the character is punctuation
495580
///
496581
/// # Examples
497582
/// ```
498583
/// # use ascii::AsciiChar;
499-
/// assert_eq!(AsciiChar::new('n').is_punctuation(), false);
500-
/// assert_eq!(AsciiChar::new(' ').is_punctuation(), false);
501-
/// assert_eq!(AsciiChar::new('_').is_punctuation(), true);
502-
/// assert_eq!(AsciiChar::new('~').is_punctuation(), true);
584+
/// assert_eq!(AsciiChar::new('n').is_ascii_punctuation(), false);
585+
/// assert_eq!(AsciiChar::new(' ').is_ascii_punctuation(), false);
586+
/// assert_eq!(AsciiChar::new('_').is_ascii_punctuation(), true);
587+
/// assert_eq!(AsciiChar::new('~').is_ascii_punctuation(), true);
503588
/// ```
504589
#[inline]
505-
pub const fn is_punctuation(self) -> bool {
506-
self.is_graph() & !self.is_alphanumeric()
590+
pub const fn is_ascii_punctuation(&self) -> bool {
591+
self.is_ascii_graphic() & !self.is_alphanumeric()
507592
}
508593

509594
/// Checks if the character is a valid hex digit
510595
///
511596
/// # Examples
512597
/// ```
513598
/// # use ascii::AsciiChar;
514-
/// assert_eq!(AsciiChar::new('5').is_hex(), true);
515-
/// assert_eq!(AsciiChar::new('a').is_hex(), true);
516-
/// assert_eq!(AsciiChar::new('F').is_hex(), true);
517-
/// assert_eq!(AsciiChar::new('G').is_hex(), false);
518-
/// assert_eq!(AsciiChar::new(' ').is_hex(), false);
599+
/// assert_eq!(AsciiChar::new('5').is_ascii_hexdigit(), true);
600+
/// assert_eq!(AsciiChar::new('a').is_ascii_hexdigit(), true);
601+
/// assert_eq!(AsciiChar::new('F').is_ascii_hexdigit(), true);
602+
/// assert_eq!(AsciiChar::new('G').is_ascii_hexdigit(), false);
603+
/// assert_eq!(AsciiChar::new(' ').is_ascii_hexdigit(), false);
519604
/// ```
520605
#[inline]
521-
pub const fn is_hex(self) -> bool {
522-
self.is_digit() | ((self as u8 | 0x20u8).wrapping_sub(b'a') < 6)
606+
pub const fn is_ascii_hexdigit(&self) -> bool {
607+
self.is_ascii_digit() | ((*self as u8 | 0x20u8).wrapping_sub(b'a') < 6)
523608
}
524609

525610
/// Unicode has printable versions of the ASCII control codes, like '␛'.
@@ -781,6 +866,12 @@ mod tests {
781866
assert!(generic('λ').is_err());
782867
}
783868

869+
#[test]
870+
fn as_byte_and_char() {
871+
assert_eq!(A.as_byte(), b'A');
872+
assert_eq!(A.as_char(), 'A');
873+
}
874+
784875
#[test]
785876
fn new_array_is_correct() {
786877
for byte in 0..128u8 {
@@ -789,26 +880,46 @@ mod tests {
789880
}
790881

791882
#[test]
792-
fn as_byte_and_char() {
793-
assert_eq!(A.as_byte(), b'A');
794-
assert_eq!(A.as_char(), 'A');
883+
fn is_all() {
884+
for byte in 0..128u8 {
885+
let ch = byte as char;
886+
let ascii = AsciiChar::new(ch);
887+
assert_eq!(ascii.is_alphabetic(), ch.is_alphabetic());
888+
assert_eq!(ascii.is_ascii_alphabetic(), ch.is_ascii_alphabetic());
889+
assert_eq!(ascii.is_alphanumeric(), ch.is_alphanumeric());
890+
assert_eq!(ascii.is_ascii_alphanumeric(), ch.is_ascii_alphanumeric());
891+
assert_eq!(ascii.is_digit(8), ch.is_digit(8), "is_digit(8) {:?}", ch);
892+
assert_eq!(ascii.is_digit(10), ch.is_digit(10), "is_digit(10) {:?}", ch);
893+
assert_eq!(ascii.is_digit(16), ch.is_digit(16), "is_digit(16) {:?}", ch);
894+
assert_eq!(ascii.is_digit(36), ch.is_digit(36), "is_digit(36) {:?}", ch);
895+
assert_eq!(ascii.is_ascii_digit(), ch.is_ascii_digit());
896+
assert_eq!(ascii.is_ascii_hexdigit(), ch.is_ascii_hexdigit());
897+
assert_eq!(ascii.is_ascii_control(), ch.is_ascii_control());
898+
assert_eq!(ascii.is_ascii_graphic(), ch.is_ascii_graphic());
899+
assert_eq!(ascii.is_ascii_punctuation(), ch.is_ascii_punctuation());
900+
assert_eq!(ascii.is_whitespace(), ch.is_whitespace(), "{:?} ({:#04x})", ch, byte);
901+
assert_eq!(ascii.is_ascii_whitespace(), ch.is_ascii_whitespace(), "{:?} ({:#04x})", ch, byte);
902+
assert_eq!(ascii.is_uppercase(), ch.is_uppercase());
903+
assert_eq!(ascii.is_ascii_uppercase(), ch.is_ascii_uppercase());
904+
assert_eq!(ascii.is_lowercase(), ch.is_lowercase());
905+
assert_eq!(ascii.is_ascii_lowercase(), ch.is_ascii_lowercase());
906+
assert_eq!(ascii.to_ascii_uppercase(), ch.to_ascii_uppercase());
907+
assert_eq!(ascii.to_ascii_lowercase(), ch.to_ascii_lowercase());
908+
}
795909
}
796910

797911
#[test]
798-
fn is_digit() {
799-
assert_eq!(_0.is_digit(), true);
800-
assert_eq!(_9.is_digit(), true);
801-
assert_eq!(O.is_digit(), false);
802-
assert_eq!(o.is_digit(), false);
803-
assert_eq!(Slash.is_digit(), false);
804-
assert_eq!(Colon.is_digit(), false);
912+
fn is_digit_strange_radixes() {
913+
assert_eq!(AsciiChar::_0.is_digit(0), '0'.is_digit(0));
914+
assert_eq!(AsciiChar::_0.is_digit(1), '0'.is_digit(1));
915+
assert_eq!(AsciiChar::_5.is_digit(5), '5'.is_digit(5));
916+
assert_eq!(AsciiChar::z.is_digit(35), 'z'.is_digit(35));
805917
}
806918

807919
#[test]
808-
fn is_control() {
809-
assert_eq!(US.is_control(), true);
810-
assert_eq!(DEL.is_control(), true);
811-
assert_eq!(Space.is_control(), false);
920+
#[should_panic]
921+
fn is_digit_bad_radix() {
922+
AsciiChar::_7.is_digit(37);
812923
}
813924

814925
#[test]

0 commit comments

Comments
 (0)