|
| 1 | +//! Character Tables |
| 2 | +use unicode_bidi::{bidi_class, BidiClass}; |
| 3 | + |
/// B.1 Commonly mapped to nothing
///
/// Returns `true` when `c` is one of the code points in this table:
/// U+00AD, U+034F, U+1806, U+180B..U+180D, U+200B..U+200D, U+2060,
/// U+FE00..U+FE0F and U+FEFF. Every other character yields `false`.
pub fn commonly_mapped_to_nothing(c: char) -> bool {
    matches!(
        c,
        '\u{00AD}'
            | '\u{034F}'
            | '\u{1806}'
            | '\u{180B}'..='\u{180D}'
            | '\u{200B}'..='\u{200D}'
            | '\u{2060}'
            | '\u{FE00}'..='\u{FE0F}'
            | '\u{FEFF}'
    )
}
| 15 | + |
/// C.1.2 Non-ASCII space characters
///
/// Returns `true` when `c` is one of U+00A0, U+1680, U+2000..U+200B,
/// U+202F, U+205F or U+3000, and `false` otherwise. The ASCII space
/// U+0020 is not part of this table.
pub fn non_ascii_space_character(c: char) -> bool {
    matches!(
        c,
        '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}
| 25 | + |
/// C.2.1 ASCII control characters
///
/// Returns `true` when `c` lies in U+0000..U+001F or is U+007F (DEL),
/// and `false` for every other character.
pub fn ascii_control_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright starting with the 2021 edition.
    matches!(c, '\u{0000}'..='\u{001F}' | '\u{007F}')
}
| 34 | + |
/// C.2.2 Non-ASCII control characters
///
/// Returns `true` when `c` is one of: U+0080..U+009F, U+06DD, U+070F,
/// U+180E, U+200C, U+200D, U+2028, U+2029, U+2060..U+2063,
/// U+206A..U+206F, U+FEFF, U+FFF9..U+FFFC or U+1D173..U+1D17A.
/// Every other character yields `false`.
pub fn non_ascii_control_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern (a hard error from
    // the 2021 edition). Adjacent single code points are folded into ranges.
    matches!(
        c,
        '\u{0080}'..='\u{009F}'
            | '\u{06DD}'
            | '\u{070F}'
            | '\u{180E}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2028}'..='\u{2029}'
            | '\u{2060}'..='\u{2063}'
            | '\u{206A}'..='\u{206F}'
            | '\u{FEFF}'
            | '\u{FFF9}'..='\u{FFFC}'
            | '\u{1D173}'..='\u{1D17A}'
    )
}
| 57 | + |
/// C.3 Private use
///
/// Returns `true` when `c` lies in one of the ranges U+E000..U+F8FF,
/// U+F0000..U+FFFFD or U+100000..U+10FFFD, and `false` otherwise. The last
/// two code points of planes 15 and 16 are deliberately outside the ranges.
pub fn private_use(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright starting with the 2021 edition.
    matches!(
        c,
        '\u{E000}'..='\u{F8FF}'
            | '\u{F0000}'..='\u{FFFFD}'
            | '\u{100000}'..='\u{10FFFD}'
    )
}
| 67 | + |
/// C.4 Non-character code points
///
/// Returns `true` when `c` is in U+FDD0..U+FDEF, or is one of the last two
/// code points (`xFFFE` / `xFFFF`) of any plane from U+FFFE..U+FFFF up to
/// U+10FFFE..U+10FFFF. Every other character yields `false`.
pub fn non_character_code_point(c: char) -> bool {
    let cp = u32::from(c);
    // The per-plane entries all share the same low 16 bits: 0xFFFE or 0xFFFF.
    // Masking off bit 0 and comparing against 0xFFFE therefore covers all 17
    // planes without spelling each range out (and avoids the deprecated `...`
    // range-pattern syntax the explicit list relied on).
    matches!(cp, 0xFDD0..=0xFDEF) || cp & 0xFFFE == 0xFFFE
}
| 92 | + |
/// C.5 Surrogate codes
///
/// The table covers U+D800..U+DFFF. A Rust `char` is always a Unicode scalar
/// value and so can never hold a surrogate; this function therefore returns
/// `false` for every possible input.
pub fn surrogate_code(c: char) -> bool {
    // A surrogate cannot be written as a `char` literal, so the range is
    // expressed on the numeric code point instead. It never matches.
    matches!(u32::from(c), 0xD800..=0xDFFF)
}
| 101 | + |
/// C.6 Inappropriate for plain text
///
/// Returns `true` when `c` is in U+FFF9..U+FFFD (inclusive), and `false`
/// for every other character.
pub fn inappropriate_for_plain_text(c: char) -> bool {
    matches!(c, '\u{FFF9}'..='\u{FFFD}')
}
| 109 | + |
/// C.7 Inappropriate for canonical representation
///
/// Returns `true` when `c` is in U+2FF0..U+2FFB (inclusive), and `false`
/// for every other character.
pub fn inappropriate_for_canonical_representation(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright starting with the 2021 edition.
    matches!(c, '\u{2FF0}'..='\u{2FFB}')
}
| 117 | + |
/// C.8 Change display properties or are deprecated
///
/// Returns `true` when `c` is one of U+0340, U+0341, U+200E, U+200F,
/// U+202A..U+202E or U+206A..U+206F, and `false` otherwise.
pub fn change_display_properties_or_deprecated(c: char) -> bool {
    matches!(
        c,
        '\u{0340}'..='\u{0341}'
            | '\u{200E}'..='\u{200F}'
            | '\u{202A}'..='\u{202E}'
            | '\u{206A}'..='\u{206F}'
    )
}
| 127 | + |
/// C.9 Tagging characters
///
/// Returns `true` when `c` is U+E0001 or lies in U+E0020..U+E007F
/// (inclusive), and `false` for every other character.
pub fn tagging_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright starting with the 2021 edition.
    matches!(c, '\u{E0001}' | '\u{E0020}'..='\u{E007F}')
}
| 136 | + |
| 137 | +/// D.1 Characters with bidirectional property "R" or "AL" |
| 138 | +pub fn bidi_r_or_al(c: char) -> bool { |
| 139 | + match bidi_class(c) { |
| 140 | + BidiClass::R | BidiClass::AL => true, |
| 141 | + _ => false, |
| 142 | + } |
| 143 | +} |
| 144 | + |
| 145 | +/// D.2 Characters with bidirectional property "L" |
| 146 | +pub fn bidi_l(c: char) -> bool { |
| 147 | + match bidi_class(c) { |
| 148 | + BidiClass::L => true, |
| 149 | + _ => false, |
| 150 | + } |
| 151 | +} |
0 commit comments