Skip to content

Commit 0cd80d7

Browse files
committed
SASLprep implementation
1 parent 50c39a8 commit 0cd80d7

File tree

3 files changed

+256
-2
lines changed

3 files changed

+256
-2
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ version = "0.1.0"
44
authors = ["Steven Fackler <sfackler@gmail.com>"]
55

66
[dependencies]
7+
unicode-bidi = "0.3"
8+
unicode-normalization = "0.1"

src/lib.rs

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,107 @@
1+
//! An implementation of the "stringprep" algorithm defined in [RFC 3454][].
2+
//!
3+
//! [RFC 3454]: https://tools.ietf.org/html/rfc3454
4+
#![warn(missing_docs)]
5+
extern crate unicode_bidi;
6+
extern crate unicode_normalization;
7+
8+
use std::ascii::AsciiExt;
9+
use std::borrow::Cow;
10+
use std::error;
11+
use std::fmt;
12+
use unicode_normalization::UnicodeNormalization;
13+
14+
pub mod tables;
15+
16+
/// The specific reason a string was rejected.
#[derive(Debug)]
enum ErrorCause {
    /// A character matching one of the prohibited-output tables was found;
    /// the offending character is carried along for reporting.
    ProhibitedCharacter(char),
    /// The string failed the bidirectional text checks.
    ProhibitedBidirectionalText,
}

/// An error performing the stringprep algorithm.
///
/// The cause is kept private; use the `Display` output for a human-readable
/// explanation.
#[derive(Debug)]
pub struct Error(ErrorCause);

impl fmt::Display for Error {
    /// Writes a short, lowercase description of why preparation failed.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let Error(ref cause) = *self;
        match *cause {
            ErrorCause::ProhibitedBidirectionalText => {
                fmt.write_str("prohibited bidirectional text")
            }
            ErrorCause::ProhibitedCharacter(ch) => {
                write!(fmt, "prohibited character `{}`", ch)
            }
        }
    }
}

impl error::Error for Error {
    /// Returns a fixed description that does not depend on the cause.
    fn description(&self) -> &str {
        "error performing stringprep algorithm"
    }
}
40+
41+
/// Prepares a string with the SASLprep profile of the stringprep algorithm.
42+
///
43+
/// SASLprep is defined in [RFC 4013][].
44+
///
45+
/// [RFC 4013]: https://tools.ietf.org/html/rfc4013
46+
pub fn saslprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
47+
// fast path for ascii text
48+
if s.chars()
49+
.all(|c| c.is_ascii() && !tables::ascii_control_character(c)) {
50+
return Ok(Cow::Borrowed(s));
51+
}
52+
53+
let mapped = s.chars()
54+
.map(|c| if tables::non_ascii_space_character(c) {
55+
' '
56+
} else {
57+
c
58+
})
59+
.filter(|&c| !tables::commonly_mapped_to_nothing(c));
60+
61+
let normalized = mapped.nfkc().collect::<String>();
62+
63+
let prohibited = normalized
64+
.chars()
65+
.filter(|&c| {
66+
tables::non_ascii_space_character(c) || tables::ascii_control_character(c) ||
67+
tables::non_ascii_control_character(c) || tables::private_use(c) ||
68+
tables::non_character_code_point(c) ||
69+
tables::surrogate_code(c) || tables::inappropriate_for_plain_text(c) ||
70+
tables::inappropriate_for_canonical_representation(c) ||
71+
tables::change_display_properties_or_deprecated(c) ||
72+
tables::tagging_character(c)
73+
})
74+
.next();
75+
if let Some(c) = prohibited {
76+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
77+
}
78+
79+
if normalized.contains(tables::bidi_r_or_al) {
80+
if normalized.contains(tables::bidi_l) {
81+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
82+
}
83+
84+
if !tables::bidi_r_or_al(normalized.chars().next().unwrap()) ||
85+
!tables::bidi_r_or_al(normalized.chars().next_back().unwrap()) {
86+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
87+
}
88+
}
89+
90+
Ok(Cow::Owned(normalized))
91+
}
92+
193
#[cfg(test)]
mod test {
    use super::*;

    /// Exercises `saslprep` with inputs that must be accepted and inputs
    /// that must be rejected.
    #[test]
    fn saslprep_examples() {
        // (input, expected prepared output)
        let accepted = [("I\u{00AD}X", "IX"),
                        ("user", "user"),
                        ("USER", "USER"),
                        ("\u{00AA}", "a"),
                        ("\u{2168}", "IX")];
        for &(input, expected) in accepted.iter() {
            assert_eq!(saslprep(input).unwrap(), expected);
        }

        // A control character, and an R/AL string that does not end in R/AL.
        let rejected = ["\u{0007}", "\u{0627}\u{0031}"];
        for &input in rejected.iter() {
            assert!(saslprep(input).is_err());
        }
    }
}

src/tables.rs

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
//! Character Tables
2+
use unicode_bidi::{bidi_class, BidiClass};
3+
4+
/// B.1 Commonly mapped to nothing
///
/// Returns `true` if `c` is one of the characters listed in this table.
pub fn commonly_mapped_to_nothing(c: char) -> bool {
    // Inclusive (first, last) ranges; single entries repeat their bound.
    const TABLE: [(char, char); 8] = [('\u{00AD}', '\u{00AD}'),
                                      ('\u{034F}', '\u{034F}'),
                                      ('\u{1806}', '\u{1806}'),
                                      ('\u{180B}', '\u{180D}'),
                                      ('\u{200B}', '\u{200D}'),
                                      ('\u{2060}', '\u{2060}'),
                                      ('\u{FE00}', '\u{FE0F}'),
                                      ('\u{FEFF}', '\u{FEFF}')];

    TABLE.iter().any(|&(first, last)| first <= c && c <= last)
}
15+
16+
/// C.1.2 Non-ASCII space characters
///
/// Returns `true` if `c` is one of the characters listed in this table.
pub fn non_ascii_space_character(c: char) -> bool {
    let cp = c as u32;

    // U+2000 through U+200B form the only contiguous run in the table.
    let in_2000_run = 0x2000 <= cp && cp <= 0x200B;

    in_2000_run || cp == 0x00A0 || cp == 0x1680 || cp == 0x202F || cp == 0x205F || cp == 0x3000
}
25+
26+
/// C.2.1 ASCII control characters
///
/// Returns `true` for U+0000 through U+001F and for U+007F.
pub fn ascii_control_character(c: char) -> bool {
    // U+0000 is the smallest `char`, so only the upper bound needs checking.
    c <= '\u{001F}' || c == '\u{007F}'
}
34+
35+
/// C.2.2 Non-ASCII control characters
///
/// Returns `true` if `c` is one of the characters listed in this table.
pub fn non_ascii_control_character(c: char) -> bool {
    // Isolated entries of the table.
    const SINGLES: [char; 12] = ['\u{06DD}', '\u{070F}', '\u{180E}', '\u{200C}', '\u{200D}',
                                 '\u{2028}', '\u{2029}', '\u{2060}', '\u{2061}', '\u{2062}',
                                 '\u{2063}', '\u{FEFF}'];
    // Inclusive (first, last) ranges of the table.
    const RANGES: [(char, char); 4] = [('\u{0080}', '\u{009F}'),
                                       ('\u{206A}', '\u{206F}'),
                                       ('\u{FFF9}', '\u{FFFC}'),
                                       ('\u{1D173}', '\u{1D17A}')];

    SINGLES.iter().any(|&single| single == c) ||
    RANGES.iter().any(|&(first, last)| first <= c && c <= last)
}
57+
58+
/// C.3 Private use
///
/// Returns `true` if `c` falls in one of the three ranges of this table.
pub fn private_use(c: char) -> bool {
    let cp = c as u32;

    let in_bmp_range = 0xE000 <= cp && cp <= 0xF8FF;
    let in_plane_15 = 0xF0000 <= cp && cp <= 0xFFFFD;
    let in_plane_16 = 0x100000 <= cp && cp <= 0x10FFFD;

    in_bmp_range || in_plane_15 || in_plane_16
}
67+
68+
/// C.4 Non-character code points
///
/// Returns `true` for U+FDD0 through U+FDEF and for the last two code points
/// (`xFFFE` and `xFFFF`) of every plane from 0 through 16.
pub fn non_character_code_point(c: char) -> bool {
    let cp = c as u32;

    // A `char` never exceeds U+10FFFF, so masking the low 16 bits covers
    // exactly the seventeen `..FFFE`/`..FFFF` pairs listed in the table.
    let ends_plane = (cp & 0xFFFE) == 0xFFFE;
    let in_fdd0_block = 0xFDD0 <= cp && cp <= 0xFDEF;

    ends_plane || in_fdd0_block
}
92+
93+
/// C.5 Surrogate codes
///
/// Always returns `false`: the table covers U+D800 through U+DFFF, and Rust
/// forbids those values in a `char`, so no argument can ever match.
pub fn surrogate_code(c: char) -> bool {
    // The parameter exists only to keep the same shape as the other tables.
    let _ = c;
    false
}
101+
102+
/// C.6 Inappropriate for plain text
///
/// Returns `true` for U+FFF9 through U+FFFD.
pub fn inappropriate_for_plain_text(c: char) -> bool {
    // The five listed code points are contiguous.
    '\u{FFF9}' <= c && c <= '\u{FFFD}'
}
109+
110+
/// C.7 Inappropriate for canonical representation
///
/// Returns `true` for U+2FF0 through U+2FFB.
pub fn inappropriate_for_canonical_representation(c: char) -> bool {
    let below = c < '\u{2FF0}';
    let above = c > '\u{2FFB}';
    !(below || above)
}
117+
118+
/// C.8 Change display properties or are deprecated
///
/// Returns `true` if `c` is one of the characters listed in this table.
pub fn change_display_properties_or_deprecated(c: char) -> bool {
    // The fifteen listed code points collapse into four inclusive ranges.
    const RANGES: [(char, char); 4] = [('\u{0340}', '\u{0341}'),
                                       ('\u{200E}', '\u{200F}'),
                                       ('\u{202A}', '\u{202E}'),
                                       ('\u{206A}', '\u{206F}')];

    for &(first, last) in RANGES.iter() {
        if first <= c && c <= last {
            return true;
        }
    }
    false
}
127+
128+
/// C.9 Tagging characters
///
/// Returns `true` for U+E0001 and for U+E0020 through U+E007F.
pub fn tagging_character(c: char) -> bool {
    if c == '\u{E0001}' {
        return true;
    }
    '\u{E0020}' <= c && c <= '\u{E007F}'
}
136+
137+
/// D.1 Characters with bidirectional property "R" or "AL"
138+
pub fn bidi_r_or_al(c: char) -> bool {
139+
match bidi_class(c) {
140+
BidiClass::R | BidiClass::AL => true,
141+
_ => false,
142+
}
143+
}
144+
145+
/// D.2 Characters with bidirectional property "L"
146+
pub fn bidi_l(c: char) -> bool {
147+
match bidi_class(c) {
148+
BidiClass::L => true,
149+
_ => false,
150+
}
151+
}

0 commit comments

Comments
 (0)