Skip to content

Commit f793c34

Browse files
authored
Merge pull request #8 from JonathanWilbur/master
feat: x520_prep
2 parents 0696f19 + fad6f83 commit f793c34

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ documentation = "https://docs.rs/stringprep/0.1.2/stringprep"
99
readme = "README.md"
1010

1111
[dependencies]
12+
finl_unicode = "1.2.0"
1213
unicode-bidi = "0.3"
1314
unicode-normalization = "0.1"

circle.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ jobs:
33
build:
44
working_directory: ~/build
55
docker:
6-
- image: rust:1.47.0
6+
- image: rust:1.56.0 # 1.56.0 = Rust 2021.
77
steps:
88
- checkout
99
- restore_cache:

src/lib.rs

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
#![warn(missing_docs)]
66
extern crate unicode_bidi;
77
extern crate unicode_normalization;
8+
extern crate finl_unicode;
89

910
use std::borrow::Cow;
1011
use std::fmt;
12+
use finl_unicode::categories::CharacterCategories;
1113
use unicode_normalization::UnicodeNormalization;
1214

1315
mod rfc3454;
@@ -20,6 +22,10 @@ enum ErrorCause {
2022
ProhibitedCharacter(char),
2123
/// Violates stringprep rules for bidirectional text.
2224
ProhibitedBidirectionalText,
25+
/// Starts with a combining character
26+
StartsWithCombiningCharacter,
27+
/// Empty String
28+
EmptyString,
2329
}
2430

2531
/// An error performing the stringprep algorithm.
@@ -31,6 +37,8 @@ impl fmt::Display for Error {
3137
match self.0 {
3238
ErrorCause::ProhibitedCharacter(c) => write!(fmt, "prohibited character `{}`", c),
3339
ErrorCause::ProhibitedBidirectionalText => write!(fmt, "prohibited bidirectional text"),
40+
ErrorCause::StartsWithCombiningCharacter => write!(fmt, "starts with combining character"),
41+
ErrorCause::EmptyString => write!(fmt, "empty string"),
3442
}
3543
}
3644
}
@@ -293,6 +301,90 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
293301
Ok(Cow::Owned(normalized))
294302
}
295303

304+
/// Determines if `c` is to be removed according to section 7.2 of
/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
///
/// Returns `true` for the explicitly listed "mapped to nothing" code points,
/// for control (Cc) code points, and for the code points with a control
/// function (Cf). Returns `false` for the handful of control characters that
/// are mapped to SPACE instead, and for everything else.
fn x520_mapped_to_nothing(c: char) -> bool {
    match c {
        // Soft hyphens, combining grapheme joiner, variation selectors,
        // zero width space and the object replacement character.
        '\u{00AD}'
        | '\u{034F}'
        | '\u{1806}'
        | '\u{180B}'..='\u{180D}'
        | '\u{200B}'
        | '\u{FE00}'..='\u{FE0F}'
        | '\u{FFFC}' => true,
        // Technically control characters, but mapped to whitespace in X.520.
        '\u{09}' | '\u{0A}'..='\u{0D}' | '\u{85}' => false,
        // Code points with a control function (general category Cf).
        // `char::is_control` only reports category Cc, so these have to be
        // listed explicitly (same list as RFC 4518, section 2.2).
        '\u{06DD}'
        | '\u{070F}'
        | '\u{180E}'
        | '\u{200C}'..='\u{200F}'
        | '\u{202A}'..='\u{202E}'
        | '\u{2060}'..='\u{2063}'
        | '\u{206A}'..='\u{206F}'
        | '\u{FEFF}'
        | '\u{FFF9}'..='\u{FFFB}'
        | '\u{1D173}'..='\u{1D17A}'
        | '\u{E0001}'
        | '\u{E0020}'..='\u{E007F}' => true,
        // Remaining control (Cc) code points.
        _ => c.is_control(),
    }
}
315+
316+
/// Determines if `c` is to be replaced by SPACE (0x20) according to section 7.2 of
317+
/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
318+
fn x520_mapped_to_space(c: char) -> bool {
319+
match c {
320+
'\u{09}' | '\u{0A}'..='\u{0D}' | '\u{85}' => true,
321+
_ => c.is_separator(),
322+
}
323+
}
324+
325+
/// Prepares a string according to the procedures described in Section 7 of
326+
/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
327+
///
328+
/// Note that this function does _not_ remove leading, trailing, or inner
329+
/// spaces as described in Section 7.6, because the characters needing removal
330+
/// will vary across the matching rules and ASN.1 syntaxes used.
331+
pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
332+
if s.len() == 0 {
333+
return Err(Error(ErrorCause::EmptyString));
334+
}
335+
if s.chars().all(|c| matches!(c, ' '..='~') && (!case_fold || c.is_ascii_lowercase())) {
336+
return Ok(Cow::Borrowed(s));
337+
}
338+
339+
// 1. Transcode
340+
// Already done because &str is enforced to be Unicode.
341+
342+
// 2. Map
343+
let mapped = s.chars()
344+
.filter(|&c| !x520_mapped_to_nothing(c))
345+
.map(|c| if x520_mapped_to_space(c) { ' ' } else { c });
346+
347+
// 3. Normalize
348+
let normalized = if case_fold {
349+
mapped
350+
.flat_map(tables::case_fold_for_nfkc)
351+
.collect::<String>()
352+
} else {
353+
mapped.nfkc().collect::<String>()
354+
};
355+
356+
// 4. Prohibit
357+
let prohibited = normalized.chars().find(|&c| tables::unassigned_code_point(c)
358+
|| tables::private_use(c)
359+
|| tables::non_character_code_point(c)
360+
|| tables::surrogate_code(c)
361+
|| c == '\u{FFFD}' // REPLACEMENT CHARACTER
362+
);
363+
if let Some(c) = prohibited {
364+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
365+
}
366+
// From ITU-T Recommendation X.520, Section 7.4:
367+
// "The first code point of a string is prohibited from being a combining character."
368+
let first_char = s.chars().next();
369+
if let Some(c) = first_char {
370+
if c.is_mark() {
371+
return Err(Error(ErrorCause::StartsWithCombiningCharacter));
372+
}
373+
} else {
374+
return Err(Error(ErrorCause::EmptyString));
375+
}
376+
377+
// 5. Check bidi
378+
// From ITU-T Recommendation X.520, Section 7.4:
379+
// "There are no bidirectional restrictions. The output string is the input string."
380+
// So there is nothing to do for this step.
381+
382+
// 6. Insignificant Character Removal
383+
// Done in calling functions.
384+
385+
Ok(normalized.into())
386+
}
387+
296388
#[cfg(test)]
297389
mod test {
298390
use super::*;
@@ -304,6 +396,13 @@ mod test {
304396
}
305397
}
306398

399+
fn assert_starts_with_combining_char<T>(result: Result<T, Error>) {
400+
match result {
401+
Err(Error(ErrorCause::StartsWithCombiningCharacter)) => (),
402+
_ => assert!(false)
403+
}
404+
}
405+
307406
// RFC4013, 3. Examples
308407
#[test]
309408
fn saslprep_examples() {
@@ -322,6 +421,15 @@ mod test {
322421
assert_eq!("foo@bar", resourceprep("foo@bar").unwrap());
323422
}
324423

424+
#[test]
fn x520prep_examples() {
    // Plain ASCII passes through unchanged.
    assert_eq!(x520prep("foo@bar", true).unwrap(), "foo@bar");

    // U+FE00 is removed; TAB and VT each become a SPACE next to an existing
    // SPACE, so the output contains two consecutive spaces in both places.
    // The spaces are spelled as escapes so they cannot be collapsed by accident.
    assert_eq!(
        x520prep("J.\u{FE00} \u{9}W. \u{B}wuz h\u{0115}re", false).unwrap(),
        "J.\u{20}\u{20}W.\u{20}\u{20}wuz h\u{0115}re"
    );
    assert_eq!(
        x520prep("J.\u{FE00} \u{9}W. \u{B}wuz h\u{0115}re", true).unwrap(),
        "j.\u{20}\u{20}w.\u{20}\u{20}wuz h\u{0115}re"
    );

    // Case folding lowercases ASCII uppercase letters.
    assert_eq!(x520prep("UPPERCASED", true).unwrap(), "uppercased");

    // A leading combining mark (U+0306 COMBINING BREVE) is rejected.
    assert_starts_with_combining_char(x520prep("\u{0306}hello", true));
}
432+
325433
#[test]
326434
fn ascii_optimisations() {
327435
if let Cow::Owned(_) = nodeprep("nodepart").unwrap() {

0 commit comments

Comments
 (0)