5
5
#![ warn( missing_docs) ]
6
6
extern crate unicode_bidi;
7
7
extern crate unicode_normalization;
8
+ extern crate finl_unicode;
8
9
9
10
use std:: borrow:: Cow ;
10
11
use std:: fmt;
12
+ use finl_unicode:: categories:: CharacterCategories ;
11
13
use unicode_normalization:: UnicodeNormalization ;
12
14
13
15
mod rfc3454;
@@ -20,6 +22,10 @@ enum ErrorCause {
20
22
ProhibitedCharacter ( char ) ,
21
23
/// Violates stringprep rules for bidirectional text.
22
24
ProhibitedBidirectionalText ,
25
+ /// Starts with a combining character
26
+ StartsWithCombiningCharacter ,
27
+ /// Empty String
28
+ EmptyString ,
23
29
}
24
30
25
31
/// An error performing the stringprep algorithm.
@@ -31,6 +37,8 @@ impl fmt::Display for Error {
31
37
match self . 0 {
32
38
ErrorCause :: ProhibitedCharacter ( c) => write ! ( fmt, "prohibited character `{}`" , c) ,
33
39
ErrorCause :: ProhibitedBidirectionalText => write ! ( fmt, "prohibited bidirectional text" ) ,
40
+ ErrorCause :: StartsWithCombiningCharacter => write ! ( fmt, "starts with combining character" ) ,
41
+ ErrorCause :: EmptyString => write ! ( fmt, "empty string" ) ,
34
42
}
35
43
}
36
44
}
@@ -293,6 +301,90 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
293
301
Ok ( Cow :: Owned ( normalized) )
294
302
}
295
303
304
/// Determines if `c` is to be removed according to section 7.2 of
/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
///
/// Returns `true` when `c` must be dropped from the string ("mapped to
/// nothing") and `false` when it must be kept or is handled by another
/// mapping rule.
fn x520_mapped_to_nothing(c: char) -> bool {
    match c {
        // Technically control characters, but mapped to whitespace in X.520,
        // so they must not be removed here.
        '\u{09}'..='\u{0D}' | '\u{85}' => false,
        // Soft hyphens, COMBINING GRAPHEME JOINER, variation selectors,
        // OBJECT REPLACEMENT CHARACTER and ZERO WIDTH SPACE.
        '\u{00AD}' | '\u{1806}' | '\u{034F}' | '\u{180B}'..='\u{180D}'
        | '\u{FE00}'..='\u{FE0F}' | '\u{FFFC}' | '\u{200B}' => true,
        // Code points with a control function that `char::is_control` does not
        // report (it only covers general category Cc). The list follows the
        // enumeration in RFC 4518, Section 2.2.
        // NOTE(review): confirm this list against the X.520 text itself.
        '\u{06DD}' | '\u{070F}' | '\u{180E}' | '\u{200C}'..='\u{200F}'
        | '\u{202A}'..='\u{202E}' | '\u{2060}'..='\u{2063}' | '\u{206A}'..='\u{206F}'
        | '\u{FEFF}' | '\u{FFF9}'..='\u{FFFB}' | '\u{1D173}'..='\u{1D17A}'
        | '\u{E0001}' | '\u{E0020}'..='\u{E007F}' => true,
        // Remaining Cc control characters (U+0000-0008, U+000E-001F, U+007F-0084,
        // U+0086-009F).
        _ => c.is_control(),
    }
}
315
+
316
+ /// Determines if `c` is to be replaced by SPACE (0x20) according to section 7.2 of
317
+ /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
318
+ fn x520_mapped_to_space ( c : char ) -> bool {
319
+ match c {
320
+ '\u{09}' | '\u{0A}' ..='\u{0D}' | '\u{85}' => true ,
321
+ _ => c. is_separator ( ) ,
322
+ }
323
+ }
324
+
325
+ /// Prepares a string according to the procedures described in Section 7 of
326
+ /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
327
+ ///
328
+ /// Note that this function does _not_ remove leading, trailing, or inner
329
+ /// spaces as described in Section 7.6, because the characters needing removal
330
+ /// will vary across the matching rules and ASN.1 syntaxes used.
331
+ pub fn x520prep ( s : & str , case_fold : bool ) -> Result < Cow < ' _ , str > , Error > {
332
+ if s. len ( ) == 0 {
333
+ return Err ( Error ( ErrorCause :: EmptyString ) ) ;
334
+ }
335
+ if s. chars ( ) . all ( |c| matches ! ( c, ' ' ..='~' ) && ( !case_fold || c. is_ascii_lowercase ( ) ) ) {
336
+ return Ok ( Cow :: Borrowed ( s) ) ;
337
+ }
338
+
339
+ // 1. Transcode
340
+ // Already done because &str is enforced to be Unicode.
341
+
342
+ // 2. Map
343
+ let mapped = s. chars ( )
344
+ . filter ( |& c| !x520_mapped_to_nothing ( c) )
345
+ . map ( |c| if x520_mapped_to_space ( c) { ' ' } else { c } ) ;
346
+
347
+ // 3. Normalize
348
+ let normalized = if case_fold {
349
+ mapped
350
+ . flat_map ( tables:: case_fold_for_nfkc)
351
+ . collect :: < String > ( )
352
+ } else {
353
+ mapped. nfkc ( ) . collect :: < String > ( )
354
+ } ;
355
+
356
+ // 4. Prohibit
357
+ let prohibited = normalized. chars ( ) . find ( |& c| tables:: unassigned_code_point ( c)
358
+ || tables:: private_use ( c)
359
+ || tables:: non_character_code_point ( c)
360
+ || tables:: surrogate_code ( c)
361
+ || c == '\u{FFFD}' // REPLACEMENT CHARACTER
362
+ ) ;
363
+ if let Some ( c) = prohibited {
364
+ return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
365
+ }
366
+ // From ITU-T Recommendation X.520, Section 7.4:
367
+ // "The first code point of a string is prohibited from being a combining character."
368
+ let first_char = s. chars ( ) . next ( ) ;
369
+ if let Some ( c) = first_char {
370
+ if c. is_mark ( ) {
371
+ return Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) ;
372
+ }
373
+ } else {
374
+ return Err ( Error ( ErrorCause :: EmptyString ) ) ;
375
+ }
376
+
377
+ // 5. Check bidi
378
+ // From ITU-T Recommendation X.520, Section 7.4:
379
+ // "There are no bidirectional restrictions. The output string is the input string."
380
+ // So there is nothing to do for this step.
381
+
382
+ // 6. Insignificant Character Removal
383
+ // Done in calling functions.
384
+
385
+ Ok ( normalized. into ( ) )
386
+ }
387
+
296
388
#[ cfg( test) ]
297
389
mod test {
298
390
use super :: * ;
@@ -304,6 +396,13 @@ mod test {
304
396
}
305
397
}
306
398
399
+ fn assert_starts_with_combining_char < T > ( result : Result < T , Error > ) {
400
+ match result {
401
+ Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) => ( ) ,
402
+ _ => assert ! ( false )
403
+ }
404
+ }
405
+
307
406
// RFC4013, 3. Examples
308
407
#[ test]
309
408
fn saslprep_examples ( ) {
@@ -322,6 +421,15 @@ mod test {
322
421
assert_eq ! ( "foo@bar" , resourceprep( "foo@bar" ) . unwrap( ) ) ;
323
422
}
324
423
424
// Examples exercising `x520prep` with and without case folding.
#[test]
fn x520prep_examples() {
    assert_eq!(x520prep("foo@bar", true).unwrap(), "foo@bar");
    // VARIATION SELECTOR-1 is removed; TAB and LINE TABULATION each become a
    // SPACE. `x520prep` does not collapse runs of spaces, so each
    // "space, control, space" sequence yields three spaces.
    assert_eq!(
        x520prep("J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re", false).unwrap(),
        "J.   W.   wuz h\u{0115} re"
    );
    assert_eq!(
        x520prep("J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re", true).unwrap(),
        "j.   w.   wuz h\u{0115} re"
    );
    assert_eq!(x520prep("UPPERCASED", true).unwrap(), "uppercased");
    // A leading COMBINING BREVE is prohibited.
    assert_starts_with_combining_char(x520prep("\u{0306} hello", true));
}
432
+
325
433
#[ test]
326
434
fn ascii_optimisations ( ) {
327
435
if let Cow :: Owned ( _) = nodeprep ( "nodepart" ) . unwrap ( ) {
0 commit comments