@@ -295,14 +295,24 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
295
295
Ok ( Cow :: Owned ( normalized) )
296
296
}
297
297
298
/// Determines if `c` is to be removed ("mapped to nothing") according to section 7.2 of
/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
///
/// Returns `true` when `c` must be dropped from the string and `false` when it
/// must be kept. The six control characters that are mapped to SPACE instead
/// (U+0009, U+000A..=U+000D and U+0085) are deliberately reported as `false`.
fn x520_mapped_to_nothing(c: char) -> bool {
    match c {
        // Explicitly listed code points that are always removed.
        '\u{00AD}' | '\u{1806}' | '\u{034F}' | '\u{180B}'..='\u{180D}' |
        '\u{FE00}'..='\u{FE0F}' | '\u{FFFC}' | '\u{200B}' => true,
        // Technically control characters, but mapped to whitespace in X.520.
        '\u{09}' | '\u{0A}'..='\u{0D}' | '\u{85}' => false,
        // Code points with a control function (general category Cf).
        // `char::is_control` only recognises category Cc, so these have to be
        // spelled out. The list is the one enumerated in RFC 4518 section 2.2.
        // NOTE(review): X.520 section 7.2 is expected to carry the same list;
        // confirm against the Recommendation text.
        '\u{06DD}' | '\u{070F}' | '\u{180E}' | '\u{200C}'..='\u{200F}' |
        '\u{202A}'..='\u{202E}' | '\u{2060}'..='\u{2063}' | '\u{206A}'..='\u{206F}' |
        '\u{FEFF}' | '\u{FFF9}'..='\u{FFFB}' | '\u{1D173}'..='\u{1D17A}' |
        '\u{E0001}' | '\u{E0020}'..='\u{E007F}' => true,
        // Every remaining control character (Cc) is removed as well.
        _ => c.is_control(),
    }
}
309
+
310
+ /// Determines if `c` is to be replaced by SPACE (0x20) according to section 7.2 of
311
+ /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
312
+ fn x520_mapped_to_space ( c : char ) -> bool {
313
+ match c {
314
+ '\u{09}' | '\u{0A}' ..='\u{0D}' | '\u{85}' => true ,
315
+ _ => c. is_separator ( ) ,
306
316
}
307
317
}
308
318
@@ -323,7 +333,7 @@ pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
323
333
// 2. Map
324
334
let mapped = s. chars ( )
325
335
. filter ( |& c| !x520_mapped_to_nothing ( c) )
326
- . map ( |c| if c . is_separator ( ) { ' ' } else { c } ) ;
336
+ . map ( |c| if x520_mapped_to_space ( c ) { ' ' } else { c } ) ;
327
337
328
338
// 3. Normalize
329
339
let normalized = if case_fold {
@@ -399,8 +409,8 @@ mod test {
399
409
fn x520prep_examples ( ) {
400
410
assert_eq ! ( x520prep( "" , true ) . unwrap( ) , "" ) ;
401
411
assert_eq ! ( x520prep( "foo@bar" , true ) . unwrap( ) , "foo@bar" ) ;
402
- assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W.\u{9} \u{B} wuz h\u{0115} re" , false ) . unwrap( ) , "J. W. wuz h\u{0115} re" ) ;
403
- assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W.\u{9} \u{B} wuz h\u{0115} re" , true ) . unwrap( ) , "j. w. wuz h\u{0115} re" ) ;
412
+ assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , false ) . unwrap( ) , "J. W. wuz h\u{0115} re" ) ;
413
+ assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , true ) . unwrap( ) , "j. w. wuz h\u{0115} re" ) ;
404
414
assert_eq ! ( x520prep( "UPPERCASED" , true ) . unwrap( ) , "uppercased" ) ;
405
415
assert_prohibited_character ( x520prep ( "\u{0306} hello" , true ) ) ;
406
416
}
0 commit comments