@@ -293,6 +293,75 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
293
293
Ok ( Cow :: Owned ( normalized) )
294
294
}
295
295
296
/// Returns `true` if `c` is deleted ("mapped to nothing") by the Map step of
/// ITU-T Recommendation X.520, Section 7.2 (the same mapping as RFC 4518,
/// Section 2.2).
///
/// Characters that the Map step turns into SPACE are *not* reported here,
/// even when they are control characters: the caller is expected to map them
/// to `' '` itself.
fn x520_mapped_to_nothing(c: char) -> bool {
    match c {
        // CHARACTER TABULATION, LINE FEED, LINE TABULATION, FORM FEED,
        // CARRIAGE RETURN and NEXT LINE are control characters, but the Map
        // step turns them into SPACE instead of deleting them.
        '\u{0009}'..='\u{000D}' | '\u{0085}' => false,

        // SOFT HYPHEN, MONGOLIAN TODO SOFT HYPHEN, COMBINING GRAPHEME JOINER,
        // the variation selectors, OBJECT REPLACEMENT CHARACTER and
        // ZERO WIDTH SPACE.
        '\u{00AD}'
        | '\u{1806}'
        | '\u{034F}'
        | '\u{180B}'..='\u{180D}'
        | '\u{FE00}'..='\u{FE0F}'
        | '\u{FFFC}'
        | '\u{200B}' => true,

        // Code points with a control function (category Cf) that the Map step
        // lists explicitly; `char::is_control` only covers category Cc, so
        // they have to be spelled out.
        '\u{06DD}'
        | '\u{070F}'
        | '\u{180E}'
        | '\u{200C}'..='\u{200F}'
        | '\u{202A}'..='\u{202E}'
        | '\u{2060}'..='\u{2063}'
        | '\u{206A}'..='\u{206F}'
        | '\u{FEFF}'
        | '\u{FFF9}'..='\u{FFFB}'
        | '\u{1D173}'..='\u{1D17A}'
        | '\u{E0001}'
        | '\u{E0020}'..='\u{E007F}' => true,

        // Every remaining Cc control code is deleted; anything else is kept.
        _ => c.is_control(),
    }
}
306
+
307
+ /// Prepares a string according to the procedures described in Section 7 of
308
+ /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
309
+ ///
310
+ /// Note that this function does _not_ remove leading, trailing, or inner
311
+ /// spaces as described in Section 7.6, because the characters needing removal
312
+ /// will vary across the matching rules and ASN.1 syntaxes used.
313
+ pub fn x520prep ( s : & str , case_fold : bool ) -> Result < Cow < ' _ , str > , Error > {
314
+ if s. chars ( ) . all ( |c| matches ! ( c, ' ' ..='~' ) ) {
315
+ return Ok ( Cow :: Borrowed ( s) ) ;
316
+ }
317
+
318
+ // 1. Transcode
319
+ // Already done because &str is enforced to be Unicode.
320
+
321
+ // 2. Map
322
+ let mapped = s. chars ( )
323
+ . filter ( |& c| !x520_mapped_to_nothing ( c) )
324
+ . map ( |c| if c. is_whitespace ( ) { ' ' } else { c } ) ;
325
+
326
+ // 3. Normalize
327
+ let normalized = if case_fold {
328
+ mapped
329
+ . flat_map ( tables:: case_fold_for_nfkc)
330
+ . collect :: < String > ( )
331
+ } else {
332
+ mapped. collect :: < String > ( )
333
+ } ;
334
+
335
+ // 4. Prohibit
336
+ let prohibited = normalized. chars ( ) . find ( |& c| tables:: unassigned_code_point ( c)
337
+ || tables:: private_use ( c)
338
+ || tables:: non_character_code_point ( c)
339
+ || tables:: surrogate_code ( c)
340
+ || c == '\u{FFFD}' // REPLACEMENT CHARACTER
341
+ ) ;
342
+ if let Some ( c) = prohibited {
343
+ return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
344
+ }
345
+ // From ITU-T Recommendation X.520, Section 7.4:
346
+ // "The first code point of a string is prohibited from being a combining character."
347
+ let first_char = s. chars ( ) . next ( ) ;
348
+ if first_char. is_some_and ( |c| tables:: unicode_mark_category ( c) ) {
349
+ // I do think this ought to be considered a different error, but adding
350
+ // another enum variant would be a breaking change, so this is "good"
351
+ return Err ( Error ( ErrorCause :: ProhibitedCharacter ( first_char. unwrap ( ) ) ) ) ;
352
+ }
353
+
354
+ // 5. Check bidi
355
+ // From ITU-T Recommendation X.520, Section 7.4:
356
+ // "There are no bidirectional restrictions. The output string is the input string."
357
+ // So there is nothing to do for this step.
358
+
359
+ // 6. Insignificant Character Removal
360
+ // Done in calling functions.
361
+
362
+ Ok ( normalized. into ( ) )
363
+ }
364
+
296
365
#[ cfg( test) ]
297
366
mod test {
298
367
use super :: * ;
@@ -322,6 +391,15 @@ mod test {
322
391
assert_eq ! ( "foo@bar" , resourceprep( "foo@bar" ) . unwrap( ) ) ;
323
392
}
324
393
394
/// Checks `x520prep` on empty and plain ASCII input, on one mixed input with
/// and without case folding, and on an input that must be rejected.
#[test]
fn x520prep_examples() {
    // Empty and printable-ASCII input.
    let empty = x520prep("", true).unwrap();
    assert_eq!(empty, "");
    let ascii = x520prep("foo@bar", true).unwrap();
    assert_eq!(ascii, "foo@bar");

    // One input containing U+FE00, U+0009 and U+000B, prepared both ways.
    let mixed = "J.\u{FE00} \u{9} W.\u{9} \u{B} wuz h\u{0115} re";
    let kept_case = x520prep(mixed, false).unwrap();
    assert_eq!(kept_case, "J. W. wuz h\u{0115} re");
    let folded = x520prep(mixed, true).unwrap();
    assert_eq!(folded, "j. w. wuz h\u{0115} re");

    // An input whose first code point is U+0306 must be rejected.
    let rejected = x520prep("\u{0306} hello", true);
    assert_prohibited_character(rejected);
}
402
+
325
403
#[ test]
326
404
fn ascii_optimisations ( ) {
327
405
if let Cow :: Owned ( _) = nodeprep ( "nodepart" ) . unwrap ( ) {
0 commit comments