1
1
//! An implementation of the "stringprep" algorithm defined in [RFC 3454][].
2
2
//!
3
3
//! [RFC 3454]: https://tools.ietf.org/html/rfc3454
4
- #![ doc( html_root_url= "https://docs.rs/stringprep/0.1.2" ) ]
4
+ #![ doc( html_root_url = "https://docs.rs/stringprep/0.1.2" ) ]
5
5
#![ warn( missing_docs) ]
6
+ extern crate finl_unicode;
6
7
extern crate unicode_bidi;
7
8
extern crate unicode_normalization;
8
- extern crate finl_unicode;
9
9
10
+ use finl_unicode:: categories:: CharacterCategories ;
10
11
use std:: borrow:: Cow ;
11
12
use std:: fmt;
12
- use finl_unicode:: categories:: CharacterCategories ;
13
13
use unicode_normalization:: UnicodeNormalization ;
14
14
15
15
mod rfc3454;
@@ -37,7 +37,9 @@ impl fmt::Display for Error {
37
37
match self . 0 {
38
38
ErrorCause :: ProhibitedCharacter ( c) => write ! ( fmt, "prohibited character `{}`" , c) ,
39
39
ErrorCause :: ProhibitedBidirectionalText => write ! ( fmt, "prohibited bidirectional text" ) ,
40
- ErrorCause :: StartsWithCombiningCharacter => write ! ( fmt, "starts with combining character" ) ,
40
+ ErrorCause :: StartsWithCombiningCharacter => {
41
+ write ! ( fmt, "starts with combining character" )
42
+ }
41
43
ErrorCause :: EmptyString => write ! ( fmt, "empty string" ) ,
42
44
}
43
45
}
@@ -59,22 +61,23 @@ pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
59
61
}
60
62
61
63
// 2.1 Mapping
62
- let mapped = s. chars ( )
63
- . map ( |c| if tables:: non_ascii_space_character ( c) {
64
- ' '
65
- } else {
66
- c
67
- } )
64
+ let mapped = s
65
+ . chars ( )
66
+ . map ( |c| {
67
+ if tables:: non_ascii_space_character ( c) {
68
+ ' '
69
+ } else {
70
+ c
71
+ }
72
+ } )
68
73
. filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) ) ;
69
74
70
75
// 2.2 Normalization
71
76
let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
72
77
73
78
// 2.3 Prohibited Output
74
- let prohibited = normalized
75
- . chars ( )
76
- . find ( |& c| {
77
- tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
79
+ let prohibited = normalized. chars ( ) . find ( |& c| {
80
+ tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
78
81
tables:: ascii_control_character ( c) /* C.2.1 */ ||
79
82
tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
80
83
tables:: private_use ( c) /* C.3 */ ||
@@ -84,7 +87,7 @@ pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
84
87
tables:: inappropriate_for_canonical_representation ( c) /* C.7 */ ||
85
88
tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
86
89
tables:: tagging_character ( c) /* C.9 */
87
- } ) ;
90
+ } ) ;
88
91
if let Some ( c) = prohibited {
89
92
return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
90
93
}
@@ -117,8 +120,9 @@ fn is_prohibited_bidirectional_text(s: &str) -> bool {
117
120
// 3) If a string contains any RandALCat character, a RandALCat
118
121
// character MUST be the first character of the string, and a
119
122
// RandALCat character MUST be the last character of the string.
120
- if !tables:: bidi_r_or_al ( s. chars ( ) . next ( ) . unwrap ( ) ) ||
121
- !tables:: bidi_r_or_al ( s. chars ( ) . next_back ( ) . unwrap ( ) ) {
123
+ if !tables:: bidi_r_or_al ( s. chars ( ) . next ( ) . unwrap ( ) )
124
+ || !tables:: bidi_r_or_al ( s. chars ( ) . next_back ( ) . unwrap ( ) )
125
+ {
122
126
return true ;
123
127
}
124
128
}
@@ -140,18 +144,17 @@ pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
140
144
}
141
145
142
146
// 3. Mapping
143
- let mapped = s. chars ( )
147
+ let mapped = s
148
+ . chars ( )
144
149
. filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) )
145
150
. flat_map ( tables:: case_fold_for_nfkc) ;
146
151
147
152
// 4. Normalization
148
153
let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
149
154
150
155
// 5. Prohibited Output
151
- let prohibited = normalized
152
- . chars ( )
153
- . find ( |& c| {
154
- tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
156
+ let prohibited = normalized. chars ( ) . find ( |& c| {
157
+ tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
155
158
tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
156
159
tables:: private_use ( c) /* C.3 */ ||
157
160
tables:: non_character_code_point ( c) /* C.4 */ ||
@@ -160,7 +163,7 @@ pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
160
163
tables:: inappropriate_for_canonical_representation ( c) /* C.7 */ ||
161
164
tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
162
165
tables:: tagging_character ( c) /* C.9 */
163
- } ) ;
166
+ } ) ;
164
167
if let Some ( c) = prohibited {
165
168
return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
166
169
}
@@ -195,18 +198,17 @@ pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
195
198
}
196
199
197
200
// A.3. Mapping
198
- let mapped = s. chars ( )
201
+ let mapped = s
202
+ . chars ( )
199
203
. filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) )
200
204
. flat_map ( tables:: case_fold_for_nfkc) ;
201
205
202
206
// A.4. Normalization
203
207
let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
204
208
205
209
// A.5. Prohibited Output
206
- let prohibited = normalized
207
- . chars ( )
208
- . find ( |& c| {
209
- tables:: ascii_space_character ( c) /* C.1.1 */ ||
210
+ let prohibited = normalized. chars ( ) . find ( |& c| {
211
+ tables:: ascii_space_character ( c) /* C.1.1 */ ||
210
212
tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
211
213
tables:: ascii_control_character ( c) /* C.2.1 */ ||
212
214
tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
@@ -218,7 +220,7 @@ pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
218
220
tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
219
221
tables:: tagging_character ( c) /* C.9 */ ||
220
222
prohibited_node_character ( c)
221
- } ) ;
223
+ } ) ;
222
224
if let Some ( c) = prohibited {
223
225
return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
224
226
}
@@ -240,10 +242,7 @@ pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
240
242
241
243
// Additional characters not allowed in JID nodes, by RFC3920.
242
244
/// Returns `true` when `c` is one of the extra characters that may not appear
/// in a JID node: `"`, `&`, `'`, `/`, `:`, `<`, `>` or `@`.
fn prohibited_node_character(c: char) -> bool {
    // Kept as a table so the forbidden set reads as data rather than control flow.
    const FORBIDDEN: [char; 8] = ['"', '&', '\'', '/', ':', '<', '>', '@'];
    FORBIDDEN.contains(&c)
}
248
247
249
248
/// Prepares a string with the Resourceprep profile of the stringprep algorithm.
@@ -253,25 +252,22 @@ fn prohibited_node_character(c: char) -> bool {
253
252
/// [RFC 3920, Appendix B]: https://tools.ietf.org/html/rfc3920#appendix-B
254
253
pub fn resourceprep ( s : & str ) -> Result < Cow < ' _ , str > , Error > {
255
254
// fast path for ascii text
256
- if s. chars ( )
257
- . all ( |c| matches ! ( c, ' ' ..='~' ) )
258
- {
255
+ if s. chars ( ) . all ( |c| matches ! ( c, ' ' ..='~' ) ) {
259
256
return Ok ( Cow :: Borrowed ( s) ) ;
260
257
}
261
258
262
259
// B.3. Mapping
263
- let mapped = s. chars ( )
260
+ let mapped = s
261
+ . chars ( )
264
262
. filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) )
265
263
. collect :: < String > ( ) ;
266
264
267
265
// B.4. Normalization
268
266
let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
269
267
270
268
// B.5. Prohibited Output
271
- let prohibited = normalized
272
- . chars ( )
273
- . find ( |& c| {
274
- tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
269
+ let prohibited = normalized. chars ( ) . find ( |& c| {
270
+ tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
275
271
tables:: ascii_control_character ( c) /* C.2.1 */ ||
276
272
tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
277
273
tables:: private_use ( c) /* C.3 */ ||
@@ -281,7 +277,7 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
281
277
tables:: inappropriate_for_canonical_representation ( c) /* C.7 */ ||
282
278
tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
283
279
tables:: tagging_character ( c) /* C.9 */
284
- } ) ;
280
+ } ) ;
285
281
if let Some ( c) = prohibited {
286
282
return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
287
283
}
@@ -301,48 +297,36 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
301
297
Ok ( Cow :: Owned ( normalized) )
302
298
}
303
299
304
- /// Determines if `c` is to be removed according to section 7.2 of
305
- /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
306
- fn x520_mapped_to_nothing ( c : char ) -> bool {
307
- match c {
308
- '\u{00AD}' | '\u{1806}' | '\u{034F}' | '\u{180B}' ..='\u{180D}' |
309
- '\u{FE00}' ..='\u{FE0F}' | '\u{FFFC}' | '\u{200B}' => true ,
310
- // Technically control characters, but mapped to whitespace in X.520.
311
- '\u{09}' | '\u{0A}' ..='\u{0D}' | '\u{85}' => false ,
312
- _ => c. is_control ( ) ,
313
- }
314
- }
315
-
316
- /// Determines if `c` is to be replaced by SPACE (0x20) according to section 7.2 of
317
- /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
318
- fn x520_mapped_to_space ( c : char ) -> bool {
319
- match c {
320
- '\u{09}' | '\u{0A}' ..='\u{0D}' | '\u{85}' => true ,
321
- _ => c. is_separator ( ) ,
322
- }
323
- }
324
-
325
300
/// Prepares a string according to the procedures described in Section 7 of
326
301
/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
327
302
///
328
303
/// Note that this function does _not_ remove leading, trailing, or inner
329
304
/// spaces as described in Section 7.6, because the characters needing removal
330
305
/// will vary across the matching rules and ASN.1 syntaxes used.
331
306
pub fn x520prep ( s : & str , case_fold : bool ) -> Result < Cow < ' _ , str > , Error > {
332
- if s. len ( ) == 0 {
307
+ if s. is_empty ( ) {
333
308
return Err ( Error ( ErrorCause :: EmptyString ) ) ;
334
309
}
335
- if s. chars ( ) . all ( |c| matches ! ( c, ' ' ..='~' ) && ( !case_fold || c. is_ascii_lowercase ( ) ) ) {
310
+ if s. chars ( )
311
+ . all ( |c| matches ! ( c, ' ' ..='~' ) && ( !case_fold || c. is_ascii_lowercase ( ) ) )
312
+ {
336
313
return Ok ( Cow :: Borrowed ( s) ) ;
337
314
}
338
315
339
316
// 1. Transcode
340
317
// Already done because &str is enforced to be Unicode.
341
318
342
319
// 2. Map
343
- let mapped = s. chars ( )
344
- . filter ( |& c| !x520_mapped_to_nothing ( c) )
345
- . map ( |c| if x520_mapped_to_space ( c) { ' ' } else { c } ) ;
320
+ let mapped = s
321
+ . chars ( )
322
+ . filter ( |& c| !tables:: x520_mapped_to_nothing ( c) )
323
+ . map ( |c| {
324
+ if tables:: x520_mapped_to_space ( c) {
325
+ ' '
326
+ } else {
327
+ c
328
+ }
329
+ } ) ;
346
330
347
331
// 3. Normalize
348
332
let normalized = if case_fold {
@@ -354,24 +338,27 @@ pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
354
338
} ;
355
339
356
340
// 4. Prohibit
357
- let prohibited = normalized. chars ( ) . find ( |& c| tables:: unassigned_code_point ( c)
358
- || tables:: private_use ( c)
359
- || tables:: non_character_code_point ( c)
360
- || tables:: surrogate_code ( c)
361
- || c == '\u{FFFD}' // REPLACEMENT CHARACTER
341
+ let prohibited = normalized. chars ( ) . find (
342
+ |& c| {
343
+ tables:: unassigned_code_point ( c)
344
+ || tables:: private_use ( c)
345
+ || tables:: non_character_code_point ( c)
346
+ || tables:: surrogate_code ( c)
347
+ || c == '\u{FFFD}' // REPLACEMENT CHARACTER
348
+ } ,
362
349
) ;
363
350
if let Some ( c) = prohibited {
364
351
return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
365
352
}
366
353
// From ITU-T Recommendation X.520, Section 7.4:
367
354
// "The first code point of a string is prohibited from being a combining character."
368
- let first_char = s. chars ( ) . next ( ) ;
369
- if let Some ( c) = first_char {
370
- if c. is_mark ( ) {
371
- return Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) ;
355
+ match s. chars ( ) . next ( ) {
356
+ Some ( c) => {
357
+ if c. is_mark ( ) {
358
+ return Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) ;
359
+ }
372
360
}
373
- } else {
374
- return Err ( Error ( ErrorCause :: EmptyString ) ) ;
361
+ None => return Err ( Error ( ErrorCause :: EmptyString ) ) ,
375
362
}
376
363
377
364
// 5. Check bidi
@@ -389,32 +376,32 @@ pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
389
376
mod test {
390
377
use super :: * ;
391
378
392
- fn assert_prohibited_character < T > ( result : Result < T , Error > ) {
393
- match result {
394
- Err ( Error ( ErrorCause :: ProhibitedCharacter ( _) ) ) => ( ) ,
395
- _ => assert ! ( false )
396
- }
397
- }
379
+ fn assert_prohibited_character < T > ( result : Result < T , Error > ) {
380
+ match result {
381
+ Err ( Error ( ErrorCause :: ProhibitedCharacter ( _) ) ) => ( ) ,
382
+ _ => panic ! ( ) ,
383
+ }
384
+ }
398
385
399
386
fn assert_starts_with_combining_char < T > ( result : Result < T , Error > ) {
400
- match result {
401
- Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) => ( ) ,
402
- _ => assert ! ( false )
403
- }
404
- }
387
+ match result {
388
+ Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) => ( ) ,
389
+ _ => panic ! ( ) ,
390
+ }
391
+ }
405
392
406
393
// RFC4013, 3. Examples
407
394
#[ test]
408
395
fn saslprep_examples ( ) {
409
- assert_prohibited_character ( saslprep ( "\u{0007} " ) ) ;
396
+ assert_prohibited_character ( saslprep ( "\u{0007} " ) ) ;
410
397
}
411
398
412
- #[ test]
413
- fn nodeprep_examples ( ) {
399
+ #[ test]
400
+ fn nodeprep_examples ( ) {
414
401
assert_prohibited_character ( nodeprep ( " " ) ) ;
415
402
assert_prohibited_character ( nodeprep ( "\u{00a0} " ) ) ;
416
403
assert_prohibited_character ( nodeprep ( "foo@bar" ) ) ;
417
- }
404
+ }
418
405
419
406
#[ test]
420
407
fn resourceprep_examples ( ) {
@@ -424,8 +411,14 @@ mod test {
424
411
#[ test]
425
412
fn x520prep_examples ( ) {
426
413
assert_eq ! ( x520prep( "foo@bar" , true ) . unwrap( ) , "foo@bar" ) ;
427
- assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , false ) . unwrap( ) , "J. W. wuz h\u{0115} re" ) ;
428
- assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , true ) . unwrap( ) , "j. w. wuz h\u{0115} re" ) ;
414
+ assert_eq ! (
415
+ x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , false ) . unwrap( ) ,
416
+ "J. W. wuz h\u{0115} re"
417
+ ) ;
418
+ assert_eq ! (
419
+ x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , true ) . unwrap( ) ,
420
+ "j. w. wuz h\u{0115} re"
421
+ ) ;
429
422
assert_eq ! ( x520prep( "UPPERCASED" , true ) . unwrap( ) , "uppercased" ) ;
430
423
assert_starts_with_combining_char ( x520prep ( "\u{0306} hello" , true ) ) ;
431
424
}
0 commit comments