@@ -258,16 +258,16 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
258
258
}
259
259
260
260
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
261
- fn validate_full ( label : & str , is_bidi_domain : bool , flags : Flags , errors : & mut Vec < Error > ) {
261
+ fn validate_full ( label : & str , is_bidi_domain : bool , config : Config , errors : & mut Vec < Error > ) {
262
262
// V1: Must be in NFC form.
263
263
if label. nfc ( ) . ne ( label. chars ( ) ) {
264
264
errors. push ( Error :: ValidityCriteria ) ;
265
265
} else {
266
- validate ( label, is_bidi_domain, flags , errors) ;
266
+ validate ( label, is_bidi_domain, config , errors) ;
267
267
}
268
268
}
269
269
270
- fn validate ( label : & str , is_bidi_domain : bool , flags : Flags , errors : & mut Vec < Error > ) {
270
+ fn validate ( label : & str , is_bidi_domain : bool , config : Config , errors : & mut Vec < Error > ) {
271
271
let first_char = label. chars ( ) . next ( ) ;
272
272
if first_char == None {
273
273
// Empty string, pass
@@ -277,11 +277,9 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
277
277
// NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the
278
278
// third and fourth positions. But nobody follows this criterion. See the spec issue below:
279
279
// https://github.com/whatwg/url/issues/53
280
- //
281
- // TODO: Add *CheckHyphens* flag.
282
280
283
281
// V3: neither begin nor end with a U+002D HYPHEN-MINUS
284
- else if label. starts_with ( "-" ) || label. ends_with ( "-" ) {
282
+ else if config . check_hyphens && ( label. starts_with ( "-" ) || label. ends_with ( "-" ) ) {
285
283
errors. push ( Error :: ValidityCriteria ) ;
286
284
}
287
285
// V4: not contain a U+002E FULL STOP
@@ -295,8 +293,8 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
295
293
// V6: Check against Mapping Table
296
294
else if label. chars ( ) . any ( |c| match * find_char ( c) {
297
295
Mapping :: Valid => false ,
298
- Mapping :: Deviation ( _) => flags. transitional_processing ,
299
- Mapping :: DisallowedStd3Valid => flags. use_std3_ascii_rules ,
296
+ Mapping :: Deviation ( _) => config . flags . transitional_processing ,
297
+ Mapping :: DisallowedStd3Valid => config . flags . use_std3_ascii_rules ,
300
298
_ => true ,
301
299
} ) {
302
300
errors. push ( Error :: ValidityCriteria ) ;
@@ -314,10 +312,10 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
314
312
}
315
313
316
314
/// http://www.unicode.org/reports/tr46/#Processing
317
- fn processing ( domain : & str , flags : Flags , errors : & mut Vec < Error > ) -> String {
315
+ fn processing ( domain : & str , config : Config , errors : & mut Vec < Error > ) -> String {
318
316
let mut mapped = String :: with_capacity ( domain. len ( ) ) ;
319
317
for c in domain. chars ( ) {
320
- map_char ( c, flags, & mut mapped, errors)
318
+ map_char ( c, config . flags , & mut mapped, errors)
321
319
}
322
320
let mut normalized = String :: with_capacity ( mapped. len ( ) ) ;
323
321
normalized. extend ( mapped. nfc ( ) ) ;
@@ -358,29 +356,117 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
358
356
if label. starts_with ( PUNYCODE_PREFIX ) {
359
357
match punycode:: decode_to_string ( & label[ PUNYCODE_PREFIX . len ( ) ..] ) {
360
358
Some ( decoded_label) => {
361
- let flags = Flags {
362
- transitional_processing : false ,
363
- ..flags
364
- } ;
365
- validate_full ( & decoded_label, is_bidi_domain, flags, errors) ;
359
+ let config = config. transitional_processing ( false ) ;
360
+ validate_full ( & decoded_label, is_bidi_domain, config, errors) ;
366
361
validated. push_str ( & decoded_label)
367
362
}
368
363
None => errors. push ( Error :: PunycodeError ) ,
369
364
}
370
365
} else {
371
366
// `normalized` is already `NFC` so we can skip that check
372
- validate ( label, is_bidi_domain, flags , errors) ;
367
+ validate ( label, is_bidi_domain, config , errors) ;
373
368
validated. push_str ( label)
374
369
}
375
370
}
376
371
validated
377
372
}
378
373
374
+ #[ derive( Clone , Copy ) ]
375
+ pub struct Config {
376
+ flags : Flags ,
377
+ check_hyphens : bool ,
378
+ }
379
+
380
+ impl From < Flags > for Config {
381
+ #[ inline]
382
+ fn from ( flags : Flags ) -> Self {
383
+ Self { flags, check_hyphens : true }
384
+ }
385
+ }
386
+
387
+ impl Config {
388
+ #[ inline]
389
+ pub fn use_std3_ascii_rules ( mut self , value : bool ) -> Self {
390
+ self . flags . use_std3_ascii_rules = value;
391
+ self
392
+ }
393
+
394
+ #[ inline]
395
+ pub fn transitional_processing ( mut self , value : bool ) -> Self {
396
+ self . flags . transitional_processing = value;
397
+ self
398
+ }
399
+
400
+ #[ inline]
401
+ pub fn verify_dns_length ( mut self , value : bool ) -> Self {
402
+ self . flags . verify_dns_length = value;
403
+ self
404
+ }
405
+
406
+ #[ inline]
407
+ pub fn check_hyphens ( mut self , value : bool ) -> Self {
408
+ self . check_hyphens = value;
409
+ self
410
+ }
411
+
412
+ /// http://www.unicode.org/reports/tr46/#ToASCII
413
+ pub fn to_ascii ( self , domain : & str ) -> Result < String , Errors > {
414
+ let mut errors = Vec :: new ( ) ;
415
+ let mut result = String :: new ( ) ;
416
+ let mut first = true ;
417
+ for label in processing ( domain, self , & mut errors) . split ( '.' ) {
418
+ if !first {
419
+ result. push ( '.' ) ;
420
+ }
421
+ first = false ;
422
+ if label. is_ascii ( ) {
423
+ result. push_str ( label) ;
424
+ } else {
425
+ match punycode:: encode_str ( label) {
426
+ Some ( x) => {
427
+ result. push_str ( PUNYCODE_PREFIX ) ;
428
+ result. push_str ( & x) ;
429
+ } ,
430
+ None => errors. push ( Error :: PunycodeError )
431
+ }
432
+ }
433
+ }
434
+
435
+ if self . flags . verify_dns_length {
436
+ let domain = if result. ends_with ( "." ) { & result[ ..result. len ( ) -1 ] } else { & * result } ;
437
+ if domain. len ( ) < 1 || domain. split ( '.' ) . any ( |label| label. len ( ) < 1 ) {
438
+ errors. push ( Error :: TooShortForDns )
439
+ }
440
+ if domain. len ( ) > 253 || domain. split ( '.' ) . any ( |label| label. len ( ) > 63 ) {
441
+ errors. push ( Error :: TooLongForDns )
442
+ }
443
+ }
444
+ if errors. is_empty ( ) {
445
+ Ok ( result)
446
+ } else {
447
+ Err ( Errors ( errors) )
448
+ }
449
+ }
450
+
451
+ /// http://www.unicode.org/reports/tr46/#ToUnicode
452
+ pub fn to_unicode ( self , domain : & str ) -> ( String , Result < ( ) , Errors > ) {
453
+ let mut errors = Vec :: new ( ) ;
454
+ let domain = processing ( domain, self , & mut errors) ;
455
+ let errors = if errors. is_empty ( ) {
456
+ Ok ( ( ) )
457
+ } else {
458
+ Err ( Errors ( errors) )
459
+ } ;
460
+ ( domain, errors)
461
+ }
462
+
463
+ }
464
+
379
465
/// Boolean switches selecting the UTS #46 processing and validation options.
///
/// All fields are public so a value can be written as a struct literal and
/// passed to the free `to_ascii` / `to_unicode` functions, or converted into a
/// `Config`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Flags {
    /// When set, characters whose mapping is `DisallowedStd3Valid` fail the
    /// mapping-table validity check (V6). The flag is also passed to `map_char`.
    pub use_std3_ascii_rules: bool,
    /// When set, `Deviation` characters fail the mapping-table validity check
    /// (V6). The flag is also passed to `map_char`. The free `to_unicode`
    /// function always clears it before processing.
    pub transitional_processing: bool,
    /// When set, `to_ascii` additionally requires every label to be 1 to 63
    /// bytes long and the whole name (ignoring one trailing dot) to be 1 to
    /// 253 bytes long.
    pub verify_dns_length: bool,
}
385
471
386
472
#[ derive( PartialEq , Eq , Clone , Copy , Debug ) ]
@@ -403,58 +489,13 @@ pub struct Errors(Vec<Error>);
403
489
404
490
/// http://www.unicode.org/reports/tr46/#ToASCII
405
491
pub fn to_ascii ( domain : & str , flags : Flags ) -> Result < String , Errors > {
406
- let mut errors = Vec :: new ( ) ;
407
- let mut result = String :: new ( ) ;
408
- let mut first = true ;
409
- for label in processing ( domain, flags, & mut errors) . split ( '.' ) {
410
- if !first {
411
- result. push ( '.' ) ;
412
- }
413
- first = false ;
414
- if label. is_ascii ( ) {
415
- result. push_str ( label) ;
416
- } else {
417
- match punycode:: encode_str ( label) {
418
- Some ( x) => {
419
- result. push_str ( PUNYCODE_PREFIX ) ;
420
- result. push_str ( & x) ;
421
- }
422
- None => errors. push ( Error :: PunycodeError ) ,
423
- }
424
- }
425
- }
426
-
427
- if flags. verify_dns_length {
428
- let domain = if result. ends_with ( "." ) {
429
- & result[ ..result. len ( ) - 1 ]
430
- } else {
431
- & * result
432
- } ;
433
- if domain. len ( ) < 1 || domain. split ( '.' ) . any ( |label| label. len ( ) < 1 ) {
434
- errors. push ( Error :: TooShortForDns )
435
- }
436
- if domain. len ( ) > 253 || domain. split ( '.' ) . any ( |label| label. len ( ) > 63 ) {
437
- errors. push ( Error :: TooLongForDns )
438
- }
439
- }
440
- if errors. is_empty ( ) {
441
- Ok ( result)
442
- } else {
443
- Err ( Errors ( errors) )
444
- }
492
+ Config :: from ( flags) . to_ascii ( domain)
445
493
}
446
494
447
495
/// http://www.unicode.org/reports/tr46/#ToUnicode
448
496
///
449
497
/// Only `use_std3_ascii_rules` is used in `flags`.
450
498
pub fn to_unicode ( domain : & str , mut flags : Flags ) -> ( String , Result < ( ) , Errors > ) {
451
499
flags. transitional_processing = false ;
452
- let mut errors = Vec :: new ( ) ;
453
- let domain = processing ( domain, flags, & mut errors) ;
454
- let errors = if errors. is_empty ( ) {
455
- Ok ( ( ) )
456
- } else {
457
- Err ( Errors ( errors) )
458
- } ;
459
- ( domain, errors)
500
+ Config :: from ( flags) . to_unicode ( domain)
460
501
}
0 commit comments