Skip to content

Commit da2b1af

Browse files
committed
Merge commit 'refs/pull/484/head' of github.com:servo/rust-url into 2.0
2 parents 5453f6f + 8975bb9 commit da2b1af

File tree

3 files changed

+121
-84
lines changed

3 files changed

+121
-84
lines changed

idna/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "idna"
3-
version = "0.1.5"
3+
version = "0.1.6"
44
authors = ["The rust-url developers"]
55
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
66
repository = "https://github.com/servo/rust-url/"

idna/src/lib.rs

Lines changed: 13 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -48,14 +48,12 @@ pub mod uts46;
4848
///
4949
/// This process may fail.
5050
pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
51-
uts46::to_ascii(
52-
domain,
53-
uts46::Flags {
54-
use_std3_ascii_rules: false,
55-
transitional_processing: false,
56-
verify_dns_length: false,
57-
},
58-
)
51+
let flags = uts46::Flags {
52+
use_std3_ascii_rules: false,
53+
transitional_processing: false,
54+
verify_dns_length: false,
55+
};
56+
uts46::Config::from(flags).check_hyphens(false).to_ascii(domain)
5957
}
6058

6159
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
@@ -67,14 +65,12 @@ pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
6765
/// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
6866
/// but always returns a string for the mapped domain.
6967
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
70-
uts46::to_unicode(
71-
domain,
72-
uts46::Flags {
73-
use_std3_ascii_rules: false,
68+
let flags = uts46::Flags {
69+
use_std3_ascii_rules: false,
7470

75-
// Unused:
76-
transitional_processing: false,
77-
verify_dns_length: false,
78-
},
79-
)
71+
// Unused:
72+
transitional_processing: false,
73+
verify_dns_length: false,
74+
};
75+
uts46::Config::from(flags).check_hyphens(false).to_unicode(domain)
8076
}

idna/src/uts46.rs

Lines changed: 107 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -258,16 +258,16 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
258258
}
259259

260260
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
261-
fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
261+
fn validate_full(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec<Error>) {
262262
// V1: Must be in NFC form.
263263
if label.nfc().ne(label.chars()) {
264264
errors.push(Error::ValidityCriteria);
265265
} else {
266-
validate(label, is_bidi_domain, flags, errors);
266+
validate(label, is_bidi_domain, config, errors);
267267
}
268268
}
269269

270-
fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
270+
fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec<Error>) {
271271
let first_char = label.chars().next();
272272
if first_char == None {
273273
// Empty string, pass
@@ -277,11 +277,9 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
277277
// NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the
278278
// third and fourth positions. But nobody follows this criteria. See the spec issue below:
279279
// https://github.com/whatwg/url/issues/53
280-
//
281-
// TODO: Add *CheckHyphens* flag.
282280

283281
// V3: neither begin nor end with a U+002D HYPHEN-MINUS
284-
else if label.starts_with("-") || label.ends_with("-") {
282+
else if config.check_hyphens && (label.starts_with("-") || label.ends_with("-")) {
285283
errors.push(Error::ValidityCriteria);
286284
}
287285
// V4: not contain a U+002E FULL STOP
@@ -295,8 +293,8 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
295293
// V6: Check against Mapping Table
296294
else if label.chars().any(|c| match *find_char(c) {
297295
Mapping::Valid => false,
298-
Mapping::Deviation(_) => flags.transitional_processing,
299-
Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
296+
Mapping::Deviation(_) => config.flags.transitional_processing,
297+
Mapping::DisallowedStd3Valid => config.flags.use_std3_ascii_rules,
300298
_ => true,
301299
}) {
302300
errors.push(Error::ValidityCriteria);
@@ -314,10 +312,10 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
314312
}
315313

316314
/// http://www.unicode.org/reports/tr46/#Processing
317-
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
315+
fn processing(domain: &str, config: Config, errors: &mut Vec<Error>) -> String {
318316
let mut mapped = String::with_capacity(domain.len());
319317
for c in domain.chars() {
320-
map_char(c, flags, &mut mapped, errors)
318+
map_char(c, config.flags, &mut mapped, errors)
321319
}
322320
let mut normalized = String::with_capacity(mapped.len());
323321
normalized.extend(mapped.nfc());
@@ -358,29 +356,117 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
358356
if label.starts_with(PUNYCODE_PREFIX) {
359357
match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
360358
Some(decoded_label) => {
361-
let flags = Flags {
362-
transitional_processing: false,
363-
..flags
364-
};
365-
validate_full(&decoded_label, is_bidi_domain, flags, errors);
359+
let config = config.transitional_processing(false);
360+
validate_full(&decoded_label, is_bidi_domain, config, errors);
366361
validated.push_str(&decoded_label)
367362
}
368363
None => errors.push(Error::PunycodeError),
369364
}
370365
} else {
371366
// `normalized` is already `NFC` so we can skip that check
372-
validate(label, is_bidi_domain, flags, errors);
367+
validate(label, is_bidi_domain, config, errors);
373368
validated.push_str(label)
374369
}
375370
}
376371
validated
377372
}
378373

374+
#[derive(Clone, Copy)]
375+
pub struct Config {
376+
flags: Flags,
377+
check_hyphens: bool,
378+
}
379+
380+
impl From<Flags> for Config {
381+
#[inline]
382+
fn from(flags: Flags) -> Self {
383+
Self { flags, check_hyphens: true }
384+
}
385+
}
386+
387+
impl Config {
388+
#[inline]
389+
pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
390+
self.flags.use_std3_ascii_rules = value;
391+
self
392+
}
393+
394+
#[inline]
395+
pub fn transitional_processing(mut self, value: bool) -> Self {
396+
self.flags.transitional_processing = value;
397+
self
398+
}
399+
400+
#[inline]
401+
pub fn verify_dns_length(mut self, value: bool) -> Self {
402+
self.flags.verify_dns_length = value;
403+
self
404+
}
405+
406+
#[inline]
407+
pub fn check_hyphens(mut self, value: bool) -> Self {
408+
self.check_hyphens = value;
409+
self
410+
}
411+
412+
/// http://www.unicode.org/reports/tr46/#ToASCII
413+
pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
414+
let mut errors = Vec::new();
415+
let mut result = String::new();
416+
let mut first = true;
417+
for label in processing(domain, self, &mut errors).split('.') {
418+
if !first {
419+
result.push('.');
420+
}
421+
first = false;
422+
if label.is_ascii() {
423+
result.push_str(label);
424+
} else {
425+
match punycode::encode_str(label) {
426+
Some(x) => {
427+
result.push_str(PUNYCODE_PREFIX);
428+
result.push_str(&x);
429+
},
430+
None => errors.push(Error::PunycodeError)
431+
}
432+
}
433+
}
434+
435+
if self.flags.verify_dns_length {
436+
let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
437+
if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
438+
errors.push(Error::TooShortForDns)
439+
}
440+
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
441+
errors.push(Error::TooLongForDns)
442+
}
443+
}
444+
if errors.is_empty() {
445+
Ok(result)
446+
} else {
447+
Err(Errors(errors))
448+
}
449+
}
450+
451+
/// http://www.unicode.org/reports/tr46/#ToUnicode
452+
pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
453+
let mut errors = Vec::new();
454+
let domain = processing(domain, self, &mut errors);
455+
let errors = if errors.is_empty() {
456+
Ok(())
457+
} else {
458+
Err(Errors(errors))
459+
};
460+
(domain, errors)
461+
}
462+
463+
}
464+
379465
#[derive(Copy, Clone)]
380466
pub struct Flags {
381-
pub use_std3_ascii_rules: bool,
382-
pub transitional_processing: bool,
383-
pub verify_dns_length: bool,
467+
pub use_std3_ascii_rules: bool,
468+
pub transitional_processing: bool,
469+
pub verify_dns_length: bool,
384470
}
385471

386472
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
@@ -403,58 +489,13 @@ pub struct Errors(Vec<Error>);
403489

404490
/// http://www.unicode.org/reports/tr46/#ToASCII
405491
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
406-
let mut errors = Vec::new();
407-
let mut result = String::new();
408-
let mut first = true;
409-
for label in processing(domain, flags, &mut errors).split('.') {
410-
if !first {
411-
result.push('.');
412-
}
413-
first = false;
414-
if label.is_ascii() {
415-
result.push_str(label);
416-
} else {
417-
match punycode::encode_str(label) {
418-
Some(x) => {
419-
result.push_str(PUNYCODE_PREFIX);
420-
result.push_str(&x);
421-
}
422-
None => errors.push(Error::PunycodeError),
423-
}
424-
}
425-
}
426-
427-
if flags.verify_dns_length {
428-
let domain = if result.ends_with(".") {
429-
&result[..result.len() - 1]
430-
} else {
431-
&*result
432-
};
433-
if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
434-
errors.push(Error::TooShortForDns)
435-
}
436-
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
437-
errors.push(Error::TooLongForDns)
438-
}
439-
}
440-
if errors.is_empty() {
441-
Ok(result)
442-
} else {
443-
Err(Errors(errors))
444-
}
492+
Config::from(flags).to_ascii(domain)
445493
}
446494

447495
/// http://www.unicode.org/reports/tr46/#ToUnicode
448496
///
449497
/// Only `use_std3_ascii_rules` is used in `flags`.
450498
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
451499
flags.transitional_processing = false;
452-
let mut errors = Vec::new();
453-
let domain = processing(domain, flags, &mut errors);
454-
let errors = if errors.is_empty() {
455-
Ok(())
456-
} else {
457-
Err(Errors(errors))
458-
};
459-
(domain, errors)
500+
Config::from(flags).to_unicode(domain)
460501
}

0 commit comments

Comments
 (0)