Skip to content

Commit 753eb40

Browse files
committed
Host parsing rules.
1 parent bdde710 commit 753eb40

File tree

4 files changed

+127
-38
lines changed

4 files changed

+127
-38
lines changed

src/host.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,10 @@ pub(crate) enum HostInternal {
2424
Ipv6(Ipv6Addr),
2525
}
2626

27-
impl<S> From<Host<S>> for HostInternal
28-
where
29-
S: ToString,
30-
{
31-
fn from(host: Host<S>) -> HostInternal {
27+
impl From<Host<String>> for HostInternal {
28+
fn from(host: Host<String>) -> HostInternal {
3229
match host {
33-
Host::Domain(ref s) if s.to_string().is_empty() => HostInternal::None,
30+
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
3431
Host::Domain(_) => HostInternal::Domain,
3532
Host::Ipv4(address) => HostInternal::Ipv4(address),
3633
Host::Ipv6(address) => HostInternal::Ipv6(address),

src/lib.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,7 @@ impl Url {
690690
/// ```
691691
#[inline]
692692
pub fn cannot_be_a_base(&self) -> bool {
693-
!self.slice(self.path_start..).starts_with('/')
693+
!self.slice(self.scheme_end + 1..).starts_with('/')
694694
}
695695

696696
/// Return the username for this URL (typically the empty string)
@@ -1642,10 +1642,25 @@ impl Url {
16421642
if host == "" && SchemeType::from(self.scheme()).is_special() {
16431643
return Err(ParseError::EmptyHost);
16441644
}
1645+
let mut host_substr = host;
1646+
// Otherwise, if c is U+003A (:) and the [] flag is unset, then
1647+
if !host.starts_with('[') || !host.ends_with(']') {
1648+
match host.find(':') {
1649+
Some(0) => {
1650+
// If buffer is the empty string, validation error, return failure.
1651+
return Err(ParseError::InvalidDomainCharacter);
1652+
}
1653+
// Let host be the result of host parsing buffer
1654+
Some(colon_index) => {
1655+
host_substr = &host[..colon_index];
1656+
}
1657+
None => {}
1658+
}
1659+
}
16451660
if SchemeType::from(self.scheme()).is_special() {
1646-
self.set_host_internal(Host::parse(host)?, None)
1661+
self.set_host_internal(Host::parse(host_substr)?, None);
16471662
} else {
1648-
self.set_host_internal(Host::parse_opaque(host)?, None)
1663+
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
16491664
}
16501665
} else if self.has_host() {
16511666
if SchemeType::from(self.scheme()).is_special() {

src/parser.rs

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ impl fmt::Display for SyntaxViolation {
156156
}
157157
}
158158

159-
#[derive(Copy, Clone)]
159+
#[derive(Copy, Clone, PartialEq)]
160160
pub enum SchemeType {
161161
File,
162162
SpecialNotFile,
@@ -852,11 +852,16 @@ impl<'a> Parser<'a> {
852852
self.serialization.push('/');
853853
self.serialization.push('/');
854854
// authority state
855+
let before_authority = self.serialization.len();
855856
let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
857+
let has_authority = before_authority != self.serialization.len();
856858
// host state
857859
let host_start = to_u32(self.serialization.len())?;
858860
let (host_end, host, port, remaining) =
859861
self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
862+
if host == HostInternal::None && has_authority {
863+
return Err(ParseError::EmptyHost);
864+
}
860865
// path state
861866
let path_start = to_u32(self.serialization.len())?;
862867
let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
@@ -900,7 +905,18 @@ impl<'a> Parser<'a> {
900905
}
901906
let (mut userinfo_char_count, remaining) = match last_at {
902907
None => return Ok((to_u32(self.serialization.len())?, input)),
903-
Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)),
908+
Some((0, remaining)) => {
909+
// Otherwise, if one of the following is true
910+
// c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
911+
// url is special and c is U+005C (\)
912+
// If @ flag is set and buffer is the empty string, validation error, return failure.
913+
if let (Some(c), _) = remaining.split_first() {
914+
if c == '/' || c == '?' || c == '#' || scheme_type.is_special() && c == '\\' {
915+
return Err(ParseError::EmptyHost);
916+
}
917+
}
918+
return Ok((to_u32(self.serialization.len())?, remaining));
919+
}
904920
Some(x) => x,
905921
};
906922

@@ -946,6 +962,18 @@ impl<'a> Parser<'a> {
946962
let (host, remaining) = Parser::parse_host(input, scheme_type)?;
947963
write!(&mut self.serialization, "{}", host).unwrap();
948964
let host_end = to_u32(self.serialization.len())?;
965+
if let Host::Domain(h) = &host {
966+
if h.is_empty() {
967+
// Port with an empty host
968+
if remaining.starts_with(":") {
969+
return Err(ParseError::EmptyHost);
970+
}
971+
if scheme_type.is_special() {
972+
return Err(ParseError::EmptyHost);
973+
}
974+
}
975+
};
976+
949977
let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
950978
let scheme = || default_port(&self.serialization[..scheme_end as usize]);
951979
Parser::parse_port(remaining, scheme, self.context)?
@@ -962,6 +990,9 @@ impl<'a> Parser<'a> {
962990
mut input: Input,
963991
scheme_type: SchemeType,
964992
) -> ParseResult<(Host<String>, Input)> {
993+
if scheme_type.is_file() {
994+
return Parser::get_file_host(input);
995+
}
965996
// Undo the Input abstraction here to avoid allocating in the common case
966997
// where the host part of the input does not contain any tab or newline
967998
let input_str = input.chars.as_str();
@@ -1012,10 +1043,41 @@ impl<'a> Parser<'a> {
10121043
Ok((host, input))
10131044
}
10141045

1015-
pub(crate) fn parse_file_host<'i>(
1046+
fn get_file_host<'i>(input: Input<'i>) -> ParseResult<(Host<String>, Input)> {
1047+
let (_, host_str, remaining) = Parser::file_host(input)?;
1048+
let host = match Host::parse(&host_str)? {
1049+
Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
1050+
host => host,
1051+
};
1052+
Ok((host, remaining))
1053+
}
1054+
1055+
fn parse_file_host<'i>(
10161056
&mut self,
10171057
input: Input<'i>,
10181058
) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1059+
let has_host;
1060+
let (_, host_str, remaining) = Parser::file_host(input)?;
1061+
let host = if host_str.is_empty() {
1062+
has_host = false;
1063+
HostInternal::None
1064+
} else {
1065+
match Host::parse(&host_str)? {
1066+
Host::Domain(ref d) if d == "localhost" => {
1067+
has_host = false;
1068+
HostInternal::None
1069+
}
1070+
host => {
1071+
write!(&mut self.serialization, "{}", host).unwrap();
1072+
has_host = true;
1073+
host.into()
1074+
}
1075+
}
1076+
};
1077+
Ok((has_host, host, remaining))
1078+
}
1079+
1080+
pub fn file_host<'i>(input: Input<'i>) -> ParseResult<(bool, String, Input<'i>)> {
10191081
// Undo the Input abstraction here to avoid allocating in the common case
10201082
// where the host part of the input does not contain any tab or newline
10211083
let input_str = input.chars.as_str();
@@ -1044,20 +1106,9 @@ impl<'a> Parser<'a> {
10441106
}
10451107
}
10461108
if is_windows_drive_letter(host_str) {
1047-
return Ok((false, HostInternal::None, input));
1109+
return Ok((false, "".to_string(), input));
10481110
}
1049-
let host = if host_str.is_empty() {
1050-
HostInternal::None
1051-
} else {
1052-
match Host::parse(host_str)? {
1053-
Host::Domain(ref d) if d == "localhost" => HostInternal::None,
1054-
host => {
1055-
write!(&mut self.serialization, "{}", host).unwrap();
1056-
host.into()
1057-
}
1058-
}
1059-
};
1060-
Ok((true, host, remaining))
1111+
Ok((true, host_str.to_string(), remaining))
10611112
}
10621113

10631114
pub fn parse_port<P>(
@@ -1492,7 +1543,7 @@ fn c0_control_or_space(ch: char) -> bool {
14921543

14931544
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
14941545
#[inline]
1495-
pub fn ascii_tab_or_new_line(ch: char) -> bool {
1546+
fn ascii_tab_or_new_line(ch: char) -> bool {
14961547
matches!(ch, '\t' | '\r' | '\n')
14971548
}
14981549

src/quirks.rs

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
//! you probably want to use `Url` method instead.
1313
1414
use parser::{default_port, Context, Input, Parser, SchemeType};
15+
use std::cell::RefCell;
16+
use SyntaxViolation;
1517
use {idna, Host, ParseError, Position, Url};
1618

1719
/// https://url.spec.whatwg.org/#dom-url-domaintoascii
@@ -110,19 +112,22 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
110112
let opt_port;
111113
{
112114
let scheme = url.scheme();
113-
let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
114-
match result {
115-
Ok((h, remaining)) => {
116-
host = h;
117-
opt_port = if let Some(remaining) = remaining.split_prefix(':') {
115+
let scheme_type = SchemeType::from(scheme);
116+
if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) {
117+
host = h;
118+
opt_port = if let Some(remaining) = remaining.split_prefix(':') {
119+
if remaining.is_empty() {
120+
None
121+
} else {
118122
Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
119123
.ok()
120124
.map(|(port, _remaining)| port)
121-
} else {
122-
None
123-
};
124-
}
125-
Err(_) => return Err(()),
125+
}
126+
} else {
127+
None
128+
};
129+
} else {
130+
return Err(());
126131
}
127132
}
128133
// Make sure we won't set an empty host to a url with a username or a port
@@ -154,8 +159,25 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
154159
if url.cannot_be_a_base() {
155160
return Err(());
156161
}
157-
let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
158-
if let Ok((host, _remaining)) = result {
162+
// Host parsing rules are strict,
163+
// We don't want to trim the input
164+
let input = Input::no_trim(new_hostname);
165+
let scheme_type = SchemeType::from(url.scheme());
166+
if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) {
167+
if let Host::Domain(h) = &host {
168+
if h.is_empty() {
169+
// Empty host on special not file url
170+
if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile
171+
// Port with an empty host
172+
||!port(&url).is_empty()
173+
// Empty host with includes credentials
174+
|| !url.username().is_empty()
175+
|| !url.password().unwrap_or(&"").is_empty()
176+
{
177+
return Err(());
178+
}
179+
}
180+
}
159181
url.set_host_internal(host, None);
160182
Ok(())
161183
} else {
@@ -209,6 +231,10 @@ pub fn set_pathname(url: &mut Url, new_pathname: &str) {
209231
&& Some('\\') == new_pathname.chars().nth(0)
210232
{
211233
url.set_path(new_pathname)
234+
} else {
235+
let mut path_to_set = String::from("/");
236+
path_to_set.push_str(new_pathname);
237+
url.set_path(&path_to_set)
212238
}
213239
}
214240

0 commit comments

Comments
 (0)