Skip to content

Commit 80d76bd

Browse files
committed
Host parsing rules.
1 parent 9981c7b commit 80d76bd

File tree

3 files changed

+129
-27
lines changed

3 files changed

+129
-27
lines changed

src/lib.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ impl Url {
692692
/// ```
693693
#[inline]
694694
pub fn cannot_be_a_base(&self) -> bool {
695-
!self.slice(self.path_start..).starts_with('/')
695+
!self.slice(self.scheme_end + 1..).starts_with('/')
696696
}
697697

698698
/// Return the username for this URL (typically the empty string)
@@ -1643,10 +1643,25 @@ impl Url {
16431643
if host == "" && SchemeType::from(self.scheme()).is_special() {
16441644
return Err(ParseError::EmptyHost);
16451645
}
1646+
let mut host_substr = host;
1647+
// Otherwise, if c is U+003A (:) and the [] flag is unset, then
1648+
if !host.starts_with('[') || !host.ends_with(']') {
1649+
match host.find(':') {
1650+
Some(0) => {
1651+
// If buffer is the empty string, validation error, return failure.
1652+
return Err(ParseError::InvalidDomainCharacter);
1653+
}
1654+
// Let host be the result of host parsing buffer
1655+
Some(colon_index) => {
1656+
host_substr = &host[..colon_index];
1657+
}
1658+
None => {}
1659+
}
1660+
}
16461661
if SchemeType::from(self.scheme()).is_special() {
1647-
self.set_host_internal(Host::parse(host)?, None)
1662+
self.set_host_internal(Host::parse(host_substr)?, None);
16481663
} else {
1649-
self.set_host_internal(Host::parse_opaque(host)?, None)
1664+
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
16501665
}
16511666
} else if self.has_host() {
16521667
if SchemeType::from(self.scheme()).is_special() {

src/parser.rs

Lines changed: 69 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ macro_rules! simple_enum_error {
7979

8080
simple_enum_error! {
8181
EmptyHost => "empty host",
82+
InvalidAuthority => "invalid authority",
8283
IdnaError => "invalid international domain name",
8384
InvalidPort => "invalid port number",
8485
InvalidIpv4Address => "invalid IPv4 address",
@@ -156,7 +157,7 @@ impl fmt::Display for SyntaxViolation {
156157
}
157158
}
158159

159-
#[derive(Copy, Clone)]
160+
#[derive(Copy, Clone, PartialEq)]
160161
pub enum SchemeType {
161162
File,
162163
SpecialNotFile,
@@ -217,7 +218,7 @@ impl<'i> Input<'i> {
217218
pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self {
218219
let input = original_input.trim_matches(c0_control_or_space);
219220
if let Some(vfn) = vfn {
220-
if input.len() < original_input.len() {
221+
if input.len() != original_input.len() {
221222
vfn(SyntaxViolation::C0SpaceIgnored)
222223
}
223224
if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
@@ -858,11 +859,13 @@ impl<'a> Parser<'a> {
858859
self.serialization.push('/');
859860
self.serialization.push('/');
860861
// authority state
862+
let before_authority = self.serialization.len();
861863
let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
864+
let has_authority = before_authority != self.serialization.len();
862865
// host state
863866
let host_start = to_u32(self.serialization.len())?;
864867
let (host_end, host, port, remaining) =
865-
self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
868+
self.parse_host_and_port(remaining, scheme_end, scheme_type, has_authority)?;
866869
// path state
867870
let path_start = to_u32(self.serialization.len())?;
868871
let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
@@ -906,7 +909,18 @@ impl<'a> Parser<'a> {
906909
}
907910
let (mut userinfo_char_count, remaining) = match last_at {
908911
None => return Ok((to_u32(self.serialization.len())?, input)),
909-
Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)),
912+
Some((0, remaining)) => {
913+
// Otherwise, if one of the following is true
914+
// c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
915+
// url is special and c is U+005C (\)
916+
// If @ flag is set and buffer is the empty string, validation error, return failure.
917+
if let (Some(c), _) = remaining.split_first() {
918+
if c == '/' || c == '?' || c == '#' || scheme_type.is_special() && c == '\\' {
919+
return Err(ParseError::InvalidAuthority);
920+
}
921+
}
922+
return Ok((to_u32(self.serialization.len())?, remaining));
923+
}
910924
Some(x) => x,
911925
};
912926

@@ -948,10 +962,26 @@ impl<'a> Parser<'a> {
948962
input: Input<'i>,
949963
scheme_end: u32,
950964
scheme_type: SchemeType,
965+
has_authority: bool,
951966
) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
952967
let (host, remaining) = Parser::parse_host(input, scheme_type)?;
953968
write!(&mut self.serialization, "{}", host).unwrap();
954969
let host_end = to_u32(self.serialization.len())?;
970+
if let Host::Domain(h) = &host {
971+
if h.is_empty() {
972+
// Port with an empty host
973+
if remaining.starts_with(":") {
974+
return Err(ParseError::EmptyHost);
975+
}
976+
if scheme_type.is_special() {
977+
return Err(ParseError::EmptyHost);
978+
}
979+
if !scheme_type.is_special() && has_authority {
980+
return Err(ParseError::EmptyHost);
981+
}
982+
}
983+
};
984+
955985
let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
956986
let scheme = || default_port(&self.serialization[..scheme_end as usize]);
957987
Parser::parse_port(remaining, scheme, self.context)?
@@ -1018,10 +1048,41 @@ impl<'a> Parser<'a> {
10181048
Ok((host, input))
10191049
}
10201050

1021-
pub(crate) fn parse_file_host<'i>(
1051+
pub fn get_file_host<'i>(input: Input<'i>) -> ParseResult<(Host<String>, Input)> {
1052+
let (_, host_str, remaining) = Parser::file_host(input)?;
1053+
let host = match Host::parse(&host_str)? {
1054+
Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
1055+
host => host,
1056+
};
1057+
Ok((host, remaining))
1058+
}
1059+
1060+
fn parse_file_host<'i>(
10221061
&mut self,
10231062
input: Input<'i>,
10241063
) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1064+
let has_host;
1065+
let (_, host_str, remaining) = Parser::file_host(input)?;
1066+
let host = if host_str.is_empty() {
1067+
has_host = false;
1068+
HostInternal::None
1069+
} else {
1070+
match Host::parse(&host_str)? {
1071+
Host::Domain(ref d) if d == "localhost" => {
1072+
has_host = false;
1073+
HostInternal::None
1074+
}
1075+
host => {
1076+
write!(&mut self.serialization, "{}", host).unwrap();
1077+
has_host = true;
1078+
host.into()
1079+
}
1080+
}
1081+
};
1082+
Ok((has_host, host, remaining))
1083+
}
1084+
1085+
pub fn file_host<'i>(input: Input<'i>) -> ParseResult<(bool, String, Input<'i>)> {
10251086
// Undo the Input abstraction here to avoid allocating in the common case
10261087
// where the host part of the input does not contain any tab or newline
10271088
let input_str = input.chars.as_str();
@@ -1050,20 +1111,9 @@ impl<'a> Parser<'a> {
10501111
}
10511112
}
10521113
if is_windows_drive_letter(host_str) {
1053-
return Ok((false, HostInternal::None, input));
1114+
return Ok((false, "".to_string(), input));
10541115
}
1055-
let host = if host_str.is_empty() {
1056-
HostInternal::None
1057-
} else {
1058-
match Host::parse(host_str)? {
1059-
Host::Domain(ref d) if d == "localhost" => HostInternal::None,
1060-
host => {
1061-
write!(&mut self.serialization, "{}", host).unwrap();
1062-
host.into()
1063-
}
1064-
}
1065-
};
1066-
Ok((true, host, remaining))
1116+
Ok((true, host_str.to_string(), remaining))
10671117
}
10681118

10691119
pub fn parse_port<P>(
@@ -1503,7 +1553,7 @@ fn c0_control_or_space(ch: char) -> bool {
15031553

15041554
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
15051555
#[inline]
1506-
pub fn ascii_tab_or_new_line(ch: char) -> bool {
1556+
fn ascii_tab_or_new_line(ch: char) -> bool {
15071557
matches!(ch, '\t' | '\r' | '\n')
15081558
}
15091559

src/quirks.rs

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
//! you probably want to use `Url` method instead.
1313
1414
use parser::{default_port, Context, Input, Parser, SchemeType};
15+
use std::cell::RefCell;
16+
use SyntaxViolation;
1517
use {idna, Host, ParseError, Position, Url};
1618

1719
/// https://url.spec.whatwg.org/#dom-url-domaintoascii
@@ -110,14 +112,23 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
110112
let opt_port;
111113
{
112114
let scheme = url.scheme();
113-
let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
115+
let scheme_type = SchemeType::from(scheme);
116+
let result = if scheme_type == SchemeType::File {
117+
Parser::get_file_host(input)
118+
} else {
119+
Parser::parse_host(input, scheme_type)
120+
};
114121
match result {
115122
Ok((h, remaining)) => {
116123
host = h;
117124
opt_port = if let Some(remaining) = remaining.split_prefix(':') {
118-
Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
119-
.ok()
120-
.map(|(port, _remaining)| port)
125+
if remaining.is_empty() {
126+
None
127+
} else {
128+
Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
129+
.ok()
130+
.map(|(port, _remaining)| port)
131+
}
121132
} else {
122133
None
123134
};
@@ -154,8 +165,30 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
154165
if url.cannot_be_a_base() {
155166
return Err(());
156167
}
157-
let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
168+
// Host parsing rules are strict,
169+
// so we don't want to trim the input
170+
let input = Input::no_trim(new_hostname);
171+
let scheme_type = SchemeType::from(url.scheme());
172+
let result = if scheme_type == SchemeType::File {
173+
Parser::get_file_host(input)
174+
} else {
175+
Parser::parse_host(input, scheme_type)
176+
};
158177
if let Ok((host, _remaining)) = result {
178+
if let Host::Domain(h) = &host {
179+
if h.is_empty() {
180+
// Empty host on a special, non-file URL
181+
if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile
182+
// Port with an empty host
183+
||!port(&url).is_empty()
184+
// Empty host on a URL that includes credentials
185+
|| !url.username().is_empty()
186+
|| !url.password().unwrap_or(&"").is_empty()
187+
{
188+
return Err(());
189+
}
190+
}
191+
}
159192
url.set_host_internal(host, None);
160193
Ok(())
161194
} else {
@@ -209,6 +242,10 @@ pub fn set_pathname(url: &mut Url, new_pathname: &str) {
209242
&& Some('\\') == new_pathname.chars().nth(0)
210243
{
211244
url.set_path(new_pathname)
245+
} else {
246+
let mut path_to_set = String::from("/");
247+
path_to_set.push_str(new_pathname);
248+
url.set_path(&path_to_set)
212249
}
213250
}
214251

0 commit comments

Comments
 (0)