Skip to content

Commit 5e3640e

Browse files
committed
Do not trim each forward slash when not required.
1 parent fff8db7 commit 5e3640e

File tree

4 files changed

+107
-14
lines changed

4 files changed

+107
-14
lines changed

src/host.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,52 @@ pub(crate) enum HostInternal {
2424
Ipv6(Ipv6Addr),
2525
}
2626

27-
impl<S> From<Host<S>> for HostInternal {
27+
// Serde serialization for `HostInternal`, compiled only when the `serde`
// feature is enabled.
//
// The value is mapped onto nested `Option`s around `std::net::IpAddr` and that
// value's own `serialize` is called:
//   HostInternal::None    -> None
//   HostInternal::Domain  -> Some(None)
//   HostInternal::Ipv4(a) -> Some(Some(IpAddr::V4(a)))
//   HostInternal::Ipv6(a) -> Some(Some(IpAddr::V6(a)))
#[cfg(feature = "serde")]
28+
impl ::serde::Serialize for HostInternal {
29+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
30+
where
31+
S: ::serde::Serializer,
32+
{
33+
// This doesn’t use `derive` because that involves
34+
// large dependencies (that take a long time to build), and
35+
// either Macros 1.1 which are not stable yet or a cumbersome build script.
36+
//
37+
// Implementing `Serialize` correctly for an enum is tricky,
38+
// so let’s use existing enums (`Option`, `IpAddr`) that already do.
39+
use std::net::IpAddr;
40+
match *self {
41+
HostInternal::None => None,
42+
HostInternal::Domain => Some(None),
43+
HostInternal::Ipv4(addr) => Some(Some(IpAddr::V4(addr))),
44+
HostInternal::Ipv6(addr) => Some(Some(IpAddr::V6(addr))),
45+
}
46+
.serialize(serializer)
47+
}
48+
}
49+
50+
// Serde deserialization for `HostInternal`, compiled only when the `serde`
// feature is enabled.
//
// Reads a nested `Option` around `std::net::IpAddr` and maps it back to a
// `HostInternal` variant:
//   None                      -> HostInternal::None
//   Some(None)                -> HostInternal::Domain
//   Some(Some(IpAddr::V4(a))) -> HostInternal::Ipv4(a)
//   Some(Some(IpAddr::V6(a))) -> HostInternal::Ipv6(a)
// Any error from the underlying deserializer is propagated via `?`.
#[cfg(feature = "serde")]
51+
impl<'de> ::serde::Deserialize<'de> for HostInternal {
52+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
53+
where
54+
D: ::serde::Deserializer<'de>,
55+
{
56+
use std::net::IpAddr;
57+
// The type to deserialize is inferred from the match patterns below.
Ok(match ::serde::Deserialize::deserialize(deserializer)? {
58+
None => HostInternal::None,
59+
Some(None) => HostInternal::Domain,
60+
Some(Some(IpAddr::V4(addr))) => HostInternal::Ipv4(addr),
61+
Some(Some(IpAddr::V6(addr))) => HostInternal::Ipv6(addr),
62+
})
63+
}
64+
}
65+
66+
impl<S> From<Host<S>> for HostInternal
67+
where
68+
S: ToString,
69+
{
2870
fn from(host: Host<S>) -> HostInternal {
2971
match host {
72+
Host::Domain(ref s) if s.to_string().is_empty() => HostInternal::None,
3073
Host::Domain(_) => HostInternal::Domain,
3174
Host::Ipv4(address) => HostInternal::Ipv4(address),
3275
Host::Ipv6(address) => HostInternal::Ipv6(address),

src/lib.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -456,13 +456,15 @@ impl Url {
456456

457457
if self.slice(self.scheme_end + 1..).starts_with("//") {
458458
// URL with authority
459-
match self.byte_at(self.username_end) {
460-
b':' => {
461-
assert!(self.host_start >= self.username_end + 2);
462-
assert_eq!(self.byte_at(self.host_start - 1), b'@');
459+
if self.username_end < self.serialization.len() as u32 {
460+
match self.byte_at(self.username_end) {
461+
b':' => {
462+
assert!(self.host_start >= self.username_end + 2);
463+
assert_eq!(self.byte_at(self.host_start - 1), b'@');
464+
}
465+
b'@' => assert!(self.host_start == self.username_end + 1),
466+
_ => assert_eq!(self.username_end, self.scheme_end + 3),
463467
}
464-
b'@' => assert!(self.host_start == self.username_end + 1),
465-
_ => assert_eq!(self.username_end, self.scheme_end + 3),
466468
}
467469
assert!(self.host_start >= self.username_end);
468470
assert!(self.host_end >= self.host_start);

src/parser.rs

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,11 @@ impl<'a> Parser<'a> {
440440
.collect::<String>()
441441
!= "//"
442442
});
443-
self.after_double_slash(remaining, scheme_type, scheme_end)
443+
if let Some(after_prefix) = input.split_prefix("//") {
444+
return self.after_double_slash(after_prefix, scheme_type, scheme_end);
445+
} else {
446+
self.after_double_slash(remaining, scheme_type, scheme_end)
447+
}
444448
}
445449
SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
446450
}
@@ -634,7 +638,7 @@ impl<'a> Parser<'a> {
634638
(Some(i), _) | (None, Some(i)) => base_url.slice(..i),
635639
};
636640
self.serialization.push_str(before_query);
637-
self.pop_path(SchemeType::File, base_url.path_start as usize);
641+
self.shorten_path(SchemeType::File, base_url.path_start as usize);
638642
let remaining = self.parse_path(
639643
SchemeType::File,
640644
&mut true,
@@ -753,6 +757,9 @@ impl<'a> Parser<'a> {
753757
debug_assert!(base_url.byte_at(scheme_end) == b':');
754758
self.serialization
755759
.push_str(base_url.slice(..scheme_end + 1));
760+
if let Some(after_prefix) = input.split_prefix("//") {
761+
return self.after_double_slash(after_prefix, scheme_type, scheme_end);
762+
}
756763
return self.after_double_slash(remaining, scheme_type, scheme_end);
757764
}
758765
let path_start = base_url.path_start;
@@ -960,7 +967,7 @@ impl<'a> Parser<'a> {
960967
host_str = &input_str[..bytes]
961968
}
962969
}
963-
if scheme_type.is_special() && host_str.is_empty() {
970+
if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
964971
return Err(ParseError::EmptyHost);
965972
}
966973
if !scheme_type.is_special() {
@@ -1150,8 +1157,15 @@ impl<'a> Parser<'a> {
11501157
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
11511158
| ".%2E" => {
11521159
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1153-
self.serialization.truncate(segment_start - 1); // Truncate "/../"
1154-
self.pop_path(scheme_type, path_start);
1160+
self.serialization.truncate(segment_start);
1161+
// Do not remove the root slash
1162+
if self.serialization.ends_with("/") && path_start + 1 < segment_start {
1163+
self.serialization.pop();
1164+
self.shorten_path(scheme_type, path_start);
1165+
} else {
1166+
self.shorten_path(scheme_type, path_start);
1167+
}
1168+
11551169
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
11561170
if ends_with_slash && !self.serialization.ends_with("/") {
11571171
self.serialization.push('/');
@@ -1195,16 +1209,36 @@ impl<'a> Parser<'a> {
11951209
input
11961210
}
11971211

1212+
/// Shortens the path held in `self.serialization` by removing its last
/// segment, unless the path is empty or is a lone normalized Windows drive
/// letter on a `file` URL.
///
/// `path_start` is the byte offset in `self.serialization` where the path
/// begins; `scheme_type` is used to detect `file` URLs.
///
/// https://url.spec.whatwg.org/#shorten-a-urls-path
1213+
fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1214+
// If path is empty, then return.
1215+
if self.serialization.len() <= path_start {
1216+
return;
1217+
}
1218+
// If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
// The path is split on '/' and empty pieces are dropped, so only
// non-empty segments count toward the size check below.
1219+
let segments: Vec<&str> = self.serialization[path_start..]
1220+
.split('/')
1221+
.filter(|s| !s.is_empty())
1222+
.collect();
1223+
if scheme_type.is_file()
1224+
&& segments.len() == 1
1225+
&& is_normalized_windows_drive_letter(segments[0])
1226+
{
1227+
return;
1228+
}
1229+
// Remove path’s last item.
// The actual truncation is delegated to `pop_path`.
1230+
self.pop_path(scheme_type, path_start);
1231+
}
1232+
11981233
/// https://url.spec.whatwg.org/#pop-a-urls-path
11991234
fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
12001235
if self.serialization.len() > path_start {
12011236
let slash_position = self.serialization[path_start..].rfind('/').unwrap();
12021237
// + 1 since rfind returns the index of the slash itself; the segment starts just after it.
12031238
let segment_start = path_start + slash_position + 1;
12041239
// Don’t pop a Windows drive letter
1205-
// FIXME: *normalized* Windows drive letter
12061240
if !(scheme_type.is_file()
1207-
&& is_windows_drive_letter(&self.serialization[segment_start..]))
1241+
&& is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
12081242
{
12091243
self.serialization.truncate(segment_start);
12101244
}

src/quirks.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
122122
Err(_) => return Err(()),
123123
}
124124
}
125+
// Make sure we don't set an empty host on a URL that has a username or a port
126+
if host == Host::Domain("".to_string()) {
127+
if !username(&url).is_empty() {
128+
return Err(());
129+
}
130+
if let Some(p) = opt_port {
131+
if let Some(_) = p {
132+
return Err(());
133+
}
134+
}
135+
if url.port().is_some() {
136+
return Err(());
137+
}
138+
}
125139
url.set_host_internal(host, opt_port);
126140
Ok(())
127141
}

0 commit comments

Comments
 (0)