Skip to content

Commit ee8e3f6

Browse files
committed
Windows drive letter handling.
1 parent 5712f83 commit ee8e3f6

File tree

1 file changed

+41
-8
lines changed

1 file changed

+41
-8
lines changed

src/parser.rs

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,13 +1230,12 @@ impl<'a> Parser<'a> {
12301230
| ".%2E" => {
12311231
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
12321232
self.serialization.truncate(segment_start);
1233-
// Do not remove the root slash
1234-
if self.serialization.ends_with("/") && path_start + 1 < segment_start {
1233+
if self.serialization.ends_with("/")
1234+
&& Parser::last_slash_can_be_removed(&self.serialization, path_start)
1235+
{
12351236
self.serialization.pop();
1236-
self.shorten_path(scheme_type, path_start);
1237-
} else {
1238-
self.shorten_path(scheme_type, path_start);
12391237
}
1238+
self.shorten_path(scheme_type, path_start);
12401239

12411240
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
12421241
if ends_with_slash && !self.serialization.ends_with("/") {
@@ -1281,6 +1280,18 @@ impl<'a> Parser<'a> {
12811280
input
12821281
}
12831282

1283+
fn last_slash_can_be_removed(serialization: &String, path_start: usize) -> bool {
1284+
let url_before_segment = &serialization[..serialization.len() - 1];
1285+
if let Some(segment_before_start) = url_before_segment.rfind("/") {
1286+
// Do not remove the root slash
1287+
segment_before_start >= path_start
1288+
// Or a windows drive letter slash
1289+
&& !path_starts_with_windows_drive_letter(&serialization[segment_before_start..])
1290+
} else {
1291+
false
1292+
}
1293+
}
1294+
12841295
/// https://url.spec.whatwg.org/#shorten-a-urls-path
12851296
fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
12861297
// If path is empty, then return.
@@ -1553,12 +1564,34 @@ fn is_windows_drive_letter(segment: &str) -> bool {
15531564
segment.len() == 2 && starts_with_windows_drive_letter(segment)
15541565
}
15551566

1567+
/// Wether path starts with a root slash
1568+
/// and a windows drive letter eg: "/c:" or "/a:/"
1569+
fn path_starts_with_windows_drive_letter(s: &str) -> bool {
1570+
s.len() > 3
1571+
&& matches!(s.as_bytes()[0], b'/' | b'\\' | b'?' | b'#')
1572+
&& starts_with_windows_drive_letter(&s[1..])
1573+
}
1574+
15561575
fn starts_with_windows_drive_letter(s: &str) -> bool {
1557-
ascii_alpha(s.as_bytes()[0] as char) && matches!(s.as_bytes()[1], b':' | b'|')
1576+
ascii_alpha(s.as_bytes()[0] as char)
1577+
&& matches!(s.as_bytes()[1], b':' | b'|')
1578+
&& (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
15581579
}
15591580

1581+
/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
15601582
fn starts_with_windows_drive_letter_segment(input: &Input) -> bool {
15611583
let mut input = input.clone();
1562-
matches!((input.next(), input.next(), input.next()), (Some(a), Some(b), Some(c))
1563-
if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#'))
1584+
match (input.next(), input.next(), input.next()) {
1585+
// its first two code points are a Windows drive letter
1586+
// its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
1587+
(Some(a), Some(b), Some(c))
1588+
if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') =>
1589+
{
1590+
true
1591+
}
1592+
// its first two code points are a Windows drive letter
1593+
// its length is 2
1594+
(Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true,
1595+
_ => false,
1596+
}
15641597
}

0 commit comments

Comments
 (0)