Skip to content

Commit 5a0eccc

Browse files
committed
Trim forward slashes in file paths.
1 parent 282a0d0 commit 5a0eccc

File tree

1 file changed

+46
-78
lines changed

1 file changed

+46
-78
lines changed

src/parser.rs

Lines changed: 46 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,8 @@ impl<'a> Parser<'a> {
515515
self.serialization.push('/');
516516
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
517517
};
518+
// TODO: Handle authority
519+
trim_path(&mut self.serialization, host_end as usize);
518520
// For file URLs that have a host and whose path starts
519521
// with the windows drive letter we just remove the host.
520522
if !has_host {
@@ -556,16 +558,28 @@ impl<'a> Parser<'a> {
556558
}
557559
}
558560
}
559-
self.serialization.push('/');
560-
let remaining = self.parse_path(
561-
SchemeType::File,
562-
&mut false,
563-
host_end,
564-
input_after_first_char,
565-
);
561+
// If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
562+
let parse_path_input = if let Some(c) = first_char {
563+
if c == '/' || c == '\\' || c == '?' || c == '#' {
564+
input
565+
} else {
566+
input_after_first_char
567+
}
568+
} else {
569+
input_after_first_char
570+
};
571+
572+
let remaining =
573+
self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
574+
575+
let host_start = host_start as u32;
576+
577+
// TODO: Handle authority
578+
trim_path(&mut self.serialization, host_end);
579+
566580
let (query_start, fragment_start) =
567581
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
568-
let host_start = host_start as u32;
582+
569583
let host_end = host_end as u32;
570584
return Ok(Url {
571585
serialization: self.serialization,
@@ -1043,21 +1057,24 @@ impl<'a> Parser<'a> {
10431057
input: Input<'i>,
10441058
) -> Input<'i> {
10451059
let path_start = self.serialization.len();
1046-
let (maybe_c, _) = input.split_first();
1060+
let (maybe_c, remaining) = input.split_first();
10471061
// If url is special, then:
10481062
if scheme_type.is_special() {
1049-
// A special URL always has a non-empty path.
1050-
if maybe_c != Some('/') {
1051-
self.serialization.push('/');
1052-
}
10531063
if let Some(c) = maybe_c {
10541064
if c == '\\' {
10551065
// If c is U+005C (\), validation error.
10561066
self.log_violation(SyntaxViolation::Backslash);
10571067
}
1058-
// Set state to path state.
1059-
return self.parse_path(scheme_type, has_host, path_start, input);
10601068
}
1069+
// A special URL always has a non-empty path.
1070+
if !self.serialization.ends_with("/") {
1071+
self.serialization.push('/');
1072+
// We have already made sure the forward slash is present.
1073+
if maybe_c == Some('/') || maybe_c == Some('\\') {
1074+
return self.parse_path(scheme_type, has_host, path_start, remaining);
1075+
}
1076+
}
1077+
return self.parse_path(scheme_type, has_host, path_start, input);
10611078
} else if maybe_c == Some('?') || maybe_c == Some('#') {
10621079
// Otherwise, if state override is not given and c is U+003F (?),
10631080
// set url’s query to the empty string and state to query state.
@@ -1070,67 +1087,6 @@ impl<'a> Parser<'a> {
10701087
self.parse_path(scheme_type, has_host, path_start, input)
10711088
}
10721089

1073-
pub fn parse_query_2<'i>(
1074-
&mut self,
1075-
scheme_type: SchemeType,
1076-
mut input: Input<'i>,
1077-
) -> Input<'i> {
1078-
let mut query = String::new(); // FIXME: use a streaming decoder instead
1079-
1080-
while let Some((c, _)) = input.next_utf8() {
1081-
match c {
1082-
// If state override is not given and c is U+0023 (#),
1083-
// then set url’s fragment to the empty string and state to fragment state.
1084-
'#' => return self.parse_fragment_2(input),
1085-
c => {
1086-
// If c is not a URL code point and not U+0025 (%), validation error.
1087-
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1088-
self.check_url_code_point(c, &input);
1089-
query.push(c);
1090-
}
1091-
}
1092-
}
1093-
1094-
// If encoding is not UTF-8 and one of the following is true
1095-
// url is not special
1096-
// url’s scheme is "ws" or "wss"
1097-
let encoding = if !scheme_type.is_special()
1098-
|| self.serialization.starts_with("ws")
1099-
|| self.serialization.starts_with("wss")
1100-
{
1101-
self.query_encoding_override
1102-
} else {
1103-
None
1104-
};
1105-
let query_bytes = ::query_encoding::encode(encoding, &query);
1106-
let set = if scheme_type.is_special() {
1107-
SPECIAL_QUERY
1108-
} else {
1109-
QUERY
1110-
};
1111-
self.serialization.extend(percent_encode(&query_bytes, set));
1112-
input
1113-
}
1114-
1115-
pub fn parse_fragment_2<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1116-
while let Some((c, _)) = input.next_utf8() {
1117-
match c {
1118-
// U+0000 NULL: Validation error.
1119-
'\0' => self.log_violation(SyntaxViolation::NullInFragment),
1120-
c => {
1121-
// If c is not a URL code point and not U+0025 (%), validation error.
1122-
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1123-
self.check_url_code_point(c, &input);
1124-
// UTF-8 percent encode c using the fragment percent-encode set
1125-
// and append the result to url’s fragment.
1126-
self.serialization
1127-
.extend(utf8_percent_encode(&c.to_string(), FRAGMENT));
1128-
}
1129-
}
1130-
}
1131-
input
1132-
}
1133-
11341090
pub fn parse_path<'i>(
11351091
&mut self,
11361092
scheme_type: SchemeType,
@@ -1191,12 +1147,12 @@ impl<'a> Parser<'a> {
11911147
};
11921148
match to_match {
11931149
// If buffer is a double-dot path segment, shorten url’s path,
1194-
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
11951150
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
11961151
| ".%2E" => {
11971152
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1198-
self.serialization.truncate(segment_start - 1); // Truncate "/.."
1153+
self.serialization.truncate(segment_start - 1); // Truncate "/../"
11991154
self.pop_path(scheme_type, path_start);
1155+
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
12001156
if ends_with_slash && !self.serialization.ends_with("/") {
12011157
self.serialization.push('/');
12021158
}
@@ -1211,6 +1167,7 @@ impl<'a> Parser<'a> {
12111167
}
12121168
_ => {
12131169
if scheme_type.is_file()
1170+
//&& path_start + 1 < self.serialization.len()
12141171
&& is_windows_drive_letter(&self.serialization[path_start + 1..])
12151172
{
12161173
if self.serialization.ends_with('|') {
@@ -1402,6 +1359,17 @@ impl<'a> Parser<'a> {
14021359
}
14031360
}
14041361

1362+
/// Collapses a run of leading forward slashes at `path_start` into a single `/`.
///
/// Used to trim the start of the path when no authority is present, see
/// https://github.com/whatwg/url/issues/232
///
/// `serialization[path_start..]` is treated as the path. If it begins with two
/// or more `/`, all but the first are removed in place. A path that begins
/// with exactly one `/`, is empty, or does not begin with `/` at all is left
/// untouched (in particular, it is never discarded).
///
/// # Panics
///
/// Panics if `path_start` is greater than `serialization.len()` or does not
/// lie on a `char` boundary.
fn trim_path(serialization: &mut String, path_start: usize) {
    let path = &serialization[path_start..];
    // Number of bytes occupied by the leading '/' run ('/' is one byte in UTF-8).
    let leading_slashes = path.len() - path.trim_start_matches('/').len();
    if leading_slashes > 1 {
        // Keep the first slash, drop the rest of the run.
        serialization.drain(path_start + 1..path_start + leading_slashes);
    }
}
1372+
14051373
#[inline]
14061374
fn is_ascii_hex_digit(c: char) -> bool {
14071375
matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9')

0 commit comments

Comments
 (0)