Skip to content

Commit f828688

Browse files
committed
Trim forward slashes in file paths.
1 parent e4a27e5 commit f828688

File tree

1 file changed

+46
-78
lines changed

1 file changed

+46
-78
lines changed

src/parser.rs

Lines changed: 46 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,8 @@ impl<'a> Parser<'a> {
497497
self.serialization.push('/');
498498
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
499499
};
500+
// TODO: Handle authority
501+
trim_path(&mut self.serialization, host_end as usize);
500502
// For file URLs that have a host and whose path starts
501503
// with the windows drive letter we just remove the host.
502504
if !has_host {
@@ -538,16 +540,28 @@ impl<'a> Parser<'a> {
538540
}
539541
}
540542
}
541-
self.serialization.push('/');
542-
let remaining = self.parse_path(
543-
SchemeType::File,
544-
&mut false,
545-
host_end,
546-
input_after_first_char,
547-
);
543+
// If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
544+
let parse_path_input = if let Some(c) = first_char {
545+
if c == '/' || c == '\\' || c == '?' || c == '#' {
546+
input
547+
} else {
548+
input_after_first_char
549+
}
550+
} else {
551+
input_after_first_char
552+
};
553+
554+
let remaining =
555+
self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
556+
557+
let host_start = host_start as u32;
558+
559+
// TODO: Handle authority
560+
trim_path(&mut self.serialization, host_end);
561+
548562
let (query_start, fragment_start) =
549563
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
550-
let host_start = host_start as u32;
564+
551565
let host_end = host_end as u32;
552566
return Ok(Url {
553567
serialization: self.serialization,
@@ -1025,21 +1039,24 @@ impl<'a> Parser<'a> {
10251039
input: Input<'i>,
10261040
) -> Input<'i> {
10271041
let path_start = self.serialization.len();
1028-
let (maybe_c, _) = input.split_first();
1042+
let (maybe_c, remaining) = input.split_first();
10291043
// If url is special, then:
10301044
if scheme_type.is_special() {
1031-
// A special URL always has a non-empty path.
1032-
if maybe_c != Some('/') {
1033-
self.serialization.push('/');
1034-
}
10351045
if let Some(c) = maybe_c {
10361046
if c == '\\' {
10371047
// If c is U+005C (\), validation error.
10381048
self.log_violation(SyntaxViolation::Backslash);
10391049
}
1040-
// Set state to path state.
1041-
return self.parse_path(scheme_type, has_host, path_start, input);
10421050
}
1051+
// A special URL always has a non-empty path.
1052+
if !self.serialization.ends_with("/") {
1053+
self.serialization.push('/');
1054+
// We have already made sure the forward slash is present.
1055+
if maybe_c == Some('/') || maybe_c == Some('\\') {
1056+
return self.parse_path(scheme_type, has_host, path_start, remaining);
1057+
}
1058+
}
1059+
return self.parse_path(scheme_type, has_host, path_start, input);
10431060
} else if maybe_c == Some('?') || maybe_c == Some('#') {
10441061
// Otherwise, if state override is not given and c is U+003F (?),
10451062
// set url’s query to the empty string and state to query state.
@@ -1052,67 +1069,6 @@ impl<'a> Parser<'a> {
10521069
self.parse_path(scheme_type, has_host, path_start, input)
10531070
}
10541071

1055-
pub fn parse_query_2<'i>(
1056-
&mut self,
1057-
scheme_type: SchemeType,
1058-
mut input: Input<'i>,
1059-
) -> Input<'i> {
1060-
let mut query = String::new(); // FIXME: use a streaming decoder instead
1061-
1062-
while let Some((c, _)) = input.next_utf8() {
1063-
match c {
1064-
// If state override is not given and c is U+0023 (#),
1065-
// then set url’s fragment to the empty string and state to fragment state.
1066-
'#' => return self.parse_fragment_2(input),
1067-
c => {
1068-
// If c is not a URL code point and not U+0025 (%), validation error.
1069-
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1070-
self.check_url_code_point(c, &input);
1071-
query.push(c);
1072-
}
1073-
}
1074-
}
1075-
1076-
// If encoding is not UTF-8 and one of the following is true
1077-
// url is not special
1078-
// url’s scheme is "ws" or "wss"
1079-
let encoding = if !scheme_type.is_special()
1080-
|| self.serialization.starts_with("ws")
1081-
|| self.serialization.starts_with("wss")
1082-
{
1083-
self.query_encoding_override
1084-
} else {
1085-
None
1086-
};
1087-
let query_bytes = ::query_encoding::encode(encoding, &query);
1088-
let set = if scheme_type.is_special() {
1089-
SPECIAL_QUERY
1090-
} else {
1091-
QUERY
1092-
};
1093-
self.serialization.extend(percent_encode(&query_bytes, set));
1094-
input
1095-
}
1096-
1097-
pub fn parse_fragment_2<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1098-
while let Some((c, _)) = input.next_utf8() {
1099-
match c {
1100-
// U+0000 NULL: Validation error.
1101-
'\0' => self.log_violation(SyntaxViolation::NullInFragment),
1102-
c => {
1103-
// If c is not a URL code point and not U+0025 (%), validation error.
1104-
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1105-
self.check_url_code_point(c, &input);
1106-
// UTF-8 percent encode c using the fragment percent-encode set
1107-
// and append the result to url’s fragment.
1108-
self.serialization
1109-
.extend(utf8_percent_encode(&c.to_string(), FRAGMENT));
1110-
}
1111-
}
1112-
}
1113-
input
1114-
}
1115-
11161072
pub fn parse_path<'i>(
11171073
&mut self,
11181074
scheme_type: SchemeType,
@@ -1173,12 +1129,12 @@ impl<'a> Parser<'a> {
11731129
};
11741130
match to_match {
11751131
// If buffer is a double-dot path segment, shorten url’s path,
1176-
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
11771132
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
11781133
| ".%2E" => {
11791134
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1180-
self.serialization.truncate(segment_start - 1); // Truncate "/.."
1135+
self.serialization.truncate(segment_start - 1); // Truncate "/../"
11811136
self.pop_path(scheme_type, path_start);
1137+
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
11821138
if ends_with_slash && !self.serialization.ends_with("/") {
11831139
self.serialization.push('/');
11841140
}
@@ -1193,6 +1149,7 @@ impl<'a> Parser<'a> {
11931149
}
11941150
_ => {
11951151
if scheme_type.is_file()
1152+
//&& path_start + 1 < self.serialization.len()
11961153
&& is_windows_drive_letter(&self.serialization[path_start + 1..])
11971154
{
11981155
if self.serialization.ends_with('|') {
@@ -1384,6 +1341,17 @@ impl<'a> Parser<'a> {
13841341
}
13851342
}
13861343

1344+
/// Collapses a run of leading forward slashes in the path into a single `/`.
///
/// The path is everything in `serialization` from byte offset `path_start`
/// to the end. Intended for the case where no authority is present; see
/// https://github.com/whatwg/url/issues/232
///
/// A tail that does not begin with `/` (including an empty one) is left
/// untouched rather than being discarded.
///
/// # Panics
///
/// Panics if `path_start` is past the end of `serialization` or does not
/// fall on a `char` boundary.
fn trim_path(serialization: &mut String, path_start: usize) {
    let path = &serialization[path_start..];
    // '/' is a single byte in UTF-8, so the byte-length difference is
    // exactly the number of leading slashes.
    let leading_slashes = path.len() - path.trim_start_matches('/').len();
    if leading_slashes > 1 {
        // Keep the first slash and remove the rest in place, without
        // allocating a temporary copy of the path.
        serialization.replace_range(path_start + 1..path_start + leading_slashes, "");
    }
}
1354+
13871355
#[inline]
13881356
fn is_ascii_hex_digit(c: char) -> bool {
13891357
matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9')

0 commit comments

Comments
 (0)