Skip to content

Commit 09d8466

Browse files
committed
Fix path parsing
1 parent 86607f8 commit 09d8466

File tree

3 files changed

+97
-34
lines changed

3 files changed

+97
-34
lines changed

src/lib.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,12 @@ impl Url {
490490
Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
491491
);
492492
}
493-
assert_eq!(self.byte_at(self.path_start), b'/');
493+
assert!(
494+
self.path_start as usize == self.serialization.len()
495+
|| self.byte_at(self.path_start) == b'/'
496+
|| self.byte_at(self.path_start) == b'#'
497+
|| self.byte_at(self.path_start) == b'?'
498+
);
494499
} else {
495500
// Anarchist URL (no authority)
496501
assert_eq!(self.username_end, self.scheme_end + 1);
@@ -501,11 +506,11 @@ impl Url {
501506
assert_eq!(self.path_start, self.scheme_end + 1);
502507
}
503508
if let Some(start) = self.query_start {
504-
assert!(start > self.path_start);
509+
assert!(start >= self.path_start);
505510
assert_eq!(self.byte_at(start), b'?');
506511
}
507512
if let Some(start) = self.fragment_start {
508-
assert!(start > self.path_start);
513+
assert!(start >= self.path_start);
509514
assert_eq!(self.byte_at(start), b'#');
510515
}
511516
if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {

src/parser.rs

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,8 @@ impl<'a> Parser<'a> {
515515
self.serialization.push('/');
516516
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
517517
};
518+
// TODO: Handle authority
519+
trim_path(&mut self.serialization, host_end as usize);
518520
// For file URLs that have a host and whose path starts
519521
// with the windows drive letter we just remove the host.
520522
if !has_host {
@@ -556,16 +558,28 @@ impl<'a> Parser<'a> {
556558
}
557559
}
558560
}
559-
self.serialization.push('/');
560-
let remaining = self.parse_path(
561-
SchemeType::File,
562-
&mut false,
563-
host_end,
564-
input_after_first_char,
565-
);
561+
// If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
562+
let parse_path_input = if let Some(c) = first_char {
563+
if c == '/' || c == '\\' || c == '?' || c == '#' {
564+
input
565+
} else {
566+
input_after_first_char
567+
}
568+
} else {
569+
input_after_first_char
570+
};
571+
572+
let remaining =
573+
self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
574+
575+
let host_start = host_start as u32;
576+
577+
// TODO: Handle authority
578+
trim_path(&mut self.serialization, host_end);
579+
566580
let (query_start, fragment_start) =
567581
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
568-
let host_start = host_start as u32;
582+
569583
let host_end = host_end as u32;
570584
return Ok(Url {
571585
serialization: self.serialization,
@@ -1040,21 +1054,36 @@ impl<'a> Parser<'a> {
10401054
&mut self,
10411055
scheme_type: SchemeType,
10421056
has_host: &mut bool,
1043-
mut input: Input<'i>,
1057+
input: Input<'i>,
10441058
) -> Input<'i> {
1045-
// Path start state
1046-
match input.split_first() {
1047-
(Some('/'), remaining) => input = remaining,
1048-
(Some('\\'), remaining) => {
1049-
if scheme_type.is_special() {
1059+
let path_start = self.serialization.len();
1060+
let (maybe_c, remaining) = input.split_first();
1061+
// If url is special, then:
1062+
if scheme_type.is_special() {
1063+
if let Some(c) = maybe_c {
1064+
if c == '\\' {
1065+
// If c is U+005C (\), validation error.
10501066
self.log_violation(SyntaxViolation::Backslash);
1051-
input = remaining
10521067
}
10531068
}
1054-
_ => {}
1069+
// A special URL always has a non-empty path.
1070+
if !self.serialization.ends_with("/") {
1071+
self.serialization.push('/');
1072+
// We have already made sure the forward slash is present.
1073+
if maybe_c == Some('/') || maybe_c == Some('\\') {
1074+
return self.parse_path(scheme_type, has_host, path_start, remaining);
1075+
}
1076+
}
1077+
return self.parse_path(scheme_type, has_host, path_start, input);
1078+
} else if maybe_c == Some('?') || maybe_c == Some('#') {
1079+
// Otherwise, if state override is not given and c is U+003F (?),
1080+
// set url’s query to the empty string and state to query state.
1081+
// Otherwise, if state override is not given and c is U+0023 (#),
1082+
// set url’s fragment to the empty string and state to fragment state.
1083+
// The query and fragment states will be handled by the caller.
1084+
return input;
10551085
}
1056-
let path_start = self.serialization.len();
1057-
self.serialization.push('/');
1086+
// Otherwise, if c is not the EOF code point:
10581087
self.parse_path(scheme_type, has_host, path_start, input)
10591088
}
10601089

@@ -1066,7 +1095,6 @@ impl<'a> Parser<'a> {
10661095
mut input: Input<'i>,
10671096
) -> Input<'i> {
10681097
// Relative path state
1069-
debug_assert!(self.serialization.ends_with('/'));
10701098
loop {
10711099
let segment_start = self.serialization.len();
10721100
let mut ends_with_slash = false;
@@ -1079,13 +1107,15 @@ impl<'a> Parser<'a> {
10791107
};
10801108
match c {
10811109
'/' if self.context != Context::PathSegmentSetter => {
1110+
self.serialization.push(c);
10821111
ends_with_slash = true;
10831112
break;
10841113
}
10851114
'\\' if self.context != Context::PathSegmentSetter
10861115
&& scheme_type.is_special() =>
10871116
{
10881117
self.log_violation(SyntaxViolation::Backslash);
1118+
self.serialization.push('/');
10891119
ends_with_slash = true;
10901120
break;
10911121
}
@@ -1109,18 +1139,31 @@ impl<'a> Parser<'a> {
11091139
}
11101140
}
11111141
}
1112-
match &self.serialization[segment_start..] {
1142+
1143+
let to_match = if ends_with_slash {
1144+
&self.serialization[segment_start..self.serialization.len() - 1]
1145+
} else {
1146+
&self.serialization[segment_start..self.serialization.len()]
1147+
};
1148+
match to_match {
1149+
// If buffer is a double-dot path segment, shorten url’s path,
11131150
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
11141151
| ".%2E" => {
11151152
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1116-
self.serialization.truncate(segment_start - 1); // Truncate "/.."
1153+
self.serialization.truncate(segment_start - 1); // Truncate "/../"
11171154
self.pop_path(scheme_type, path_start);
1118-
if !self.serialization[path_start..].ends_with('/') {
1119-
self.serialization.push('/')
1155+
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1156+
if ends_with_slash && !self.serialization.ends_with("/") {
1157+
self.serialization.push('/');
11201158
}
11211159
}
1160+
// Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
1161+
// nor url is special and c is U+005C (\), append the empty string to url’s path.
11221162
"." | "%2e" | "%2E" => {
11231163
self.serialization.truncate(segment_start);
1164+
if ends_with_slash && !self.serialization.ends_with("/") {
1165+
self.serialization.push('/');
1166+
}
11241167
}
11251168
_ => {
11261169
if scheme_type.is_file()
@@ -1135,9 +1178,6 @@ impl<'a> Parser<'a> {
11351178
*has_host = false; // FIXME account for this in callers
11361179
}
11371180
}
1138-
if ends_with_slash {
1139-
self.serialization.push('/')
1140-
}
11411181
}
11421182
}
11431183
if !ends_with_slash {
@@ -1318,6 +1358,17 @@ impl<'a> Parser<'a> {
13181358
}
13191359
}
13201360

1361+
/// Collapses a run of leading forward slashes in the path into a single `/`.
///
/// Used when no authority is present, see
/// https://github.com/whatwg/url/issues/232
///
/// `serialization[path_start..]` is treated as the path. If it begins with
/// two or more `/` characters, all but the first are removed in place.
/// A path that is empty, starts with exactly one `/`, or does not start
/// with `/` at all is left untouched.
///
/// # Panics
///
/// Panics if `path_start` is greater than `serialization.len()` or does not
/// lie on a `char` boundary.
fn trim_path(serialization: &mut String, path_start: usize) {
    let path = &serialization[path_start..];
    // '/' is a single byte in UTF-8, so the byte-length difference is the
    // number of leading slashes.
    let leading_slashes = path.len() - path.trim_start_matches('/').len();
    if leading_slashes > 1 {
        // Keep the first slash and drop the redundant ones without
        // reallocating the rest of the serialization.
        serialization.replace_range(path_start + 1..path_start + leading_slashes, "");
    }
}
1371+
13211372
#[inline]
13221373
fn is_ascii_hex_digit(c: char) -> bool {
13231374
matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9')

src/quirks.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,6 @@ pub fn host(url: &Url) -> &str {
9999

100100
/// Setter for https://url.spec.whatwg.org/#dom-url-host
101101
pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
102-
if url.cannot_be_a_base() {
103-
return Err(());
104-
}
105102
let host;
106103
let opt_port;
107104
{
@@ -186,8 +183,18 @@ pub fn pathname(url: &Url) -> &str {
186183

187184
/// Setter for https://url.spec.whatwg.org/#dom-url-pathname
188185
pub fn set_pathname(url: &mut Url, new_pathname: &str) {
189-
if !url.cannot_be_a_base() {
190-
url.set_path(new_pathname)
186+
if !url.cannot_be_a_base() && !new_pathname.is_empty() {
187+
if !SchemeType::from(url.scheme()).is_special()
188+
|| Some('/') == new_pathname.chars().nth(0)
189+
// \\ is a segment delimiter for 'special' URLs"
190+
|| Some('\\') == new_pathname.chars().nth(0)
191+
{
192+
url.set_path(new_pathname)
193+
} else {
194+
let mut path_to_set = String::from("/");
195+
path_to_set.push_str(new_pathname);
196+
url.set_path(&path_to_set)
197+
}
191198
}
192199
}
193200

0 commit comments

Comments
 (0)