Skip to content

Commit 27225f1

Browse files
committed
tryouts to rewrite parse path
1 parent 08f0d48 commit 27225f1

File tree

3 files changed

+106
-15
lines changed

3 files changed

+106
-15
lines changed

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ impl Url {
489489
Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
490490
);
491491
}
492-
assert_eq!(self.byte_at(self.path_start), b'/');
492+
//assert!(self.path_start as usize == self.serialization.len() || self.byte_at(self.path_start) == b'/');
493493
} else {
494494
// Anarchist URL (no authority)
495495
assert_eq!(self.username_end, self.scheme_end + 1);

src/parser.rs

Lines changed: 96 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,20 +1024,100 @@ impl<'a> Parser<'a> {
10241024
has_host: &mut bool,
10251025
mut input: Input<'i>,
10261026
) -> Input<'i> {
1027-
// Path start state
1028-
match input.split_first() {
1029-
(Some('/'), remaining) => input = remaining,
1030-
(Some('\\'), remaining) => {
1031-
if scheme_type.is_special() {
1032-
self.log_violation(SyntaxViolation::Backslash);
1033-
input = remaining
1027+
let path_start = self.serialization.len();
1028+
let (maybe_c, remaining) = input.split_first();
1029+
// If url is special, then:
1030+
if scheme_type.is_special() {
1031+
// If c is U+005C (\), validation error.
1032+
if maybe_c == Some('\\') {
1033+
self.log_violation(SyntaxViolation::Backslash);
1034+
}
1035+
// If c is neither U+002F (/) nor U+005C (\), then decrease pointer by one.
1036+
if maybe_c == Some('/') || maybe_c == Some('\\') {
1037+
input = remaining;
1038+
}
1039+
// Set state to path state.
1040+
return self.parse_path(scheme_type, has_host, path_start, input);
1041+
} else if maybe_c == Some('?') {
1042+
// Otherwise, if state override is not given and c is U+003F (?),
1043+
// set url’s query to the empty string and state to query state.
1044+
return self.parse_query_2(scheme_type, remaining);
1045+
} else if maybe_c == Some('#') {
1046+
// Otherwise, if state override is not given and c is U+0023 (#),
1047+
// set url’s fragment to the empty string and state to fragment state.
1048+
return self.parse_fragment_2(remaining);
1049+
}
1050+
// Otherwise, if c is not the EOF code point:
1051+
if !remaining.is_empty() {
1052+
if maybe_c == Some('/') {
1053+
return self.parse_path(scheme_type, has_host, path_start, input);
1054+
} else {
1055+
// If c is not U+002F (/), then decrease pointer by one.
1056+
return self.parse_path(scheme_type, has_host, path_start, remaining);
1057+
}
1058+
}
1059+
input
1060+
}
1061+
1062+
pub fn parse_query_2<'i>(
1063+
&mut self,
1064+
scheme_type: SchemeType,
1065+
mut input: Input<'i>,
1066+
) -> Input<'i> {
1067+
let mut query = String::new(); // FIXME: use a streaming decoder instead
1068+
1069+
while let Some((c, _)) = input.next_utf8() {
1070+
match c {
1071+
// If state override is not given and c is U+0023 (#),
1072+
// then set url’s fragment to the empty string and state to fragment state.
1073+
'#' => return self.parse_fragment_2(input),
1074+
c => {
1075+
// If c is not a URL code point and not U+0025 (%), validation error.
1076+
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1077+
self.check_url_code_point(c, &input);
1078+
query.push(c);
10341079
}
10351080
}
1036-
_ => {}
10371081
}
1038-
let path_start = self.serialization.len();
1039-
self.serialization.push('/');
1040-
self.parse_path(scheme_type, has_host, path_start, input)
1082+
1083+
// If encoding is not UTF-8 and one of the following is true
1084+
// url is not special
1085+
// url’s scheme is "ws" or "wss"
1086+
let encoding = if !scheme_type.is_special()
1087+
|| self.serialization.starts_with("ws")
1088+
|| self.serialization.starts_with("wss")
1089+
{
1090+
self.query_encoding_override
1091+
} else {
1092+
None
1093+
};
1094+
let query_bytes = ::query_encoding::encode(encoding, &query);
1095+
let set = if scheme_type.is_special() {
1096+
SPECIAL_QUERY
1097+
} else {
1098+
QUERY
1099+
};
1100+
self.serialization.extend(percent_encode(&query_bytes, set));
1101+
input
1102+
}
1103+
1104+
pub fn parse_fragment_2<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1105+
while let Some((c, _)) = input.next_utf8() {
1106+
match c {
1107+
// U+0000 NULL: Validation error.
1108+
'\0' => self.log_violation(SyntaxViolation::NullInFragment),
1109+
c => {
1110+
// If c is not a URL code point and not U+0025 (%), validation error.
1111+
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1112+
self.check_url_code_point(c, &input);
1113+
// UTF-8 percent encode c using the fragment percent-encode set
1114+
// and append the result to url’s fragment.
1115+
self.serialization
1116+
.extend(utf8_percent_encode(&c.to_string(), FRAGMENT));
1117+
}
1118+
}
1119+
}
1120+
input
10411121
}
10421122

10431123
pub fn parse_path<'i>(
@@ -1047,8 +1127,10 @@ impl<'a> Parser<'a> {
10471127
path_start: usize,
10481128
mut input: Input<'i>,
10491129
) -> Input<'i> {
1130+
if !self.serialization.ends_with('/') && scheme_type.is_special() && !input.is_empty() {
1131+
self.serialization.push('/');
1132+
}
10501133
// Relative path state
1051-
debug_assert!(self.serialization.ends_with('/'));
10521134
loop {
10531135
let segment_start = self.serialization.len();
10541136
let mut ends_with_slash = false;
@@ -1061,13 +1143,15 @@ impl<'a> Parser<'a> {
10611143
};
10621144
match c {
10631145
'/' if self.context != Context::PathSegmentSetter => {
1146+
self.serialization.push(c);
10641147
ends_with_slash = true;
10651148
break;
10661149
}
10671150
'\\' if self.context != Context::PathSegmentSetter
10681151
&& scheme_type.is_special() =>
10691152
{
10701153
self.log_violation(SyntaxViolation::Backslash);
1154+
self.serialization.push(c);
10711155
ends_with_slash = true;
10721156
break;
10731157
}

src/quirks.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,15 @@ pub fn pathname(url: &Url) -> &str {
186186

187187
/// Setter for <https://url.spec.whatwg.org/#dom-url-pathname>
188188
pub fn set_pathname(url: &mut Url, new_pathname: &str) {
189-
if !url.cannot_be_a_base() {
190-
url.set_path(new_pathname)
189+
if !url.cannot_be_a_base() && !new_pathname.is_empty() {
190+
if !SchemeType::from(url.scheme()).is_special() || Some('/') == new_pathname.chars().nth(0)
191+
{
192+
url.set_path(new_pathname)
193+
} else {
194+
let mut path_to_set = String::from("/");
195+
path_to_set.push_str(new_pathname);
196+
url.set_path(&path_to_set)
197+
}
191198
}
192199
}
193200

0 commit comments

Comments
 (0)