@@ -515,6 +515,8 @@ impl<'a> Parser<'a> {
515
515
self . serialization . push ( '/' ) ;
516
516
self . parse_path ( SchemeType :: File , & mut has_host, path_start, remaining)
517
517
} ;
518
+ // TODO: Handle authority
519
+ trim_path ( & mut self . serialization , host_end as usize ) ;
518
520
// For file URLs that have a host and whose path starts
519
521
// with the windows drive letter we just remove the host.
520
522
if !has_host {
@@ -556,16 +558,28 @@ impl<'a> Parser<'a> {
556
558
}
557
559
}
558
560
}
559
- self . serialization . push ( '/' ) ;
560
- let remaining = self . parse_path (
561
- SchemeType :: File ,
562
- & mut false ,
563
- host_end,
564
- input_after_first_char,
565
- ) ;
561
+ // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
562
+ let parse_path_input = if let Some ( c) = first_char {
563
+ if c == '/' || c == '\\' || c == '?' || c == '#' {
564
+ input
565
+ } else {
566
+ input_after_first_char
567
+ }
568
+ } else {
569
+ input_after_first_char
570
+ } ;
571
+
572
+ let remaining =
573
+ self . parse_path ( SchemeType :: File , & mut false , host_end, parse_path_input) ;
574
+
575
+ let host_start = host_start as u32 ;
576
+
577
+ // TODO: Handle authority
578
+ trim_path ( & mut self . serialization , host_end) ;
579
+
566
580
let ( query_start, fragment_start) =
567
581
self . parse_query_and_fragment ( scheme_type, scheme_end, remaining) ?;
568
- let host_start = host_start as u32 ;
582
+
569
583
let host_end = host_end as u32 ;
570
584
return Ok ( Url {
571
585
serialization : self . serialization ,
@@ -1043,21 +1057,24 @@ impl<'a> Parser<'a> {
1043
1057
input : Input < ' i > ,
1044
1058
) -> Input < ' i > {
1045
1059
let path_start = self . serialization . len ( ) ;
1046
- let ( maybe_c, _ ) = input. split_first ( ) ;
1060
+ let ( maybe_c, remaining ) = input. split_first ( ) ;
1047
1061
// If url is special, then:
1048
1062
if scheme_type. is_special ( ) {
1049
- // A special URL always has a non-empty path.
1050
- if maybe_c != Some ( '/' ) {
1051
- self . serialization . push ( '/' ) ;
1052
- }
1053
1063
if let Some ( c) = maybe_c {
1054
1064
if c == '\\' {
1055
1065
// If c is U+005C (\), validation error.
1056
1066
self . log_violation ( SyntaxViolation :: Backslash ) ;
1057
1067
}
1058
- // Set state to path state.
1059
- return self . parse_path ( scheme_type, has_host, path_start, input) ;
1060
1068
}
1069
+ // A special URL always has a non-empty path.
1070
+ if !self . serialization . ends_with ( "/" ) {
1071
+ self . serialization . push ( '/' ) ;
1072
+ // We have already made sure the forward slash is present.
1073
+ if maybe_c == Some ( '/' ) || maybe_c == Some ( '\\' ) {
1074
+ return self . parse_path ( scheme_type, has_host, path_start, remaining) ;
1075
+ }
1076
+ }
1077
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1061
1078
} else if maybe_c == Some ( '?' ) || maybe_c == Some ( '#' ) {
1062
1079
// Otherwise, if state override is not given and c is U+003F (?),
1063
1080
// set url’s query to the empty string and state to query state.
@@ -1070,67 +1087,6 @@ impl<'a> Parser<'a> {
1070
1087
self . parse_path ( scheme_type, has_host, path_start, input)
1071
1088
}
1072
1089
1073
/// Reads characters from `input` into a temporary `query` buffer until the
/// input is exhausted or U+0023 (#) is seen.
///
/// On U+0023 (#) the function returns the result of `parse_fragment_2` right
/// away. Otherwise the buffered text is passed through
/// `query_encoding::encode`, percent-encoded with `SPECIAL_QUERY` (when
/// `scheme_type.is_special()`) or `QUERY`, appended to `self.serialization`,
/// and `input` is returned.
///
/// NOTE(review): the early return on U+0023 (#) happens before `query` is
/// encoded or appended, so the text buffered so far looks like it is dropped;
/// confirm whether that is intended.
/// NOTE(review): the `starts_with` check for `wss` can never add anything,
/// because any string that starts with `wss` also starts with `ws`.
/// NOTE(review): the branch matching the commented condition selects
/// `query_encoding_override` and the other branch selects `None`; presumably
/// the URL Standard wants the opposite (fall back to UTF-8 in that case) -
/// TODO confirm against the spec.
- pub fn parse_query_2 < ' i > (
1074
- & mut self ,
1075
- scheme_type : SchemeType ,
1076
- mut input : Input < ' i > ,
1077
- ) -> Input < ' i > {
1078
- let mut query = String :: new ( ) ; // FIXME: use a streaming decoder instead
1079
-
1080
- while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1081
- match c {
1082
- // If state override is not given and c is U+0023 (#),
1083
- // then set url’s fragment to the empty string and state to fragment state.
1084
- '#' => return self . parse_fragment_2 ( input) ,
1085
- c => {
1086
- // If c is not a URL code point and not U+0025 (%), validation error.
1087
- // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1088
- self . check_url_code_point ( c, & input) ;
1089
- query. push ( c) ;
1090
- }
1091
- }
1092
- }
1093
-
1094
- // If encoding is not UTF-8 and one of the following is true
1095
- // url is not special
1096
- // url’s scheme is "ws" or "wss"
1097
- let encoding = if !scheme_type. is_special ( )
1098
- || self . serialization . starts_with ( "ws" )
1099
- || self . serialization . starts_with ( "wss" )
1100
- {
1101
- self . query_encoding_override
1102
- } else {
1103
- None
1104
- } ;
1105
- let query_bytes = :: query_encoding:: encode ( encoding, & query) ;
1106
- let set = if scheme_type. is_special ( ) {
1107
- SPECIAL_QUERY
1108
- } else {
1109
- QUERY
1110
- } ;
1111
- self . serialization . extend ( percent_encode ( & query_bytes, set) ) ;
1112
- input
1113
- }
1114
-
1115
/// Consumes `input` one character at a time and appends the fragment to
/// `self.serialization`.
///
/// U+0000 only logs `SyntaxViolation::NullInFragment`; that match arm appends
/// nothing. Every other character is passed to `check_url_code_point` and is
/// then appended after `utf8_percent_encode` with the `FRAGMENT` set.
/// Returns `input` once the loop has run out of characters.
- pub fn parse_fragment_2 < ' i > ( & mut self , mut input : Input < ' i > ) -> Input < ' i > {
1116
- while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1117
- match c {
1118
- // U+0000 NULL: Validation error.
1119
- '\0' => self . log_violation ( SyntaxViolation :: NullInFragment ) ,
1120
- c => {
1121
- // If c is not a URL code point and not U+0025 (%), validation error.
1122
- // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1123
- self . check_url_code_point ( c, & input) ;
1124
- // UTF-8 percent encode c using the fragment percent-encode set
1125
- // and append the result to url’s fragment.
1126
- self . serialization
1127
- . extend ( utf8_percent_encode ( & c. to_string ( ) , FRAGMENT ) ) ;
1128
- }
1129
- }
1130
- }
1131
- input
1132
- }
1133
-
1134
1090
pub fn parse_path < ' i > (
1135
1091
& mut self ,
1136
1092
scheme_type : SchemeType ,
@@ -1191,12 +1147,12 @@ impl<'a> Parser<'a> {
1191
1147
} ;
1192
1148
match to_match {
1193
1149
// If buffer is a double-dot path segment, shorten url’s path,
1194
- // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1195
1150
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
1196
1151
| ".%2E" => {
1197
1152
debug_assert ! ( self . serialization. as_bytes( ) [ segment_start - 1 ] == b'/' ) ;
1198
- self . serialization . truncate ( segment_start - 1 ) ; // Truncate "/.."
1153
+ self . serialization . truncate ( segment_start - 1 ) ; // Truncate "/../ "
1199
1154
self . pop_path ( scheme_type, path_start) ;
1155
+ // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1200
1156
if ends_with_slash && !self . serialization . ends_with ( "/" ) {
1201
1157
self . serialization . push ( '/' ) ;
1202
1158
}
@@ -1211,6 +1167,7 @@ impl<'a> Parser<'a> {
1211
1167
}
1212
1168
_ => {
1213
1169
if scheme_type. is_file ( )
1170
+ //&& path_start + 1 < self.serialization.len()
1214
1171
&& is_windows_drive_letter ( & self . serialization [ path_start + 1 ..] )
1215
1172
{
1216
1173
if self . serialization . ends_with ( '|' ) {
@@ -1402,6 +1359,17 @@ impl<'a> Parser<'a> {
1402
1359
}
1403
1360
}
1404
1361
1362
/// Collapses a run of leading forward slashes at the start of the path into a
/// single `/`, editing `serialization` in place.
///
/// Meant for URLs that have no authority; background in
/// https://github.com/whatwg/url/issues/232
///
/// `path_start` is the byte offset in `serialization` at which the path
/// begins. Everything before that offset is left untouched. A tail that does
/// not begin with `/` (including an empty tail) is kept exactly as it is
/// rather than being discarded.
///
/// # Panics
///
/// Panics if `path_start` is past the end of `serialization` or does not fall
/// on a `char` boundary.
fn trim_path(serialization: &mut String, path_start: usize) {
    let path = &serialization[path_start..];
    // Number of `/` bytes at the very start of the path (`/` is one byte).
    let leading_slashes = path.len() - path.trim_start_matches('/').len();
    if leading_slashes > 1 {
        // Keep the first slash and remove the rest without reallocating.
        serialization.replace_range(path_start + 1..path_start + leading_slashes, "");
    }
}
1372
+
1405
1373
#[ inline]
1406
1374
fn is_ascii_hex_digit ( c : char ) -> bool {
1407
1375
matches ! ( c, 'a' ..='f' | 'A' ..='F' | '0' ..='9' )
0 commit comments