@@ -497,6 +497,8 @@ impl<'a> Parser<'a> {
497
497
self . serialization . push ( '/' ) ;
498
498
self . parse_path ( SchemeType :: File , & mut has_host, path_start, remaining)
499
499
} ;
500
+ // TODO: Handle authority
501
+ trim_path ( & mut self . serialization , host_end as usize ) ;
500
502
// For file URLs that have a host and whose path starts
501
503
// with the windows drive letter we just remove the host.
502
504
if !has_host {
@@ -538,16 +540,28 @@ impl<'a> Parser<'a> {
538
540
}
539
541
}
540
542
}
541
- self . serialization . push ( '/' ) ;
542
- let remaining = self . parse_path (
543
- SchemeType :: File ,
544
- & mut false ,
545
- host_end,
546
- input_after_first_char,
547
- ) ;
543
+ // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
544
+ let parse_path_input = if let Some ( c) = first_char {
545
+ if c == '/' || c == '\\' || c == '?' || c == '#' {
546
+ input
547
+ } else {
548
+ input_after_first_char
549
+ }
550
+ } else {
551
+ input_after_first_char
552
+ } ;
553
+
554
+ let remaining =
555
+ self . parse_path ( SchemeType :: File , & mut false , host_end, parse_path_input) ;
556
+
557
+ let host_start = host_start as u32 ;
558
+
559
+ // TODO: Handle authority
560
+ trim_path ( & mut self . serialization , host_end) ;
561
+
548
562
let ( query_start, fragment_start) =
549
563
self . parse_query_and_fragment ( scheme_type, scheme_end, remaining) ?;
550
- let host_start = host_start as u32 ;
564
+
551
565
let host_end = host_end as u32 ;
552
566
return Ok ( Url {
553
567
serialization : self . serialization ,
@@ -1025,21 +1039,24 @@ impl<'a> Parser<'a> {
1025
1039
input : Input < ' i > ,
1026
1040
) -> Input < ' i > {
1027
1041
let path_start = self . serialization . len ( ) ;
1028
- let ( maybe_c, _ ) = input. split_first ( ) ;
1042
+ let ( maybe_c, remaining ) = input. split_first ( ) ;
1029
1043
// If url is special, then:
1030
1044
if scheme_type. is_special ( ) {
1031
- // A special URL always has a non-empty path.
1032
- if maybe_c != Some ( '/' ) {
1033
- self . serialization . push ( '/' ) ;
1034
- }
1035
1045
if let Some ( c) = maybe_c {
1036
1046
if c == '\\' {
1037
1047
// If c is U+005C (\), validation error.
1038
1048
self . log_violation ( SyntaxViolation :: Backslash ) ;
1039
1049
}
1040
- // Set state to path state.
1041
- return self . parse_path ( scheme_type, has_host, path_start, input) ;
1042
1050
}
1051
+ // A special URL always has a non-empty path.
1052
+ if !self . serialization . ends_with ( "/" ) {
1053
+ self . serialization . push ( '/' ) ;
1054
+ // We have already made sure the forward slash is present.
1055
+ if maybe_c == Some ( '/' ) || maybe_c == Some ( '\\' ) {
1056
+ return self . parse_path ( scheme_type, has_host, path_start, remaining) ;
1057
+ }
1058
+ }
1059
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1043
1060
} else if maybe_c == Some ( '?' ) || maybe_c == Some ( '#' ) {
1044
1061
// Otherwise, if state override is not given and c is U+003F (?),
1045
1062
// set url’s query to the empty string and state to query state.
@@ -1052,67 +1069,6 @@ impl<'a> Parser<'a> {
1052
1069
self . parse_path ( scheme_type, has_host, path_start, input)
1053
1070
}
1054
1071
1055
- pub fn parse_query_2 < ' i > (
1056
- & mut self ,
1057
- scheme_type : SchemeType ,
1058
- mut input : Input < ' i > ,
1059
- ) -> Input < ' i > {
1060
- let mut query = String :: new ( ) ; // FIXME: use a streaming decoder instead
1061
-
1062
- while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1063
- match c {
1064
- // If state override is not given and c is U+0023 (#),
1065
- // then set url’s fragment to the empty string and state to fragment state.
1066
- '#' => return self . parse_fragment_2 ( input) ,
1067
- c => {
1068
- // If c is not a URL code point and not U+0025 (%), validation error.
1069
- // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1070
- self . check_url_code_point ( c, & input) ;
1071
- query. push ( c) ;
1072
- }
1073
- }
1074
- }
1075
-
1076
- // If encoding is not UTF-8 and one of the following is true
1077
- // url is not special
1078
- // url’s scheme is "ws" or "wss"
1079
- let encoding = if !scheme_type. is_special ( )
1080
- || self . serialization . starts_with ( "ws" )
1081
- || self . serialization . starts_with ( "wss" )
1082
- {
1083
- self . query_encoding_override
1084
- } else {
1085
- None
1086
- } ;
1087
- let query_bytes = :: query_encoding:: encode ( encoding, & query) ;
1088
- let set = if scheme_type. is_special ( ) {
1089
- SPECIAL_QUERY
1090
- } else {
1091
- QUERY
1092
- } ;
1093
- self . serialization . extend ( percent_encode ( & query_bytes, set) ) ;
1094
- input
1095
- }
1096
-
1097
- pub fn parse_fragment_2 < ' i > ( & mut self , mut input : Input < ' i > ) -> Input < ' i > {
1098
- while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1099
- match c {
1100
- // U+0000 NULL: Validation error.
1101
- '\0' => self . log_violation ( SyntaxViolation :: NullInFragment ) ,
1102
- c => {
1103
- // If c is not a URL code point and not U+0025 (%), validation error.
1104
- // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1105
- self . check_url_code_point ( c, & input) ;
1106
- // UTF-8 percent encode c using the fragment percent-encode set
1107
- // and append the result to url’s fragment.
1108
- self . serialization
1109
- . extend ( utf8_percent_encode ( & c. to_string ( ) , FRAGMENT ) ) ;
1110
- }
1111
- }
1112
- }
1113
- input
1114
- }
1115
-
1116
1072
pub fn parse_path < ' i > (
1117
1073
& mut self ,
1118
1074
scheme_type : SchemeType ,
@@ -1173,12 +1129,12 @@ impl<'a> Parser<'a> {
1173
1129
} ;
1174
1130
match to_match {
1175
1131
// If buffer is a double-dot path segment, shorten url’s path,
1176
- // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1177
1132
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
1178
1133
| ".%2E" => {
1179
1134
debug_assert ! ( self . serialization. as_bytes( ) [ segment_start - 1 ] == b'/' ) ;
1180
- self . serialization . truncate ( segment_start - 1 ) ; // Truncate "/.."
1135
+ self . serialization . truncate ( segment_start - 1 ) ; // Truncate "/../ "
1181
1136
self . pop_path ( scheme_type, path_start) ;
1137
+ // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1182
1138
if ends_with_slash && !self . serialization . ends_with ( "/" ) {
1183
1139
self . serialization . push ( '/' ) ;
1184
1140
}
@@ -1193,6 +1149,7 @@ impl<'a> Parser<'a> {
1193
1149
}
1194
1150
_ => {
1195
1151
if scheme_type. is_file ( )
1152
+ //&& path_start + 1 < self.serialization.len()
1196
1153
&& is_windows_drive_letter ( & self . serialization [ path_start + 1 ..] )
1197
1154
{
1198
1155
if self . serialization . ends_with ( '|' ) {
@@ -1384,6 +1341,17 @@ impl<'a> Parser<'a> {
1384
1341
}
1385
1342
}
1386
1343
1344
/// Collapses a run of leading forward slashes at `path_start` into a single
/// slash, editing `serialization` in place.
///
/// Used to trim path-start slashes when no authority is present, see
/// https://github.com/whatwg/url/issues/232
///
/// Only the slashes directly at `path_start` are touched; later runs such as
/// the `//` in `/a//b` are kept. If the text at `path_start` does not begin
/// with a slash, `serialization` is left unchanged.
///
/// # Panics
///
/// Panics if `path_start` is greater than `serialization.len()` or does not
/// lie on a `char` boundary.
fn trim_path(serialization: &mut String, path_start: usize) {
    let path = &serialization[path_start..];
    // '/' is a single byte, so the length difference is the slash count.
    let leading_slashes = path.len() - path.trim_start_matches('/').len();
    if leading_slashes > 1 {
        // Keep the first slash and remove the rest of the run.
        serialization.replace_range(path_start + 1..path_start + leading_slashes, "");
    }
}
1354
+
1387
1355
#[ inline]
1388
1356
fn is_ascii_hex_digit ( c : char ) -> bool {
1389
1357
matches ! ( c, 'a' ..='f' | 'A' ..='F' | '0' ..='9' )
0 commit comments