@@ -515,6 +515,8 @@ impl<'a> Parser<'a> {
515
515
self . serialization . push ( '/' ) ;
516
516
self . parse_path ( SchemeType :: File , & mut has_host, path_start, remaining)
517
517
} ;
518
+ // TODO: Handle authority
519
+ trim_path ( & mut self . serialization , host_end as usize ) ;
518
520
// For file URLs that have a host and whose path starts
519
521
// with the windows drive letter we just remove the host.
520
522
if !has_host {
@@ -556,16 +558,28 @@ impl<'a> Parser<'a> {
556
558
}
557
559
}
558
560
}
559
- self . serialization . push ( '/' ) ;
560
- let remaining = self . parse_path (
561
- SchemeType :: File ,
562
- & mut false ,
563
- host_end,
564
- input_after_first_char,
565
- ) ;
561
+ // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
562
+ let parse_path_input = if let Some ( c) = first_char {
563
+ if c == '/' || c == '\\' || c == '?' || c == '#' {
564
+ input
565
+ } else {
566
+ input_after_first_char
567
+ }
568
+ } else {
569
+ input_after_first_char
570
+ } ;
571
+
572
+ let remaining =
573
+ self . parse_path ( SchemeType :: File , & mut false , host_end, parse_path_input) ;
574
+
575
+ let host_start = host_start as u32 ;
576
+
577
+ // TODO: Handle authority
578
+ trim_path ( & mut self . serialization , host_end) ;
579
+
566
580
let ( query_start, fragment_start) =
567
581
self . parse_query_and_fragment ( scheme_type, scheme_end, remaining) ?;
568
- let host_start = host_start as u32 ;
582
+
569
583
let host_end = host_end as u32 ;
570
584
return Ok ( Url {
571
585
serialization : self . serialization ,
@@ -1040,21 +1054,36 @@ impl<'a> Parser<'a> {
1040
1054
& mut self ,
1041
1055
scheme_type : SchemeType ,
1042
1056
has_host : & mut bool ,
1043
- mut input : Input < ' i > ,
1057
+ input : Input < ' i > ,
1044
1058
) -> Input < ' i > {
1045
- // Path start state
1046
- match input. split_first ( ) {
1047
- ( Some ( '/' ) , remaining) => input = remaining,
1048
- ( Some ( '\\' ) , remaining) => {
1049
- if scheme_type. is_special ( ) {
1059
+ let path_start = self . serialization . len ( ) ;
1060
+ let ( maybe_c, remaining) = input. split_first ( ) ;
1061
+ // If url is special, then:
1062
+ if scheme_type. is_special ( ) {
1063
+ if let Some ( c) = maybe_c {
1064
+ if c == '\\' {
1065
+ // If c is U+005C (\), validation error.
1050
1066
self . log_violation ( SyntaxViolation :: Backslash ) ;
1051
- input = remaining
1052
1067
}
1053
1068
}
1054
- _ => { }
1069
+ // A special URL always has a non-empty path.
1070
+ if !self . serialization . ends_with ( "/" ) {
1071
+ self . serialization . push ( '/' ) ;
1072
+ // We have already made sure the forward slash is present.
1073
+ if maybe_c == Some ( '/' ) || maybe_c == Some ( '\\' ) {
1074
+ return self . parse_path ( scheme_type, has_host, path_start, remaining) ;
1075
+ }
1076
+ }
1077
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1078
+ } else if maybe_c == Some ( '?' ) || maybe_c == Some ( '#' ) {
1079
+ // Otherwise, if state override is not given and c is U+003F (?),
1080
+ // set url’s query to the empty string and state to query state.
1081
+ // Otherwise, if state override is not given and c is U+0023 (#),
1082
+ // set url’s fragment to the empty string and state to fragment state.
1083
+ // The query and fragment states will be handled by the caller.
1084
+ return input;
1055
1085
}
1056
- let path_start = self . serialization . len ( ) ;
1057
- self . serialization . push ( '/' ) ;
1086
+ // Otherwise, if c is not the EOF code point:
1058
1087
self . parse_path ( scheme_type, has_host, path_start, input)
1059
1088
}
1060
1089
@@ -1066,7 +1095,6 @@ impl<'a> Parser<'a> {
1066
1095
mut input : Input < ' i > ,
1067
1096
) -> Input < ' i > {
1068
1097
// Relative path state
1069
- debug_assert ! ( self . serialization. ends_with( '/' ) ) ;
1070
1098
loop {
1071
1099
let segment_start = self . serialization . len ( ) ;
1072
1100
let mut ends_with_slash = false ;
@@ -1079,13 +1107,15 @@ impl<'a> Parser<'a> {
1079
1107
} ;
1080
1108
match c {
1081
1109
'/' if self . context != Context :: PathSegmentSetter => {
1110
+ self . serialization . push ( c) ;
1082
1111
ends_with_slash = true ;
1083
1112
break ;
1084
1113
}
1085
1114
'\\' if self . context != Context :: PathSegmentSetter
1086
1115
&& scheme_type. is_special ( ) =>
1087
1116
{
1088
1117
self . log_violation ( SyntaxViolation :: Backslash ) ;
1118
+ self . serialization . push ( '/' ) ;
1089
1119
ends_with_slash = true ;
1090
1120
break ;
1091
1121
}
@@ -1109,18 +1139,31 @@ impl<'a> Parser<'a> {
1109
1139
}
1110
1140
}
1111
1141
}
1112
- match & self . serialization [ segment_start..] {
1142
+
1143
+ let to_match = if ends_with_slash {
1144
+ & self . serialization [ segment_start..self . serialization . len ( ) - 1 ]
1145
+ } else {
1146
+ & self . serialization [ segment_start..self . serialization . len ( ) ]
1147
+ } ;
1148
+ match to_match {
1149
+ // If buffer is a double-dot path segment, shorten url’s path,
1113
1150
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
1114
1151
| ".%2E" => {
1115
1152
debug_assert ! ( self . serialization. as_bytes( ) [ segment_start - 1 ] == b'/' ) ;
1116
- self . serialization . truncate ( segment_start - 1 ) ; // Truncate "/.."
1153
+ self . serialization . truncate ( segment_start - 1 ) ; // Truncate "/../ "
1117
1154
self . pop_path ( scheme_type, path_start) ;
1118
- if !self . serialization [ path_start..] . ends_with ( '/' ) {
1119
- self . serialization . push ( '/' )
1155
+ // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1156
+ if ends_with_slash && !self . serialization . ends_with ( "/" ) {
1157
+ self . serialization . push ( '/' ) ;
1120
1158
}
1121
1159
}
1160
+ // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
1161
+ // nor url is special and c is U+005C (\), append the empty string to url’s path.
1122
1162
"." | "%2e" | "%2E" => {
1123
1163
self . serialization . truncate ( segment_start) ;
1164
+ if ends_with_slash && !self . serialization . ends_with ( "/" ) {
1165
+ self . serialization . push ( '/' ) ;
1166
+ }
1124
1167
}
1125
1168
_ => {
1126
1169
if scheme_type. is_file ( )
@@ -1135,9 +1178,6 @@ impl<'a> Parser<'a> {
1135
1178
* has_host = false ; // FIXME account for this in callers
1136
1179
}
1137
1180
}
1138
- if ends_with_slash {
1139
- self . serialization . push ( '/' )
1140
- }
1141
1181
}
1142
1182
}
1143
1183
if !ends_with_slash {
@@ -1318,6 +1358,17 @@ impl<'a> Parser<'a> {
1318
1358
}
1319
1359
}
1320
1360
1361
/// Collapses a run of leading forward slashes in the path into a single `/`.
///
/// Used to trim the start of the path when no authority is present, see
/// https://github.com/whatwg/url/issues/232
///
/// `serialization[path_start..]` is treated as the path. When it begins with
/// two or more `/` characters they are replaced by exactly one; slashes that
/// appear later in the path are left alone. A path that is empty, or that does
/// not begin with `/`, is left untouched (it is never dropped).
///
/// # Panics
///
/// Panics if `path_start` is greater than `serialization.len()` or does not
/// lie on a `char` boundary.
fn trim_path(serialization: &mut String, path_start: usize) {
    let path = &serialization[path_start..];
    // '/' is a single byte, so the byte-length difference is the slash count.
    let leading_slashes = path.len() - path.trim_start_matches('/').len();
    if leading_slashes > 1 {
        // Edit in place rather than splitting the string off and rebuilding it.
        serialization.replace_range(path_start..path_start + leading_slashes, "/");
    }
}
1371
+
1321
1372
#[ inline]
1322
1373
fn is_ascii_hex_digit ( c : char ) -> bool {
1323
1374
matches ! ( c, 'a' ..='f' | 'A' ..='F' | '0' ..='9' )
0 commit comments