@@ -1024,20 +1024,100 @@ impl<'a> Parser<'a> {
1024
1024
has_host : & mut bool ,
1025
1025
mut input : Input < ' i > ,
1026
1026
) -> Input < ' i > {
1027
- // Path start state
1028
- match input. split_first ( ) {
1029
- ( Some ( '/' ) , remaining) => input = remaining,
1030
- ( Some ( '\\' ) , remaining) => {
1031
- if scheme_type. is_special ( ) {
1032
- self . log_violation ( SyntaxViolation :: Backslash ) ;
1033
- input = remaining
1027
+ let path_start = self . serialization . len ( ) ;
1028
+ let ( maybe_c, remaining) = input. split_first ( ) ;
1029
+ // If url is special, then:
1030
+ if scheme_type. is_special ( ) {
1031
+ // If c is U+005C (\), validation error.
1032
+ if maybe_c == Some ( '\\' ) {
1033
+ self . log_violation ( SyntaxViolation :: Backslash ) ;
1034
+ }
1035
+ // If c is neither U+002F (/) nor U+005C (\), then decrease pointer by one.
1036
+ if maybe_c == Some ( '/' ) || maybe_c == Some ( '\\' ) {
1037
+ input = remaining;
1038
+ }
1039
+ // Set state to path state.
1040
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1041
+ } else if maybe_c == Some ( '?' ) {
1042
+ // Otherwise, if state override is not given and c is U+003F (?),
1043
+ // set url’s query to the empty string and state to query state.
1044
+ return self . parse_query_2 ( scheme_type, remaining) ;
1045
+ } else if maybe_c == Some ( '#' ) {
1046
+ // Otherwise, if state override is not given and c is U+0023 (#),
1047
+ // set url’s fragment to the empty string and state to fragment state.
1048
+ return self . parse_fragment_2 ( remaining) ;
1049
+ }
1050
+ // Otherwise, if c is not the EOF code point:
1051
+ if !remaining. is_empty ( ) {
1052
+ if maybe_c == Some ( '/' ) {
1053
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1054
+ } else {
1055
+ // If c is not U+002F (/), then decrease pointer by one.
1056
+ return self . parse_path ( scheme_type, has_host, path_start, remaining) ;
1057
+ }
1058
+ }
1059
+ input
1060
+ }
1061
+
1062
+ pub fn parse_query_2 < ' i > (
1063
+ & mut self ,
1064
+ scheme_type : SchemeType ,
1065
+ mut input : Input < ' i > ,
1066
+ ) -> Input < ' i > {
1067
+ let mut query = String :: new ( ) ; // FIXME: use a streaming decoder instead
1068
+
1069
+ while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1070
+ match c {
1071
+ // If state override is not given and c is U+0023 (#),
1072
+ // then set url’s fragment to the empty string and state to fragment state.
1073
+ '#' => return self . parse_fragment_2 ( input) ,
1074
+ c => {
1075
+ // If c is not a URL code point and not U+0025 (%), validation error.
1076
+ // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1077
+ self . check_url_code_point ( c, & input) ;
1078
+ query. push ( c) ;
1034
1079
}
1035
1080
}
1036
- _ => { }
1037
1081
}
1038
- let path_start = self . serialization . len ( ) ;
1039
- self . serialization . push ( '/' ) ;
1040
- self . parse_path ( scheme_type, has_host, path_start, input)
1082
+
1083
+ // If encoding is not UTF-8 and one of the following is true
1084
+ // url is not special
1085
+ // url’s scheme is "ws" or "wss"
1086
+ let encoding = if !scheme_type. is_special ( )
1087
+ || self . serialization . starts_with ( "ws" )
1088
+ || self . serialization . starts_with ( "wss" )
1089
+ {
1090
+ self . query_encoding_override
1091
+ } else {
1092
+ None
1093
+ } ;
1094
+ let query_bytes = :: query_encoding:: encode ( encoding, & query) ;
1095
+ let set = if scheme_type. is_special ( ) {
1096
+ SPECIAL_QUERY
1097
+ } else {
1098
+ QUERY
1099
+ } ;
1100
+ self . serialization . extend ( percent_encode ( & query_bytes, set) ) ;
1101
+ input
1102
+ }
1103
+
1104
+ pub fn parse_fragment_2 < ' i > ( & mut self , mut input : Input < ' i > ) -> Input < ' i > {
1105
+ while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1106
+ match c {
1107
+ // U+0000 NULL: Validation error.
1108
+ '\0' => self . log_violation ( SyntaxViolation :: NullInFragment ) ,
1109
+ c => {
1110
+ // If c is not a URL code point and not U+0025 (%), validation error.
1111
+ // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1112
+ self . check_url_code_point ( c, & input) ;
1113
+ // UTF-8 percent encode c using the fragment percent-encode set
1114
+ // and append the result to url’s fragment.
1115
+ self . serialization
1116
+ . extend ( utf8_percent_encode ( & c. to_string ( ) , FRAGMENT ) ) ;
1117
+ }
1118
+ }
1119
+ }
1120
+ input
1041
1121
}
1042
1122
1043
1123
pub fn parse_path < ' i > (
@@ -1047,8 +1127,10 @@ impl<'a> Parser<'a> {
1047
1127
path_start : usize ,
1048
1128
mut input : Input < ' i > ,
1049
1129
) -> Input < ' i > {
1130
+ if !self . serialization . ends_with ( '/' ) && scheme_type. is_special ( ) && !input. is_empty ( ) {
1131
+ self . serialization . push ( '/' ) ;
1132
+ }
1050
1133
// Relative path state
1051
- debug_assert ! ( self . serialization. ends_with( '/' ) ) ;
1052
1134
loop {
1053
1135
let segment_start = self . serialization . len ( ) ;
1054
1136
let mut ends_with_slash = false ;
@@ -1061,13 +1143,15 @@ impl<'a> Parser<'a> {
1061
1143
} ;
1062
1144
match c {
1063
1145
'/' if self . context != Context :: PathSegmentSetter => {
1146
+ self . serialization . push ( c) ;
1064
1147
ends_with_slash = true ;
1065
1148
break ;
1066
1149
}
1067
1150
'\\' if self . context != Context :: PathSegmentSetter
1068
1151
&& scheme_type. is_special ( ) =>
1069
1152
{
1070
1153
self . log_violation ( SyntaxViolation :: Backslash ) ;
1154
+ self . serialization . push ( c) ;
1071
1155
ends_with_slash = true ;
1072
1156
break ;
1073
1157
}
0 commit comments