@@ -103,7 +103,7 @@ fn pretend_parse_data_url(input: &str) -> Option<&str> {
103
103
104
104
let mut bytes = left_trimmed. bytes ( ) ;
105
105
{
106
- // Ignore ASCII tabs or newlines
106
+ // Ignore ASCII tabs or newlines like the URL parser would
107
107
let mut iter = bytes. by_ref ( ) . filter ( |& byte| !matches ! ( byte, b'\t' | b'\n' | b'\r' ) ) ;
108
108
require ! ( iter. next( ) ?. to_ascii_lowercase( ) == b'd' ) ;
109
109
require ! ( iter. next( ) ?. to_ascii_lowercase( ) == b'a' ) ;
@@ -131,53 +131,81 @@ fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> {
131
131
}
132
132
133
133
fn parse_header ( from_colon_to_comma : & str ) -> ( mime:: Mime , bool ) {
134
- let input = from_colon_to_comma. chars ( )
135
- . filter ( |& c| !matches ! ( c, '\t' | '\n' | '\r' ) ) // Removed by the URL parser
136
- . collect :: < String > ( ) ;
137
- let mut string;
138
-
139
- let input = input. trim_matches ( ' ' ) ;
140
-
141
- let ( mut input, base64) = match without_base64_suffix ( input) {
142
- Some ( s) => ( s, true ) ,
143
- None => ( input, false ) ,
144
- } ;
145
-
146
- // FIXME: percent-encode
134
+ // "Strip leading and trailing ASCII whitespace"
135
+ // \t, \n, and \r would have been filtered by the URL parser
136
+ // \f percent-encoded by the URL parser
137
+ // space is the only remaining ASCII whitespace
138
+ let trimmed = from_colon_to_comma. trim_matches ( |c| matches ! ( c, ' ' | '\t' | '\n' | '\r' ) ) ;
139
+
140
+ let without_base64_suffix = remove_base64_suffix ( trimmed) ;
141
+ let base64 = without_base64_suffix. is_some ( ) ;
142
+ let mime_type = without_base64_suffix. unwrap_or ( trimmed) ;
143
+
144
+ let mut string = String :: new ( ) ;
145
+ if mime_type. starts_with ( ';' ) {
146
+ string. push_str ( "text/plain" )
147
+ }
148
+ let mut in_query = false ;
149
+ for byte in mime_type. bytes ( ) {
150
+ match byte {
151
+ // Ignore ASCII tabs or newlines like the URL parser would
152
+ b'\t' | b'\n' | b'\r' => continue ,
153
+
154
+ // C0 encode set
155
+ b'\0' ...b'\x1F' | b'\x7F' ...b'\xFF' => percent_encode ( byte, & mut string) ,
156
+
157
+ // Bytes other than the C0 encode set that are percent-encoded
158
+ // by the URL parser in the query state.
159
+ // '#' is also in that list but cannot occur here
160
+ // since it indicates the start of the URL’s fragment.
161
+ b' ' | b'"' | b'<' | b'>' if in_query => percent_encode ( byte, & mut string) ,
162
+
163
+ b'?' => {
164
+ in_query = true ;
165
+ string. push ( '?' )
166
+ }
147
167
148
- if input. starts_with ( ';' ) {
149
- string = String :: from ( "text/plain" ) ;
150
- string. push_str ( input) ;
151
- input = & * string;
168
+ // Printable ASCII
169
+ _ => string. push ( byte as char )
170
+ }
152
171
}
153
172
173
+
154
174
// FIXME: does Mime::from_str match the MIME Sniffing Standard’s parsing algorithm?
155
175
// <https://mimesniff.spec.whatwg.org/#parse-a-mime-type>
156
- let mime_type = input. parse ( )
157
- . unwrap_or_else ( |_| "text/plain;charset=US-ASCII" . parse ( ) . unwrap ( ) ) ;
176
+ let mime_type = string. parse ( ) . unwrap_or_else ( |_| {
177
+ "text/plain;charset=US-ASCII" . parse ( ) . unwrap ( )
178
+ } ) ;
158
179
159
180
( mime_type, base64)
160
181
}
161
182
162
183
/// Strips a trailing `;base64` marker from `s`.
///
/// Matching runs from the end of the string: `base64` is compared
/// ASCII-case-insensitively, any number of spaces may sit between the `;`
/// and `base64`, and ASCII tabs and newlines are ignored wherever they
/// appear in the examined tail.
///
/// Returns `Some(prefix)` with everything before the `;` when the marker is
/// present, and `None` when there is no base64 suffix.
fn remove_base64_suffix(s: &str) -> Option<&str> {
    let mut bytes = s.bytes();
    {
        // Ignore ASCII tabs or newlines like the URL parser would,
        // and search from the end.
        let mut iter = bytes
            .by_ref()
            .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r'))
            .rev();

        // "base64" spelled backwards; lowercasing leaves the digits untouched.
        for &expected in b"46esab".iter() {
            if iter.next()?.to_ascii_lowercase() != expected {
                return None;
            }
        }

        // Skip spaces preceding "base64"; the first other byte must be ';'.
        if iter.find(|&byte| byte != b' ')? != b';' {
            return None;
        }
    }
    // `bytes` now holds exactly what precedes the ';'. Since ';' is ASCII,
    // that length is a char boundary and the slice cannot panic.
    Some(&s[..bytes.len()])
}
203
+
204
/// Appends the percent-encoded form of `byte` to `string`: a `%` followed by
/// two uppercase hexadecimal digits.
fn percent_encode(byte: u8, string: &mut String) {
    const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";
    // Both nibbles are in 0..16, so the table lookups cannot go out of bounds.
    let high = HEX_UPPER[usize::from(byte >> 4)];
    let low = HEX_UPPER[usize::from(byte & 0x0f)];
    string.push('%');
    string.push(char::from(high));
    string.push(char::from(low));
}
182
210
183
211
/// This is <https://url.spec.whatwg.org/#string-percent-decode> while also:
0 commit comments