Skip to content

Commit 1055810

Browse files
committed
Percent-encode the mime type like the URL parser would
1 parent 9d8acc3 commit 1055810

File tree

1 file changed

+64
-36
lines changed

1 file changed

+64
-36
lines changed

src/lib.rs

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ fn pretend_parse_data_url(input: &str) -> Option<&str> {
103103

104104
let mut bytes = left_trimmed.bytes();
105105
{
106-
// Ignore ASCII tabs or newlines
106+
// Ignore ASCII tabs or newlines like the URL parser would
107107
let mut iter = bytes.by_ref().filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r'));
108108
require!(iter.next()?.to_ascii_lowercase() == b'd');
109109
require!(iter.next()?.to_ascii_lowercase() == b'a');
@@ -131,53 +131,81 @@ fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> {
131131
}
132132

133133
fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) {
134-
let input = from_colon_to_comma.chars()
135-
.filter(|&c| !matches!(c, '\t' | '\n' | '\r')) // Removed by the URL parser
136-
.collect::<String>();
137-
let mut string;
138-
139-
let input = input.trim_matches(' ');
140-
141-
let (mut input, base64) = match without_base64_suffix(input) {
142-
Some(s) => (s, true),
143-
None => (input, false),
144-
};
145-
146-
// FIXME: percent-encode
134+
// "Strip leading and trailing ASCII whitespace"
135+
// \t, \n, and \r would have been filtered by the URL parser
136+
// \f would have been percent-encoded by the URL parser
137+
// space is the only remaining ASCII whitespace
138+
let trimmed = from_colon_to_comma.trim_matches(|c| matches!(c, ' ' | '\t' | '\n' | '\r'));
139+
140+
let without_base64_suffix = remove_base64_suffix(trimmed);
141+
let base64 = without_base64_suffix.is_some();
142+
let mime_type = without_base64_suffix.unwrap_or(trimmed);
143+
144+
let mut string = String::new();
145+
if mime_type.starts_with(';') {
146+
string.push_str("text/plain")
147+
}
148+
let mut in_query = false;
149+
for byte in mime_type.bytes() {
150+
match byte {
151+
// Ignore ASCII tabs or newlines like the URL parser would
152+
b'\t' | b'\n' | b'\r' => continue,
153+
154+
// C0 control percent-encode set: C0 controls and every byte greater than 0x7E
155+
b'\0'...b'\x1F' | b'\x7F'...b'\xFF' => percent_encode(byte, &mut string),
156+
157+
// Bytes other than the C0 encode set that are percent-encoded
158+
// by the URL parser in the query state.
159+
// '#' is also in that list but cannot occur here
160+
// since it indicates the start of the URL’s fragment.
161+
b' ' | b'"' | b'<' | b'>' if in_query => percent_encode(byte, &mut string),
162+
163+
b'?' => {
164+
in_query = true;
165+
string.push('?')
166+
}
147167

148-
if input.starts_with(';') {
149-
string = String::from("text/plain");
150-
string.push_str(input);
151-
input = &*string;
168+
// Printable ASCII
169+
_ => string.push(byte as char)
170+
}
152171
}
153172

173+
154174
// FIXME: does Mime::from_str match the MIME Sniffing Standard’s parsing algorithm?
155175
// <https://mimesniff.spec.whatwg.org/#parse-a-mime-type>
156-
let mime_type = input.parse()
157-
.unwrap_or_else(|_| "text/plain;charset=US-ASCII".parse().unwrap());
176+
let mime_type = string.parse().unwrap_or_else(|_| {
177+
"text/plain;charset=US-ASCII".parse().unwrap()
178+
});
158179

159180
(mime_type, base64)
160181
}
161182

162183
/// Returns the input without its trailing `;base64` marker, or `None` if there is no base64 suffix
163-
fn without_base64_suffix(s: &str) -> Option<&str> {
164-
remove_suffix(
165-
remove_suffix(s, "base64", str::eq_ignore_ascii_case)?
166-
.trim_right_matches(' '),
167-
";", str::eq
168-
)
169-
}
184+
fn remove_base64_suffix(s: &str) -> Option<&str> {
185+
let mut bytes = s.bytes();
186+
{
187+
// Ignore ASCII tabs or newlines like the URL parser would
188+
let iter = bytes.by_ref().filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r'));
170189

171-
fn remove_suffix<'a, Eq>(haystack: &'a str, needle: &str, eq: Eq) -> Option<&'a str>
172-
where Eq: Fn(&str, &str) -> bool
173-
{
174-
let start_index = haystack.len().checked_sub(needle.len())?;
175-
let (before, after) = haystack.split_at(start_index);
176-
if eq(after, needle) {
177-
Some(before)
178-
} else {
179-
None
190+
// Search from the end
191+
let mut iter = iter.rev();
192+
193+
require!(iter.next()? == b'4');
194+
require!(iter.next()? == b'6');
195+
require!(iter.next()?.to_ascii_lowercase() == b'e');
196+
require!(iter.next()?.to_ascii_lowercase() == b's');
197+
require!(iter.next()?.to_ascii_lowercase() == b'a');
198+
require!(iter.next()?.to_ascii_lowercase() == b'b');
199+
require!(iter.skip_while(|&byte| byte == b' ').next()? == b';');
180200
}
201+
Some(&s[..bytes.len()])
202+
}
203+
204+
fn percent_encode(byte: u8, string: &mut String) {
205+
const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";
206+
string.push('%');
207+
string.push(HEX_UPPER[(byte >> 4) as usize] as char);
208+
string.push(HEX_UPPER[(byte & 0x0f) as usize] as char);
181209
}
182210

183211
/// This is <https://url.spec.whatwg.org/#string-percent-decode> while also:

0 commit comments

Comments
 (0)