Skip to content

Commit e76e98c

Browse files
committed
Merge branch 'master' of ../rust-data-url into data
2 parents a2851be + cc670f6 commit e76e98c

File tree

10 files changed

+4905
-0
lines changed

10 files changed

+4905
-0
lines changed

data-url/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
[package]
name = "data-url"
version = "0.1.0"
authors = ["Simon Sapin <simon.sapin@exyr.org>"]

[dependencies]
matches = "0.1"

[dev-dependencies]
rustc-test = "0.3"
serde = {version = "1.0", features = ["derive"]}
serde_json = "1.0"

[lib]
test = false

[[test]]
name = "wpt"
harness = false

data-url/src/forgiving_base64.rs

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
//! <https://infra.spec.whatwg.org/#forgiving-base64-decode>
2+
3+
#[derive(Debug)]
4+
pub struct InvalidBase64(InvalidBase64Details);
5+
6+
/// The specific reason why an input was rejected by the decoder.
#[derive(Debug)]
enum InvalidBase64Details {
    /// A byte that is neither a base64 alphabet symbol, ASCII whitespace,
    /// nor the `=` padding character. Carries the offending byte.
    UnexpectedSymbol(u8),
    /// An alphabet symbol was fed after at least one `=` had been seen.
    AlphabetSymbolAfterPadding,
    /// The input ended with exactly one alphabet symbol (6 bits) pending,
    /// which is not enough to produce a byte.
    LoneAlphabetSymbol,
    /// The input ended with any other combination of pending bits and
    /// padding count that the decoder does not accept.
    Padding,
}
13+
14+
#[derive(Debug)]
15+
pub enum DecodeError<E> {
16+
InvalidBase64(InvalidBase64),
17+
WriteError(E),
18+
}
19+
20+
impl<E> From<InvalidBase64Details> for DecodeError<E> {
21+
fn from(e: InvalidBase64Details) -> Self {
22+
DecodeError::InvalidBase64(InvalidBase64(e))
23+
}
24+
}
25+
26+
/// Uninhabited type: it has no variants, so no value of it can ever exist.
///
/// Used as the sink error type `E` when writing cannot fail, which lets a
/// `DecodeError<Impossible>` be converted into a plain `InvalidBase64`.
pub(crate) enum Impossible {}
27+
28+
impl From<DecodeError<Impossible>> for InvalidBase64 {
29+
fn from(e: DecodeError<Impossible>) -> Self {
30+
match e {
31+
DecodeError::InvalidBase64(e) => e,
32+
DecodeError::WriteError(e) => match e {}
33+
}
34+
}
35+
}
36+
37+
/// `input` is assumed to be in an ASCII-compatible encoding
38+
pub fn decode_to_vec(input: &[u8]) -> Result<Vec<u8>, InvalidBase64> {
39+
let mut v = Vec::new();
40+
{
41+
let mut decoder = Decoder::new(|bytes| Ok(v.extend_from_slice(bytes)));
42+
decoder.feed(input)?;
43+
decoder.finish()?;
44+
}
45+
Ok(v)
46+
}
47+
48+
/// Incremental forgiving-base64 decoder that hands decoded bytes to a
/// caller-supplied callback.
///
/// <https://infra.spec.whatwg.org/#forgiving-base64-decode>
pub struct Decoder<F, E>
where
    F: FnMut(&[u8]) -> Result<(), E>,
{
    /// Callback that receives each group of decoded bytes.
    write_bytes: F,
    /// Accumulator for 6-bit symbol values; the most recent symbol occupies
    /// the lowest bits.
    bit_buffer: u32,
    /// Number of bits in `bit_buffer` not yet written out (0, 6, 12 or 18).
    buffer_bit_length: u8,
    /// Number of `=` characters seen so far (saturating).
    padding_symbols: u8,
}
55+
56+
impl<F, E> Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
57+
pub fn new(write_bytes: F) -> Self {
58+
Self {
59+
write_bytes,
60+
bit_buffer: 0,
61+
buffer_bit_length: 0,
62+
padding_symbols: 0,
63+
}
64+
}
65+
66+
/// Feed to the decoder partial input in an ASCII-compatible encoding
67+
pub fn feed(&mut self, input: &[u8]) -> Result<(), DecodeError<E>> {
68+
for &byte in input.iter() {
69+
let value = BASE64_DECODE_TABLE[byte as usize];
70+
if value < 0 {
71+
// A character that’s not part of the alphabet
72+
73+
// Remove ASCII whitespace
74+
if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0C') {
75+
continue
76+
}
77+
78+
if byte == b'=' {
79+
self.padding_symbols = self.padding_symbols.saturating_add(1);
80+
continue
81+
}
82+
83+
Err(InvalidBase64Details::UnexpectedSymbol(byte))?
84+
}
85+
if self.padding_symbols > 0 {
86+
Err(InvalidBase64Details::AlphabetSymbolAfterPadding)?
87+
}
88+
self.bit_buffer <<= 6;
89+
self.bit_buffer |= value as u32;
90+
// 18 before incrementing means we’ve just reached 24
91+
if self.buffer_bit_length < 18 {
92+
self.buffer_bit_length += 6;
93+
} else {
94+
// We’ve accumulated four times 6 bits, which equals three times 8 bits.
95+
let byte_buffer = [
96+
(self.bit_buffer >> 16) as u8,
97+
(self.bit_buffer >> 8) as u8,
98+
self.bit_buffer as u8,
99+
];
100+
(self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
101+
self.buffer_bit_length = 0;
102+
// No need to reset bit_buffer,
103+
// since next time we’re only gonna read relevant bits.
104+
}
105+
}
106+
Ok(())
107+
}
108+
109+
/// Call this to signal the end of the input
110+
pub fn finish(mut self) -> Result<(), DecodeError<E>> {
111+
match (self.buffer_bit_length, self.padding_symbols) {
112+
(0, 0) => {
113+
// A multiple of four of alphabet symbols, and nothing else.
114+
}
115+
(12, 2) | (12, 0) => {
116+
// A multiple of four of alphabet symbols, followed by two more symbols,
117+
// optionally followed by two padding characters (which make a total multiple of four).
118+
let byte_buffer = [
119+
(self.bit_buffer >> 4) as u8,
120+
];
121+
(self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
122+
}
123+
(18, 1) | (18, 0) => {
124+
// A multiple of four of alphabet symbols, followed by three more symbols,
125+
// optionally followed by one padding character (which make a total multiple of four).
126+
let byte_buffer = [
127+
(self.bit_buffer >> 10) as u8,
128+
(self.bit_buffer >> 2) as u8,
129+
];
130+
(self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
131+
}
132+
(6, _) => {
133+
Err(InvalidBase64Details::LoneAlphabetSymbol)?
134+
}
135+
_ => {
136+
Err(InvalidBase64Details::Padding)?
137+
}
138+
}
139+
Ok(())
140+
}
141+
}
142+
143+
144+
/// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet"
/// at <https://tools.ietf.org/html/rfc4648#section-4>
///
/// Array indices are the byte value of symbols.
/// Array values are their positions in the base64 alphabet,
/// or -1 for symbols not in the alphabet.
/// The position contributes 6 bits to the decoded bytes.
const BASE64_DECODE_TABLE: [i8; 256] = [
    // 0x00..=0x1F: no alphabet symbols
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20..=0x2F: '+' (62) and '/' (63)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    // 0x30..=0x3F: '0'..='9' (52..=61)
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    // 0x40..=0x5F: 'A'..='Z' (0..=25)
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    // 0x60..=0x7F: 'a'..='z' (26..=51)
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    // 0x80..=0xFF: no alphabet symbols
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
];

0 commit comments

Comments
 (0)