Skip to content

Commit 63493ba

Browse files
committed
Move base64 to a separate module
1 parent 0b58116 commit 63493ba

File tree

2 files changed

+162
-159
lines changed

2 files changed

+162
-159
lines changed

src/forgiving_base64.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
//! <https://infra.spec.whatwg.org/#forgiving-base64-decode>
2+
3+
/// Error returned when the input is not valid forgiving-base64.
///
/// The payload is a private unit so that the type cannot be constructed
/// outside of this module.
#[derive(Debug)]
pub struct InvalidBase64(());

/// Error returned by the streaming [`Decoder`].
#[derive(Debug)]
pub enum DecodeError<E> {
    /// The input is not valid forgiving-base64.
    InvalidBase64(InvalidBase64),
    /// The `write_bytes` callback given to [`Decoder::new`] returned this error.
    WriteError(E),
}

impl<E> From<InvalidBase64> for DecodeError<E> {
    fn from(e: InvalidBase64) -> Self {
        DecodeError::InvalidBase64(e)
    }
}

/// Uninhabited error type for write callbacks that cannot fail.
pub(crate) enum Impossible {}

impl From<DecodeError<Impossible>> for InvalidBase64 {
    fn from(e: DecodeError<Impossible>) -> Self {
        match e {
            DecodeError::InvalidBase64(e) => e,
            // `Impossible` has no variants, so this arm can never be reached.
            DecodeError::WriteError(e) => match e {},
        }
    }
}

/// Decode a complete forgiving-base64 input into a new `Vec<u8>`.
///
/// `input` is assumed to be in an ASCII-compatible encoding
///
/// # Errors
///
/// Returns [`InvalidBase64`] if `input` contains a byte that is neither an
/// alphabet symbol, ASCII whitespace nor `=`, or if its length or padding
/// is not acceptable.
pub fn decode_to_vec(input: &[u8]) -> Result<Vec<u8>, InvalidBase64> {
    let mut v = Vec::new();
    {
        // Appending to a `Vec` cannot fail; the error type of this closure is
        // inferred as `Impossible` through the `?` conversions below.
        let mut decoder = Decoder::new(|bytes| {
            v.extend_from_slice(bytes);
            Ok(())
        });
        decoder.feed(input)?;
        decoder.finish()?;
    }
    Ok(v)
}

/// Streaming decoder for
/// <https://infra.spec.whatwg.org/#forgiving-base64-decode>
///
/// Decoded bytes are handed to the `write_bytes` callback as soon as they
/// are available.
pub struct Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
    /// Callback receiving the decoded bytes.
    write_bytes: F,
    /// Accumulator of decoded bits; only its low `buffer_bit_length` bits
    /// (plus the 6 bits just shifted in) are ever read.
    bit_buffer: u32,
    /// Number of pending bits in `bit_buffer`: 0, 6, 12 or 18.
    buffer_bit_length: u8,
    /// Number of `=` padding symbols seen so far (saturating).
    padding_symbols: u8,
}

impl<F, E> Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
    /// Create a decoder that passes decoded bytes to `write_bytes`.
    pub fn new(write_bytes: F) -> Self {
        Self {
            write_bytes,
            bit_buffer: 0,
            buffer_bit_length: 0,
            padding_symbols: 0,
        }
    }

    /// Feed to the decoder partial input in an ASCII-compatible encoding
    ///
    /// # Errors
    ///
    /// Returns `DecodeError::InvalidBase64` on a byte that is not an alphabet
    /// symbol, ASCII whitespace or `=`, or on an alphabet symbol that follows
    /// padding. Returns `DecodeError::WriteError` if the callback fails.
    pub fn feed(&mut self, input: &[u8]) -> Result<(), DecodeError<E>> {
        for &byte in input {
            let value = BASE64_DECODE_TABLE[usize::from(byte)];
            if value < 0 {
                // A character that’s not part of the alphabet
                match byte {
                    // Remove ASCII whitespace. All five whitespace bytes are
                    // handled here so that the decoder is correct on its own,
                    // without relying on a caller having filtered some of them.
                    b' ' | b'\t' | b'\n' | b'\x0C' | b'\r' => continue,
                    b'=' => {
                        // Count each padding symbol exactly once;
                        // `finish` compares this count against 1 and 2.
                        self.padding_symbols = self.padding_symbols.saturating_add(1);
                        continue;
                    }
                    _ => return Err(InvalidBase64(()).into()),
                }
            }
            if self.padding_symbols > 0 {
                // Alphabet symbols after padding
                return Err(InvalidBase64(()).into());
            }
            self.bit_buffer = (self.bit_buffer << 6) | value as u32;
            // 18 bits pending before this symbol means we have just reached 24.
            if self.buffer_bit_length < 18 {
                self.buffer_bit_length += 6;
            } else {
                // We’ve accumulated four times 6 bits, which equals three times 8 bits.
                let byte_buffer = [
                    (self.bit_buffer >> 16) as u8,
                    (self.bit_buffer >> 8) as u8,
                    self.bit_buffer as u8,
                ];
                (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
                self.buffer_bit_length = 0;
                // No need to reset bit_buffer,
                // since next time we’re only gonna read relevant bits.
            }
        }
        Ok(())
    }

    /// Call this to signal the end of the input
    ///
    /// Writes the bytes of a trailing partial group, if any.
    ///
    /// # Errors
    ///
    /// Returns `DecodeError::InvalidBase64` if the number of trailing symbols
    /// and padding characters is not an acceptable combination, and
    /// `DecodeError::WriteError` if the callback fails.
    pub fn finish(mut self) -> Result<(), DecodeError<E>> {
        match (self.buffer_bit_length, self.padding_symbols) {
            (0, 0) => {
                // A multiple of four of alphabet symbols, and nothing else.
            }
            (12, 2) | (12, 0) => {
                // A multiple of four of alphabet symbols, followed by two more symbols,
                // optionally followed by two padding characters (which make a total multiple of four).
                // The low 4 bits do not form a whole byte and are discarded.
                let byte_buffer = [
                    (self.bit_buffer >> 4) as u8,
                ];
                (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
            }
            (18, 1) | (18, 0) => {
                // A multiple of four of alphabet symbols, followed by three more symbols,
                // optionally followed by one padding character (which make a total multiple of four).
                // The low 2 bits do not form a whole byte and are discarded.
                let byte_buffer = [
                    (self.bit_buffer >> 10) as u8,
                    (self.bit_buffer >> 2) as u8,
                ];
                (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
            }
            _ => {
                // No other combination is acceptable
                return Err(InvalidBase64(()).into());
            }
        }
        Ok(())
    }
}


/// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet"
/// at <https://tools.ietf.org/html/rfc4648#section-4>
///
/// Array indices are the byte value of symbols.
/// Array values are their positions in the base64 alphabet,
/// or -1 for symbols not in the alphabet.
/// The position contributes 6 bits to the decoded bytes.
const BASE64_DECODE_TABLE: [i8; 256] = [
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
];

src/lib.rs

Lines changed: 5 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
#[macro_use] extern crate matches;
1919
pub extern crate mime;
2020

21+
use forgiving_base64::{InvalidBase64, DecodeError};
22+
23+
pub mod forgiving_base64;
24+
2125
pub struct DataUrl<'a> {
2226
mime_type: mime::Mime,
2327
base64: bool,
@@ -30,19 +34,6 @@ pub enum DataUrlError {
3034
NoComma,
3135
}
3236

33-
#[derive(Debug)]
34-
pub enum DecodeError<E> {
35-
InvalidBase64(InvalidBase64),
36-
WriteError(E),
37-
}
38-
39-
#[derive(Debug)]
40-
pub struct InvalidBase64(());
41-
42-
impl<E> From<InvalidBase64> for DecodeError<E> {
43-
fn from(e: InvalidBase64) -> Self { DecodeError::InvalidBase64(e) }
44-
}
45-
4637
impl<'a> DataUrl<'a> {
4738
/// <https://fetch.spec.whatwg.org/#data-url-processor>
4839
/// but starting from a string rather than a parsed `Url`, to avoid extra string copies.
@@ -87,17 +78,6 @@ impl<'a> DataUrl<'a> {
8778
}
8879
}
8980

90-
enum Impossible {}
91-
92-
impl From<DecodeError<Impossible>> for InvalidBase64 {
93-
fn from(e: DecodeError<Impossible>) -> Self {
94-
match e {
95-
DecodeError::InvalidBase64(e) => e,
96-
DecodeError::WriteError(e) => match e {}
97-
}
98-
}
99-
}
100-
10181
/// The URL’s fragment identifier (after `#`)
10282
pub struct FragmentIdentifier<'a>(&'a str);
10383

@@ -312,142 +292,8 @@ fn decode_with_base64<F, E>(encoded_body_plus_fragment: &str, write_bytes: F)
312292
-> Result<Option<FragmentIdentifier>, DecodeError<E>>
313293
where F: FnMut(&[u8]) -> Result<(), E>
314294
{
315-
let mut decoder = ForgivingBase64Decoder::new(write_bytes);
295+
let mut decoder = forgiving_base64::Decoder::new(write_bytes);
316296
let fragment = decode_without_base64(encoded_body_plus_fragment, |bytes| decoder.feed(bytes))?;
317297
decoder.finish()?;
318298
Ok(fragment)
319299
}
320-
321-
/// <https://infra.spec.whatwg.org/#forgiving-base64-decode>
322-
///
323-
/// `input` is assumed to be in an ASCII-compatible encoding
324-
pub fn forgiving_base64_decode_to_vec(input: &[u8]) -> Result<Vec<u8>, InvalidBase64> {
325-
let mut v = Vec::new();
326-
{
327-
let mut decoder = ForgivingBase64Decoder::new(|bytes| Ok(v.extend_from_slice(bytes)));
328-
decoder.feed(input)?;
329-
decoder.finish()?;
330-
}
331-
Ok(v)
332-
}
333-
334-
/// <https://infra.spec.whatwg.org/#forgiving-base64-decode>
335-
pub struct ForgivingBase64Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
336-
write_bytes: F,
337-
bit_buffer: u32,
338-
buffer_bit_length: u8,
339-
padding_symbols: u8,
340-
}
341-
342-
impl<F, E> ForgivingBase64Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
343-
pub fn new(write_bytes: F) -> Self {
344-
Self {
345-
write_bytes,
346-
bit_buffer: 0,
347-
buffer_bit_length: 0,
348-
padding_symbols: 0,
349-
}
350-
}
351-
352-
/// Feed to the decoder partial input in an ASCII-compatible encoding
353-
pub fn feed(&mut self, input: &[u8]) -> Result<(), DecodeError<E>> {
354-
for &byte in input.iter() {
355-
let value = BASE64_DECODE_TABLE[byte as usize];
356-
if value < 0 {
357-
// A character that’s not part of the alphabet
358-
359-
// Remove ASCII whitespace
360-
// '\t' | '\n' | '\r' was already filtered by decode_without_base64()
361-
if byte == b' ' || byte == b'\x0C' {
362-
continue
363-
}
364-
365-
if byte == b'=' {
366-
self.padding_symbols = self.padding_symbols.saturating_add(8);
367-
continue
368-
}
369-
370-
Err(InvalidBase64(()))?
371-
}
372-
if self.padding_symbols > 0 {
373-
// Alphabet symbols after padding
374-
Err(InvalidBase64(()))?
375-
}
376-
self.bit_buffer <<= 6;
377-
self.bit_buffer |= value as u32;
378-
if self.buffer_bit_length < 24 {
379-
self.buffer_bit_length += 6;
380-
} else {
381-
// We’ve accumulated four times 6 bits, which equals three times 8 bits.
382-
let byte_buffer = [
383-
(self.bit_buffer >> 16) as u8,
384-
(self.bit_buffer >> 8) as u8,
385-
self.bit_buffer as u8,
386-
];
387-
(self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
388-
self.buffer_bit_length = 0;
389-
// No need to reset bit_buffer,
390-
// since next time we’re only gonna read relevant bits.
391-
}
392-
}
393-
Ok(())
394-
}
395-
396-
/// Call this to signal the end of the input
397-
pub fn finish(mut self) -> Result<(), DecodeError<E>> {
398-
match (self.buffer_bit_length, self.padding_symbols) {
399-
(0, 0) => {
400-
// A multiple of four of alphabet symbols, and nothing else.
401-
}
402-
(12, 2) | (12, 0) => {
403-
// A multiple of four of alphabet symbols, followed by two more symbols,
404-
// optionally followed by two padding characters (which make a total multiple of four).
405-
let byte_buffer = [
406-
(self.bit_buffer >> 4) as u8,
407-
];
408-
(self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
409-
}
410-
(18, 1) | (18, 0) => {
411-
// A multiple of four of alphabet symbols, followed by three more symbols,
412-
// optionally followed by one padding character (which make a total multiple of four).
413-
let byte_buffer = [
414-
(self.bit_buffer >> 10) as u8,
415-
(self.bit_buffer >> 2) as u8,
416-
];
417-
(self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
418-
}
419-
_ => {
420-
// No other combination is acceptable
421-
Err(InvalidBase64(()))?
422-
}
423-
}
424-
Ok(())
425-
}
426-
}
427-
428-
429-
/// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet"
430-
/// at <https://tools.ietf.org/html/rfc4648#section-4>
431-
///
432-
/// Array indices are the byte value of symbols.
433-
/// Array values are their positions in the base64 alphabet,
434-
/// or -1 for symbols not in the alphabet.
435-
/// The position contributes 6 bits to the decoded bytes.
436-
const BASE64_DECODE_TABLE: [i8; 256] = [
437-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
438-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
439-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
440-
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
441-
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
442-
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
443-
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
444-
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
445-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
446-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
447-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
448-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
449-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
450-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
451-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
452-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
453-
];

0 commit comments

Comments
 (0)