Skip to content

Commit e447349

Browse files
author
bors-servo
authored
Auto merge of #432 - servo:data, r=nox
Add data-url crate This is an implementation of https://fetch.spec.whatwg.org/#data-urls. It is independent of the `url` crate, but it seems within the scope of this repository and I didn’t feel like creating yet another repository :) <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/rust-url/432) <!-- Reviewable:end -->
2 parents 0f0813f + a6e63fe commit e447349

17 files changed

+4953
-18
lines changed

.travis.yml

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
11
language: rust
2-
script: make test
32

43
jobs:
54
include:
6-
- rust: nightly
7-
- rust: beta
8-
- rust: stable
95
- rust: 1.17.0
10-
- rust: nightly
11-
env:
12-
- WASM32=true # just to show it in travis UI
136
install:
14-
- rustup target add wasm32-unknown-unknown
15-
script:
16-
- cargo build --target=wasm32-unknown-unknown
7+
# --precise requires Cargo.lock to already exist
8+
- cargo update
9+
# getopts is only used in tests. Its versions 0.2.16+ don’t build on 1.17.0
10+
- cargo update -p getopts --precise 0.2.15
11+
# data-url uses pub(crate) which is unstable in 1.17
12+
script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde
13+
14+
- rust: stable
15+
script: cargo test --all-features --all
16+
17+
- rust: beta
18+
script: cargo test --all-features --all
19+
20+
- rust: nightly
21+
script: cargo test --all-features --all
22+
23+
- rust: nightly
24+
env: TARGET=WASM32 # For job list UI
25+
install: rustup target add wasm32-unknown-unknown
26+
script: cargo build --all --target=wasm32-unknown-unknown
1727

1828
notifications:
1929
webhooks: http://build.servo.org:54856/travis

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ travis-ci = { repository = "servo/rust-url" }
1818
appveyor = { repository = "Manishearth/rust-url" }
1919

2020
[workspace]
21-
members = [".", "idna", "percent_encoding", "url_serde"]
21+
members = [".", "idna", "percent_encoding", "url_serde", "data-url"]
2222

2323
[[test]]
2424
name = "unit"

Makefile

Lines changed: 0 additions & 6 deletions
This file was deleted.

data-url/Cargo.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[package]
2+
name = "data-url"
3+
version = "0.1.0"
4+
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
5+
description = "Processing of data: URL according to WHATWG’s Fetch Standard"
6+
repository = "https://github.com/servo/rust-url"
7+
license = "MIT OR Apache-2.0"
8+
9+
[dependencies]
10+
matches = "0.1"
11+
12+
[dev-dependencies]
13+
rustc-test = "0.3"
14+
serde = {version = "1.0", features = ["derive"]}
15+
serde_json = "1.0"
16+
17+
[lib]
18+
test = false
19+
20+
[[test]]
21+
name = "wpt"
22+
harness = false

data-url/LICENSE-APACHE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../LICENSE-APACHE

data-url/LICENSE-MIT

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../LICENSE-MIT

data-url/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# data-url
2+
3+
![crates.io](https://img.shields.io/crates/v/data-url.svg)
4+
[docs.rs](https://docs.rs/data-url/)
5+
6+
Processing of `data:` URLs in Rust according to the Fetch Standard:
7+
<https://fetch.spec.whatwg.org/#data-urls>
8+
but starting from a string rather than a parsed URL to avoid extra copies.
9+
10+
```rust
11+
use data_url::{DataUrl, mime};
12+
// Process the `data:` URL, then decode its body.
13+
let url = DataUrl::process("data:,Hello%20World!").unwrap();
14+
let (body, fragment) = url.decode_to_vec().unwrap();
15+
// Check the MIME type and the decoded body.
16+
assert_eq!(url.mime_type().type_, "text");
17+
assert_eq!(url.mime_type().subtype, "plain");
18+
assert_eq!(url.mime_type().get_parameter("charset"), Some("US-ASCII"));
19+
assert_eq!(body, b"Hello World!");
20+
assert!(fragment.is_none());
21+
```

data-url/src/forgiving_base64.rs

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
//! <https://infra.spec.whatwg.org/#forgiving-base64-decode>
2+
3+
/// Error returned when the input is not valid according to the
/// forgiving-base64 decode algorithm.
///
/// The precise reason is kept private; it is exposed through the `Debug`
/// and `Display` implementations.
#[derive(Debug)]
pub struct InvalidBase64(InvalidBase64Details);

impl ::std::fmt::Display for InvalidBase64 {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        match self.0 {
            InvalidBase64Details::UnexpectedSymbol(byte) => {
                write!(f, "symbol with codepoint {} not expected", byte)
            }
            InvalidBase64Details::AlphabetSymbolAfterPadding => {
                write!(f, "alphabet symbol present after padding")
            }
            InvalidBase64Details::LoneAlphabetSymbol => write!(f, "lone alphabet symbol present"),
            InvalidBase64Details::Padding => write!(f, "incorrect padding"),
        }
    }
}

impl ::std::error::Error for InvalidBase64 {}

/// The specific reason why some input was rejected.
#[derive(Debug)]
enum InvalidBase64Details {
    /// A byte that is neither in the base64 alphabet, nor ASCII whitespace, nor `=`.
    UnexpectedSymbol(u8),
    /// A base64 alphabet symbol was found after at least one `=`.
    AlphabetSymbolAfterPadding,
    /// The input ended with a single symbol left over after the last group of four,
    /// which carries only 6 bits and cannot produce a byte.
    LoneAlphabetSymbol,
    /// The number of `=` symbols does not match the number of trailing alphabet symbols.
    Padding,
}

/// Error returned by the streaming [`Decoder`]: either the input is invalid,
/// or the `write_bytes` callback reported an error of type `E`.
#[derive(Debug)]
pub enum DecodeError<E> {
    /// The input is not valid forgiving-base64.
    InvalidBase64(InvalidBase64),
    /// The `write_bytes` callback failed; its error is passed through unchanged.
    WriteError(E),
}

impl<E> ::std::fmt::Display for DecodeError<E>
where
    E: ::std::fmt::Display,
{
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        match *self {
            DecodeError::InvalidBase64(ref inner) => write!(f, "base64 not valid: {}", inner),
            DecodeError::WriteError(ref inner) => write!(f, "write error: {}", inner),
        }
    }
}

impl<E> ::std::error::Error for DecodeError<E> where E: ::std::fmt::Debug + ::std::fmt::Display {}

impl<E> From<InvalidBase64Details> for DecodeError<E> {
    fn from(e: InvalidBase64Details) -> Self {
        DecodeError::InvalidBase64(InvalidBase64(e))
    }
}

/// An uninhabited type, used as the write-error type of callbacks that cannot fail.
pub(crate) enum Impossible {}

impl From<DecodeError<Impossible>> for InvalidBase64 {
    fn from(e: DecodeError<Impossible>) -> Self {
        match e {
            DecodeError::InvalidBase64(e) => e,
            // `Impossible` has no values, so this arm can never be reached.
            DecodeError::WriteError(e) => match e {},
        }
    }
}

/// Decode all of `input` at once and return the decoded bytes.
///
/// `input` is assumed to be in an ASCII-compatible encoding.
///
/// # Errors
///
/// Returns `InvalidBase64` if `input` is rejected by the forgiving-base64
/// decode algorithm.
pub fn decode_to_vec(input: &[u8]) -> Result<Vec<u8>, InvalidBase64> {
    // Four symbols decode to three bytes and a trailing partial group yields
    // at most two more, so this is an upper bound on the output length.
    let mut v = Vec::with_capacity(input.len() / 4 * 3 + 2);
    {
        let mut decoder = Decoder::new(|bytes: &[u8]| -> Result<(), Impossible> {
            v.extend_from_slice(bytes);
            Ok(())
        });
        decoder.feed(input)?;
        decoder.finish()?;
    }
    Ok(v)
}

/// Streaming decoder for
/// <https://infra.spec.whatwg.org/#forgiving-base64-decode>
///
/// Decoded bytes are handed to the `write_bytes` callback as soon as a full
/// group of four symbols has been read; `finish` flushes the final partial group.
pub struct Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
    /// Callback receiving the decoded bytes.
    write_bytes: F,
    /// Accumulates 6 bits per alphabet symbol; only the low bits are meaningful.
    bit_buffer: u32,
    /// Number of meaningful bits in `bit_buffer`: 0, 6, 12 or 18.
    buffer_bit_length: u8,
    /// Number of `=` symbols seen so far (saturating).
    padding_symbols: u8,
}

impl<F, E> Decoder<F, E> where F: FnMut(&[u8]) -> Result<(), E> {
    /// Create a decoder that passes decoded bytes to `write_bytes`.
    pub fn new(write_bytes: F) -> Self {
        Self {
            write_bytes,
            bit_buffer: 0,
            buffer_bit_length: 0,
            padding_symbols: 0,
        }
    }

    /// Feed to the decoder partial input in an ASCII-compatible encoding.
    ///
    /// # Errors
    ///
    /// Returns `DecodeError::InvalidBase64` on a byte that is not allowed at
    /// this position, or `DecodeError::WriteError` if the callback fails.
    pub fn feed(&mut self, input: &[u8]) -> Result<(), DecodeError<E>> {
        for &byte in input {
            let value = BASE64_DECODE_TABLE[usize::from(byte)];
            if value < 0 {
                // A character that's not part of the alphabet.
                match byte {
                    // ASCII whitespace is removed.
                    b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {}
                    // Padding is only counted here; it is validated in `finish`.
                    b'=' => self.padding_symbols = self.padding_symbols.saturating_add(1),
                    _ => return Err(InvalidBase64Details::UnexpectedSymbol(byte).into()),
                }
                continue;
            }
            if self.padding_symbols > 0 {
                return Err(InvalidBase64Details::AlphabetSymbolAfterPadding.into());
            }
            // `value` is in 0..=63 here, so the cast is lossless.
            self.bit_buffer = (self.bit_buffer << 6) | value as u32;
            // 18 before incrementing means we've just reached 24.
            if self.buffer_bit_length < 18 {
                self.buffer_bit_length += 6;
            } else {
                // We've accumulated four times 6 bits, which equals three times 8 bits.
                let byte_buffer = [
                    (self.bit_buffer >> 16) as u8,
                    (self.bit_buffer >> 8) as u8,
                    self.bit_buffer as u8,
                ];
                (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?;
                self.buffer_bit_length = 0;
                // No need to reset bit_buffer,
                // since next time we're only going to read relevant bits.
            }
        }
        Ok(())
    }

    /// Call this to signal the end of the input.
    ///
    /// Writes the bytes of the last partial group, if any.
    ///
    /// # Errors
    ///
    /// Returns `DecodeError::InvalidBase64` if the input ended on a lone
    /// symbol or with inconsistent padding, or `DecodeError::WriteError`
    /// if the callback fails.
    pub fn finish(mut self) -> Result<(), DecodeError<E>> {
        match (self.buffer_bit_length, self.padding_symbols) {
            // A multiple of four of alphabet symbols, and nothing else.
            (0, 0) => Ok(()),
            (12, 2) | (12, 0) => {
                // A multiple of four of alphabet symbols, followed by two more symbols,
                // optionally followed by two padding characters
                // (which make a total multiple of four).
                let byte_buffer = [(self.bit_buffer >> 4) as u8];
                (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)
            }
            (18, 1) | (18, 0) => {
                // A multiple of four of alphabet symbols, followed by three more symbols,
                // optionally followed by one padding character
                // (which make a total multiple of four).
                let byte_buffer = [
                    (self.bit_buffer >> 10) as u8,
                    (self.bit_buffer >> 2) as u8,
                ];
                (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)
            }
            (6, _) => Err(InvalidBase64Details::LoneAlphabetSymbol.into()),
            _ => Err(InvalidBase64Details::Padding.into()),
        }
    }
}

/// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet"
/// at <https://tools.ietf.org/html/rfc4648#section-4>
///
/// Array indices are the byte value of symbols.
/// Array values are their positions in the base64 alphabet,
/// or -1 for symbols not in the alphabet.
/// The position contributes 6 bits to the decoded bytes.
const BASE64_DECODE_TABLE: [i8; 256] = [
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
];

0 commit comments

Comments
 (0)