Skip to content

Commit b054045

Browse files
author
bors-servo
authored
Auto merge of #362 - servo:percent-crate, r=SimonSapin
Move percent encoding to its own crate. This is a rebase of #347 with some additional changes. Original work by @seanmonstar. Fixes #347. This change is [Reviewable](https://reviewable.io/reviews/servo/rust-url/362).
2 parents 52a45a1 + dfaacf0 commit b054045

File tree

4 files changed

+87
-7
lines changed

4 files changed

+87
-7
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ travis-ci = { repository = "servo/rust-url" }
1818
appveyor = { repository = "servo/rust-url" }
1919

2020
[workspace]
21-
members = [".", "idna", "url_serde"]
21+
members = [".", "idna", "percent_encoding", "url_serde"]
2222

2323
[[test]]
2424
name = "unit"
@@ -44,5 +44,6 @@ encoding = {version = "0.2", optional = true}
4444
heapsize = {version = ">=0.1.1, <0.5", optional = true}
4545
idna = { version = "0.1.0", path = "./idna" }
4646
matches = "0.1"
47+
percent-encoding = { version = "1.0.0", path = "./percent_encoding" }
4748
rustc-serialize = {version = "0.3", optional = true}
4849
serde = {version = ">=0.6.1, <0.9", optional = true}

percent_encoding/Cargo.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "percent-encoding"
3+
version = "1.0.0"
4+
authors = ["The rust-url developers"]
5+
description = "Percent encoding and decoding"
6+
repository = "https://github.com/servo/rust-url/"
7+
license = "MIT/Apache-2.0"
8+
9+
[lib]
10+
doctest = false
11+
test = false
12+
path = "lib.rs"
13+
14+
[dev-dependencies]
15+
rustc-test = "0.1"
16+
rustc-serialize = "0.3"

src/percent_encoding.rs renamed to percent_encoding/lib.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
//! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
3333
//! ```
3434
35-
use encoding;
3635
use std::ascii::AsciiExt;
3736
use std::borrow::Cow;
3837
use std::fmt;
@@ -70,8 +69,8 @@ pub trait EncodeSet: Clone {
7069
/// =======
7170
///
7271
/// ```rust
73-
/// #[macro_use] extern crate url;
74-
/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
72+
/// #[macro_use] extern crate percent_encoding;
73+
/// use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
7574
/// define_encode_set! {
7675
/// /// This encode set is used in the URL parser for query strings.
7776
/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
@@ -88,7 +87,7 @@ macro_rules! define_encode_set {
8887
#[allow(non_camel_case_types)]
8988
pub struct $name;
9089

91-
impl $crate::percent_encoding::EncodeSet for $name {
90+
impl $crate::EncodeSet for $name {
9291
#[inline]
9392
fn contains(&self, byte: u8) -> bool {
9493
match byte as char {
@@ -419,6 +418,25 @@ impl<'a> PercentDecode<'a> {
419418
/// Invalid UTF-8 percent-encoded byte sequences will be replaced with � U+FFFD,
420419
/// the replacement character.
421420
pub fn decode_utf8_lossy(self) -> Cow<'a, str> {
422-
encoding::decode_utf8_lossy(self.clone().into())
421+
decode_utf8_lossy(self.clone().into())
423422
}
424423
}
424+
425+
/// Decode `input` as UTF-8, substituting U+FFFD (the replacement character)
/// for every invalid byte sequence.
///
/// * Borrowed input is handed straight to `String::from_utf8_lossy`, so the
///   result borrows from `input` when the bytes are already valid UTF-8.
/// * Owned input that is valid UTF-8 is converted in place: the `Vec`
///   allocation becomes the returned `String` without copying.
/// * Owned input that is not valid UTF-8 is decoded lossily into a new
///   `String`.
///
/// The owned path validates with the safe `String::from_utf8` rather than
/// `String::from_utf8_unchecked`, so no `unsafe` block (and no reliance on
/// how `from_utf8_lossy` chooses between borrowing and allocating) is needed.
fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` wraps the existing `Vec` allocation.
            Ok(text) => text.into(),
            // Invalid UTF-8: recover the original bytes from the error and
            // build a repaired copy.
            Err(invalid) => String::from_utf8_lossy(&invalid.into_bytes())
                .into_owned()
                .into(),
        },
    }
}
441+
442+

src/lib.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
112112
#[cfg(feature="heapsize")] #[macro_use] extern crate heapsize;
113113

114114
pub extern crate idna;
115+
pub extern crate percent_encoding;
115116

116117
use encoding::EncodingOverride;
117118
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
@@ -145,7 +146,6 @@ mod parser;
145146
mod slicing;
146147

147148
pub mod form_urlencoded;
148-
pub mod percent_encoding;
149149
pub mod quirks;
150150

151151
/// A parsed URL record.
@@ -2103,3 +2103,48 @@ impl<'a> Drop for UrlQuery<'a> {
21032103
self.url.restore_already_parsed_fragment(self.fragment.take())
21042104
}
21052105
}
2106+
2107+
2108+
/// Define a new struct
2109+
/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait,
2110+
/// for use in [`percent_encode()`](percent_encoding/fn.percent_encode.html)
2111+
/// and related functions.
2112+
///
2113+
/// Parameters are characters to include in the set in addition to those of the base set.
2114+
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
2115+
///
2116+
/// Example
2117+
/// =======
2118+
///
2119+
/// ```rust
2120+
/// #[macro_use] extern crate url;
2121+
/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
2122+
/// define_encode_set! {
2123+
/// /// This encode set is used in the URL parser for query strings.
2124+
/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
2125+
/// }
2126+
/// # fn main() {
2127+
/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar");
2128+
/// # }
2129+
/// ```
2130+
#[macro_export]
macro_rules! define_encode_set {
    (
        $(#[$meta: meta])*
        pub $set_name: ident = [$parent_set: expr] | {$($member: pat),*}
    ) => {
        $(#[$meta])*
        #[derive(Copy, Clone)]
        #[allow(non_camel_case_types)]
        pub struct $set_name;

        impl $crate::percent_encoding::EncodeSet for $set_name {
            #[inline]
            fn contains(&self, byte: u8) -> bool {
                // First test the characters listed explicitly for this set.
                let listed = match byte as char {
                    $(
                        $member => true,
                    )*
                    _ => false,
                };
                // Only consult the base set when the byte was not listed,
                // so the base expression is evaluated lazily.
                listed || $parent_set.contains(byte)
            }
        }
    }
}

0 commit comments

Comments
 (0)