Skip to content

Commit df366f1

Browse files
AndyGaugeSimonSapin
authored andcommitted
Documentation and examples enhancements of percent encoding
1 parent 31b8d63 commit df366f1

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed

rust-url-todo

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
* standalone path parsing?
2+
* Test setters
3+
* Test trim C0/space
4+
* Test remove tab & newline
5+
6+
7+
8+
#[test]
9+
fn test_path_segments() {
10+
let mut url = Url::parse("http://example.net").unwrap();
11+
url.push_path_segment("foo").unwrap();
12+
url.extend_path_segments(&["bar", "b/az"]).unwrap();
13+
assert_eq!(url.as_str(), "http://example.net/foo");
14+
}

src/percent_encoding.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,39 @@
66
// option. This file may not be copied, modified, or distributed
77
// except according to those terms.
88

9+
//! URLs use special characters to indicate the parts of the request. For example, a forward slash
10+
//! indicates a path. In order for that character to exist outside of a path separator, that
11+
//! character would need to be encoded.
12+
//!
13+
//! Percent encoding replaces reserved characters with the `%` escape character followed by hexadecimal
14+
//! ASCII representation. For non-ASCII characters that are percent encoded, a UTF-8 byte sequence
15+
//! becomes percent encoded. A simple example can be seen when the space literal is replaced with
16+
//! `%20`.
17+
//!
18+
//! Percent encoding is further complicated by the fact that different parts of the URI have
19+
//! different encoding requirements. In order to support the variety of encoding requirements,
20+
//! `url::percent_encoding` includes encoding sets that are defined in [IETF RFC 3986][rfc] and
21+
//! updated through the [Living Standard][living].
22+
//!
23+
//! The [`url::percent_encoding::EncodeSet`](trait.EncodeSet.html) trait allows a sequence of bytes
24+
//! to be converted to a percent encoded sequence of bytes stripped of particular reserved
25+
//! characters. This trait is applied to the `*_ENCODE_SET` structs. If your application requires
26+
//! a custom encode set, see the [`define_encode_set!`](../macro.define_encode_set!.html) macro.
27+
//!
28+
//! # Examples
29+
//!
30+
//! ```
31+
//! extern crate url;
32+
//! use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET};
33+
//!
34+
//! // prints "foo%20bar?" ('?' is not part of QUERY_ENCODE_SET, so it is left as-is)
35+
//! # fn main() {
36+
//! println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::<String>());
37+
//! # }
38+
//! ```
39+
//! [rfc]:https://tools.ietf.org/html/rfc3986
40+
//! [living]:https://url.spec.whatwg.org
41+
942
use encoding;
1043
use std::ascii::AsciiExt;
1144
use std::borrow::Cow;
@@ -77,6 +110,9 @@ macro_rules! define_encode_set {
77110
}
78111

79112
/// This encode set is used for the path of cannot-be-a-base URLs.
113+
///
114+
/// All ASCII characters less than hexadecimal 20 and greater than 7E are encoded. This includes
115+
/// special characters such as line feed, carriage return, NULL, etc.
80116
#[derive(Copy, Clone, Debug)]
81117
#[allow(non_camel_case_types)]
82118
pub struct SIMPLE_ENCODE_SET;
@@ -90,21 +126,39 @@ impl EncodeSet for SIMPLE_ENCODE_SET {
90126

91127
define_encode_set! {
92128
/// This encode set is used in the URL parser for query strings.
129+
///
130+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
131+
/// space, double quote ("), hash (#), and inequality qualifiers (<), (>) are encoded.
93132
pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
94133
}
95134

96135
define_encode_set! {
97136
/// This encode set is used for path components.
137+
///
138+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
139+
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
140+
/// question mark (?), and curly brackets ({), (}) are encoded.
98141
pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'}
99142
}
100143

101144
define_encode_set! {
102145
/// This encode set is used for a single '/'-separated path segment.
146+
///
147+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
148+
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
149+
/// question mark (?), curly brackets ({), (}), percent sign (%), and forward slash (/) are
150+
/// encoded.
103151
pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'}
104152
}
105153

106154
define_encode_set! {
107155
/// This encode set is used for username and password.
156+
///
157+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
158+
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
159+
/// question mark (?), curly brackets ({), (}), forward slash (/), colon (:), semi-colon (;),
160+
/// equality (=), at (@), backslash (\\), square brackets ([), (]), caret (\^), and pipe (|) are
161+
/// encoded.
108162
pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | {
109163
'/', ':', ';', '=', '@', '[', '\\', ']', '^', '|'
110164
}
@@ -113,6 +167,21 @@ define_encode_set! {
113167
/// Return the percent-encoding of the given bytes.
114168
///
115169
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
170+
///
171+
/// # Examples
172+
///
173+
/// ```
174+
/// extern crate url;
175+
/// use url::percent_encoding::percent_encode_byte;
176+
///
177+
/// //prints %66%6F%6F%20%62%61%72
178+
/// # fn main() {
179+
/// let sample = b"foo bar";
180+
/// for character in sample {
181+
/// print!("{}", percent_encode_byte(*character));
182+
/// }
183+
/// # }
184+
/// ```
116185
pub fn percent_encode_byte(byte: u8) -> &'static str {
117186
let index = usize::from(byte) * 3;
118187
&"\
@@ -146,6 +215,18 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
146215
/// that also implements `Display` and `Into<Cow<str>>`.
147216
/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
148217
/// are in the given encode set.
218+
///
219+
/// # Examples
220+
///
221+
/// ```
222+
/// extern crate url;
223+
/// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET};
224+
///
225+
/// //prints foo%20bar%3F
226+
/// # fn main() {
227+
/// println!("{}", percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).collect::<String>());
228+
/// # }
229+
/// ```
149230
#[inline]
150231
pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncode<E> {
151232
PercentEncode {
@@ -157,6 +238,18 @@ pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncod
157238
/// Percent-encode the UTF-8 encoding of the given string.
158239
///
159240
/// See `percent_encode()` for how to use the return value.
241+
///
242+
/// # Examples
243+
///
244+
/// ```
245+
/// extern crate url;
246+
/// use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET};
247+
///
248+
/// // prints "foo%20bar?" ('?' is not part of QUERY_ENCODE_SET, so it is left as-is)
249+
/// # fn main() {
250+
/// println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::<String>());
251+
/// # }
252+
/// ```
160253
#[inline]
161254
pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> PercentEncode<E> {
162255
percent_encode(input.as_bytes(), encode_set)
@@ -241,6 +334,22 @@ impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
241334
/// that also implements `Into<Cow<u8>>`
242335
/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
243336
/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
337+
///
338+
/// # Examples
339+
///
340+
/// ```
341+
/// extern crate url;
342+
/// use url::percent_encoding::percent_decode;
343+
///
344+
/// //prints "foo bar?"
345+
/// # fn run() -> Result<(), std::str::Utf8Error> {
346+
/// println!("{}", percent_decode(b"foo%20bar%3F").decode_utf8()?);
347+
/// # Ok( () )
348+
/// # }
349+
/// # fn main() {
350+
/// # run().unwrap();
351+
/// # }
352+
/// ```
244353
#[inline]
245354
pub fn percent_decode(input: &[u8]) -> PercentDecode {
246355
PercentDecode {

0 commit comments

Comments
 (0)