Skip to content

Commit 52a45a1

Browse files
author
bors-servo
authored
Auto merge of #361 - servo:percent-encoding-docs, r=SimonSapin,nox
Percent encoding docs Fixes #298, fixes #355. This is a rebase of #355 with some additional changes. Original work by @AndyGauge. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/rust-url/361) <!-- Reviewable:end -->
2 parents 31b8d63 + ea2f97f commit 52a45a1

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

rust-url-todo

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
* standalone path parsing?
2+
* Test setters
3+
* Test trim C0/space
4+
* Test remove tab & newline
5+
6+
7+
8+
#[test]
9+
fn test_path_segments() {
10+
let mut url = Url::parse("http://example.net").unwrap();
11+
url.push_path_segment("foo").unwrap();
12+
url.extend_path_segments(&["bar", "b/az"]).unwrap();
13+
assert_eq!(url.as_str(), "http://example.net/foo");
14+
}

src/percent_encoding.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,32 @@
66
// option. This file may not be copied, modified, or distributed
77
// except according to those terms.
88

9+
//! URLs use special characters to indicate the parts of the request. For example, a forward slash
10+
//! indicates a path. In order for that character to exist outside of a path separator, that
11+
//! character would need to be encoded.
12+
//!
13+
//! Percent encoding replaces reserved characters with the `%` escape character followed by hexadecimal
14+
//! ASCII representation. For non-ASCII characters that are percent encoded, a UTF-8 byte sequence
15+
//! becomes percent encoded. A simple example can be seen when the space literal is replaced with
16+
//! `%20`.
17+
//!
18+
//! Percent encoding is further complicated by the fact that different parts of a URL have
19+
//! different encoding requirements. In order to support the variety of encoding requirements,
20+
//! `url::percent_encoding` includes different *encode sets*.
21+
//! See [URL Standard](https://url.spec.whatwg.org/#percent-encoded-bytes) for details.
22+
//!
23+
//! This module provides some `*_ENCODE_SET` constants.
24+
//! If a different set is required, it can be created with
25+
//! the [`define_encode_set!`](../macro.define_encode_set!.html) macro.
26+
//!
27+
//! # Examples
28+
//!
29+
//! ```
30+
//! use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET};
31+
//!
32+
//! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
33+
//! ```
34+
935
use encoding;
1036
use std::ascii::AsciiExt;
1137
use std::borrow::Cow;
@@ -77,6 +103,9 @@ macro_rules! define_encode_set {
77103
}
78104

79105
/// This encode set is used for the path of cannot-be-a-base URLs.
106+
///
107+
/// All ASCII characters less than hexadecimal 20 and greater than 7E are encoded. This includes
108+
/// special characters such as line feed, carriage return, NULL, etc.
80109
#[derive(Copy, Clone, Debug)]
81110
#[allow(non_camel_case_types)]
82111
pub struct SIMPLE_ENCODE_SET;
@@ -90,21 +119,39 @@ impl EncodeSet for SIMPLE_ENCODE_SET {
90119

91120
define_encode_set! {
92121
/// This encode set is used in the URL parser for query strings.
122+
///
123+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
124+
/// space, double quote ("), hash (#), and inequality qualifiers (<), (>) are encoded.
93125
pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
94126
}
95127

96128
define_encode_set! {
97129
/// This encode set is used for path components.
130+
///
131+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
132+
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
133+
/// question mark (?), and curly brackets ({), (}) are encoded.
98134
pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'}
99135
}
100136

101137
define_encode_set! {
102138
/// This encode set is used for a '/'-separated path segment.
139+
///
140+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
141+
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
142+
/// question mark (?), curly brackets ({), (}), percent sign (%), and forward slash (/) are
143+
/// encoded.
103144
pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'}
104145
}
105146

106147
define_encode_set! {
107148
/// This encode set is used for username and password.
149+
///
150+
/// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
151+
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
152+
/// question mark (?), curly brackets ({), (}), forward slash (/), colon (:), semicolon (;),
153+
/// equality (=), at (@), backslash (\\), square brackets ([), (]), caret (\^), and pipe (|) are
154+
/// encoded.
108155
pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | {
109156
'/', ':', ';', '=', '@', '[', '\\', ']', '^', '|'
110157
}
@@ -113,6 +160,15 @@ define_encode_set! {
113160
/// Return the percent-encoding of the given bytes.
114161
///
115162
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
163+
///
164+
/// # Examples
165+
///
166+
/// ```
167+
/// use url::percent_encoding::percent_encode_byte;
168+
///
169+
/// assert_eq!("foo bar".bytes().map(percent_encode_byte).collect::<String>(),
170+
/// "%66%6F%6F%20%62%61%72");
171+
/// ```
116172
pub fn percent_encode_byte(byte: u8) -> &'static str {
117173
let index = usize::from(byte) * 3;
118174
&"\
@@ -146,6 +202,14 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
146202
/// that also implements `Display` and `Into<Cow<str>>`.
147203
/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
148204
/// are in the given encode set.
205+
///
206+
/// # Examples
207+
///
208+
/// ```
209+
/// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET};
210+
///
211+
/// assert_eq!(percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
212+
/// ```
149213
#[inline]
150214
pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncode<E> {
151215
PercentEncode {
@@ -157,6 +221,14 @@ pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncod
157221
/// Percent-encode the UTF-8 encoding of the given string.
158222
///
159223
/// See `percent_encode()` for how to use the return value.
224+
///
225+
/// # Examples
226+
///
227+
/// ```
228+
/// use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET};
229+
///
230+
/// assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
231+
/// ```
160232
#[inline]
161233
pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> PercentEncode<E> {
162234
percent_encode(input.as_bytes(), encode_set)
@@ -241,6 +313,14 @@ impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
241313
/// that also implements `Into<Cow<u8>>`
242314
/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
243315
/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
316+
///
317+
/// # Examples
318+
///
319+
/// ```
320+
/// use url::percent_encoding::percent_decode;
321+
///
322+
/// assert_eq!(percent_decode(b"foo%20bar%3F").decode_utf8().unwrap(), "foo bar?");
323+
/// ```
244324
#[inline]
245325
pub fn percent_decode(input: &[u8]) -> PercentDecode {
246326
PercentDecode {

0 commit comments

Comments
 (0)