6
6
// option. This file may not be copied, modified, or distributed
7
7
// except according to those terms.
8
8
9
+ //! URLs use special characters to indicate the parts of the request. For example, a forward slash
10
+ //! indicates a path. In order for that character to exist outside of a path separator, that
11
+ //! character would need to be encoded.
12
+ //!
13
+ //! Percent encoding replaces reserved characters with the `%` escape character followed by a hexadecimal
14
+ //! ASCII representation. For non-ASCII characters that are percent encoded, a UTF-8 byte sequence
15
+ //! becomes percent encoded. A simple example can be seen when the space literal is replaced with
16
+ //! `%20`.
17
+ //!
18
+ //! Percent encoding is further complicated by the fact that different parts of the URI have
19
+ //! different encoding requirements. In order to support the variety of encoding requirements,
20
+ //! `url::percent_encoding` includes encoding sets that are defined in [IETF RFC 3986][rfc] and
21
+ //! updated through the [Living Standard][living].
22
+ //!
23
+ //! The [`url::percent_encoding::EncodeSet`](trait.EncodeSet.html) trait allows a sequence of bytes
24
+ //! to be converted to a percent encoded sequence of bytes stripped of particular reserved
25
+ //! characters. This trait is applied to the `*_ENCODE_SET` structs. If your application requires
26
+ //! a custom encode set, see the [`define_encode_set!`](../macro.define_encode_set!.html) macro.
27
+ //!
28
+ //! # Examples
29
+ //!
30
+ //! ```
31
+ //! extern crate url;
32
+ //! use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET};
33
+ //!
34
+ //! // prints "foo%20bar?" ('?' is not in QUERY_ENCODE_SET)
35
+ //! # fn main() {
36
+ //! println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::<String>());
37
+ //! # }
38
+ //! ```
39
+ //! [rfc]:https://tools.ietf.org/html/rfc3986
40
+ //! [living]:https://url.spec.whatwg.org
41
+
9
42
use encoding;
10
43
use std:: ascii:: AsciiExt ;
11
44
use std:: borrow:: Cow ;
@@ -77,6 +110,9 @@ macro_rules! define_encode_set {
77
110
}
78
111
79
112
/// This encode set is used for the path of cannot-be-a-base URLs.
113
+ ///
114
+ /// All ASCII characters less than hexadecimal 20 and greater than 7E are encoded. This includes
115
+ /// special characters such as line feed, carriage return, NULL, etc.
80
116
#[ derive( Copy , Clone , Debug ) ]
81
117
#[ allow( non_camel_case_types) ]
82
118
pub struct SIMPLE_ENCODE_SET ;
@@ -90,21 +126,39 @@ impl EncodeSet for SIMPLE_ENCODE_SET {
90
126
91
127
define_encode_set ! {
92
128
/// This encode set is used in the URL parser for query strings.
129
+ ///
130
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
131
+ /// space, double quote ("), hash (#), and inequality qualifiers (<), (>) are encoded.
93
132
pub QUERY_ENCODE_SET = [ SIMPLE_ENCODE_SET ] | { ' ' , '"' , '#' , '<' , '>' }
94
133
}
95
134
96
135
define_encode_set ! {
97
136
/// This encode set is used for path components.
137
+ ///
138
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
139
+ /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
140
+ /// question mark (?), and curly brackets ({), (}) are encoded.
98
141
pub DEFAULT_ENCODE_SET = [ QUERY_ENCODE_SET ] | { '`' , '?' , '{' , '}' }
99
142
}
100
143
101
144
define_encode_set ! {
102
145
/// This encode set is used for '/'-separated path segments.
146
+ ///
147
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
148
+ /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
149
+ /// question mark (?), curly brackets ({), (}), percent sign (%), and forward slash (/) are
150
+ /// encoded.
103
151
pub PATH_SEGMENT_ENCODE_SET = [ DEFAULT_ENCODE_SET ] | { '%' , '/' }
104
152
}
105
153
106
154
define_encode_set ! {
107
155
/// This encode set is used for username and password.
156
+ ///
157
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
158
+ /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
159
+ /// question mark (?), curly brackets ({), (}), forward slash (/), colon (:), semicolon (;),
160
+ /// equality (=), at (@), backslash (\\), square brackets ([), (]), caret (\^), and pipe (|) are
161
+ /// encoded.
108
162
pub USERINFO_ENCODE_SET = [ DEFAULT_ENCODE_SET ] | {
109
163
'/' , ':' , ';' , '=' , '@' , '[' , '\\' , ']' , '^' , '|'
110
164
}
@@ -113,6 +167,21 @@ define_encode_set! {
113
167
/// Return the percent-encoding of the given bytes.
114
168
///
115
169
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
170
+ ///
171
+ /// # Examples
172
+ ///
173
+ /// ```
174
+ /// extern crate url;
175
+ /// use url::percent_encoding::percent_encode_byte;
176
+ ///
177
+ /// //prints %66%6F%6F%20%62%61%72
178
+ /// # fn main() {
179
+ /// let sample = b"foo bar";
180
+ /// for character in sample {
181
+ /// print!("{}", percent_encode_byte(*character));
182
+ /// }
183
+ /// # }
184
+ /// ```
116
185
pub fn percent_encode_byte ( byte : u8 ) -> & ' static str {
117
186
let index = usize:: from ( byte) * 3 ;
118
187
& "\
@@ -146,6 +215,18 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
146
215
/// that also implements `Display` and `Into<Cow<str>>`.
147
216
/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
148
217
/// are in the given encode set.
218
+ ///
219
+ /// # Examples
220
+ ///
221
+ /// ```
222
+ /// extern crate url;
223
+ /// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET};
224
+ ///
225
+ /// //prints foo%20bar%3F
226
+ /// # fn main() {
227
+ /// println!("{}", percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).collect::<String>());
228
+ /// # }
229
+ /// ```
149
230
#[ inline]
150
231
pub fn percent_encode < E : EncodeSet > ( input : & [ u8 ] , encode_set : E ) -> PercentEncode < E > {
151
232
PercentEncode {
@@ -157,6 +238,18 @@ pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncod
157
238
/// Percent-encode the UTF-8 encoding of the given string.
158
239
///
159
240
/// See `percent_encode()` for how to use the return value.
241
+ ///
242
+ /// # Examples
243
+ ///
244
+ /// ```
245
+ /// extern crate url;
246
+ /// use url::percent_encoding::{utf8_percent_encode, QUERY_ENCODE_SET};
247
+ ///
248
+ /// // prints "foo%20bar?" ('?' is not in QUERY_ENCODE_SET)
249
+ /// # fn main() {
250
+ /// println!("{}", utf8_percent_encode("foo bar?", QUERY_ENCODE_SET).collect::<String>());
251
+ /// # }
252
+ /// ```
160
253
#[ inline]
161
254
pub fn utf8_percent_encode < E : EncodeSet > ( input : & str , encode_set : E ) -> PercentEncode < E > {
162
255
percent_encode ( input. as_bytes ( ) , encode_set)
@@ -241,6 +334,22 @@ impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
241
334
/// that also implements `Into<Cow<[u8]>>`
242
335
/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
243
336
/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
337
+ ///
338
+ /// # Examples
339
+ ///
340
+ /// ```
341
+ /// extern crate url;
342
+ /// use url::percent_encoding::percent_decode;
343
+ ///
344
+ /// //prints "foo bar?"
345
+ /// # fn run() -> Result<(), std::str::Utf8Error> {
346
+ /// println!("{}", percent_decode(b"foo%20bar%3F").decode_utf8()?);
347
+ /// # Ok( () )
348
+ /// # }
349
+ /// # fn main() {
350
+ /// # run().unwrap();
351
+ /// # }
352
+ /// ```
244
353
#[ inline]
245
354
pub fn percent_decode ( input : & [ u8 ] ) -> PercentDecode {
246
355
PercentDecode {
0 commit comments