6
6
// option. This file may not be copied, modified, or distributed
7
7
// except according to those terms.
8
8
9
+ //! URLs use special characters to indicate the parts of the request. For example, a forward slash
10
+ //! indicates a path. In order for that character to exist outside of a path separator, that
11
+ //! character would need to be encoded.
12
+ //!
13
+ //! Percent encoding replaces reserved characters with the `%` escape character followed by hexadecimal
14
+ //! ASCII representation. For non-ASCII characters that are percent encoded, a UTF-8 byte sequence
15
+ //! becomes percent encoded. A simple example can be seen when the space literal is replaced with
16
+ //! `%20`.
17
+ //!
18
+ //! Percent encoding is further complicated by the fact that different parts of a URL have
19
+ //! different encoding requirements. In order to support the variety of encoding requirements,
20
+ //! `url::percent_encoding` includes different *encode sets*.
21
+ //! See [URL Standard](https://url.spec.whatwg.org/#percent-encoded-bytes) for details.
22
+ //!
23
+ //! This module provides some `*_ENCODE_SET` constants.
24
+ //! If a different set is required, it can be created with
25
+ //! the [`define_encode_set!`](../macro.define_encode_set!.html) macro.
26
+ //!
27
+ //! # Examples
28
+ //!
29
+ //! ```
30
+ //! use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET};
31
+ //!
32
+ //! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
33
+ //! ```
34
+
9
35
use encoding;
10
36
use std:: ascii:: AsciiExt ;
11
37
use std:: borrow:: Cow ;
@@ -77,6 +103,9 @@ macro_rules! define_encode_set {
77
103
}
78
104
79
105
/// This encode set is used for the path of cannot-be-a-base URLs.
106
+ ///
107
+ /// All ASCII characters less than hexadecimal 20 and greater than 7E are encoded. This includes
108
+ /// special characters such as line feed, carriage return, NULL, etc.
80
109
#[ derive( Copy , Clone , Debug ) ]
81
110
#[ allow( non_camel_case_types) ]
82
111
pub struct SIMPLE_ENCODE_SET ;
@@ -90,21 +119,39 @@ impl EncodeSet for SIMPLE_ENCODE_SET {
90
119
91
120
define_encode_set ! {
92
121
/// This encode set is used in the URL parser for query strings.
122
+ ///
123
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
124
+ /// space, double quote ("), hash (#), and inequality qualifiers (<), (>) are encoded.
93
125
pub QUERY_ENCODE_SET = [ SIMPLE_ENCODE_SET ] | { ' ' , '"' , '#' , '<' , '>' }
94
126
}
95
127
96
128
define_encode_set ! {
97
129
/// This encode set is used for path components.
130
+ ///
131
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
132
+ /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
133
+ /// question mark (?), and curly brackets ({), (}) are encoded.
98
134
pub DEFAULT_ENCODE_SET = [ QUERY_ENCODE_SET ] | { '`' , '?' , '{' , '}' }
99
135
}
100
136
101
137
define_encode_set ! {
102
138
/// This encode set is used for a single '/'-separated path segment.
139
+ ///
140
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
141
+ /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
142
+ /// question mark (?), curly brackets ({), (}), percent sign (%), and forward slash (/) are
143
+ /// encoded.
103
144
pub PATH_SEGMENT_ENCODE_SET = [ DEFAULT_ENCODE_SET ] | { '%' , '/' }
104
145
}
105
146
106
147
define_encode_set ! {
107
148
/// This encode set is used for username and password.
149
+ ///
150
+ /// Aside from special characters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
151
+ /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
152
+ /// question mark (?), curly brackets ({), (}), forward slash (/), colon (:), semi-colon (;),
153
+ /// equality (=), at (@), backslash (\\), square brackets ([), (]), caret (\^), and pipe (|) are
154
+ /// encoded.
108
155
pub USERINFO_ENCODE_SET = [ DEFAULT_ENCODE_SET ] | {
109
156
'/' , ':' , ';' , '=' , '@' , '[' , '\\' , ']' , '^' , '|'
110
157
}
@@ -113,6 +160,15 @@ define_encode_set! {
113
160
/// Return the percent-encoding of the given bytes.
114
161
///
115
162
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
163
+ ///
164
+ /// # Examples
165
+ ///
166
+ /// ```
167
+ /// use url::percent_encoding::percent_encode_byte;
168
+ ///
169
+ /// assert_eq!("foo bar".bytes().map(percent_encode_byte).collect::<String>(),
170
+ /// "%66%6F%6F%20%62%61%72");
171
+ /// ```
116
172
pub fn percent_encode_byte ( byte : u8 ) -> & ' static str {
117
173
let index = usize:: from ( byte) * 3 ;
118
174
& "\
@@ -146,6 +202,14 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
146
202
/// that also implements `Display` and `Into<Cow<str>>`.
147
203
/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
148
204
/// are in the given encode set.
205
+ ///
206
+ /// # Examples
207
+ ///
208
+ /// ```
209
+ /// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET};
210
+ ///
211
+ /// assert_eq!(percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
212
+ /// ```
149
213
#[ inline]
150
214
pub fn percent_encode < E : EncodeSet > ( input : & [ u8 ] , encode_set : E ) -> PercentEncode < E > {
151
215
PercentEncode {
@@ -157,6 +221,14 @@ pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncod
157
221
/// Percent-encode the UTF-8 encoding of the given string.
158
222
///
159
223
/// See `percent_encode()` for how to use the return value.
224
+ ///
225
+ /// # Examples
226
+ ///
227
+ /// ```
228
+ /// use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET};
229
+ ///
230
+ /// assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
231
+ /// ```
160
232
#[ inline]
161
233
pub fn utf8_percent_encode < E : EncodeSet > ( input : & str , encode_set : E ) -> PercentEncode < E > {
162
234
percent_encode ( input. as_bytes ( ) , encode_set)
@@ -241,6 +313,14 @@ impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
241
313
/// that also implements `Into<Cow<u8>>`
242
314
/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
243
315
/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
316
+ ///
317
+ /// # Examples
318
+ ///
319
+ /// ```
320
+ /// use url::percent_encoding::percent_decode;
321
+ ///
322
+ /// assert_eq!(percent_decode(b"foo%20bar%3F").decode_utf8().unwrap(), "foo bar?");
323
+ /// ```
244
324
#[ inline]
245
325
pub fn percent_decode ( input : & [ u8 ] ) -> PercentDecode {
246
326
PercentDecode {
0 commit comments