Skip to content

Commit 20bebc2

Browse files
committed
Escape backslash in special URL path components
Closes #468
1 parent a07eac0 commit 20bebc2

File tree

3 files changed

+39
-3
lines changed

3 files changed

+39
-3
lines changed

percent_encoding/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,11 @@ define_encode_set! {
140140
/// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
141141
/// question mark (?), and curly brackets ({), (}), percent sign (%), forward slash (/) are
142142
/// encoded.
143+
///
144+
/// # Note
145+
///
146+
/// For [special URLs](https://url.spec.whatwg.org/#is-special), the backslash (\) character should
147+
/// additionally be escaped, but that is *not* included in this encode set.
143148
pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'}
144149
}
145150

src/parser.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,24 @@ use host::{Host, HostInternal};
1919
use percent_encoding::{
2020
utf8_percent_encode, percent_encode,
2121
SIMPLE_ENCODE_SET, DEFAULT_ENCODE_SET, USERINFO_ENCODE_SET, QUERY_ENCODE_SET,
22-
PATH_SEGMENT_ENCODE_SET
22+
PATH_SEGMENT_ENCODE_SET, EncodeSet
2323
};
2424

25+
// The backslash (\) character is treated as a path separator in special URLs
26+
// so it needs to be additionally escaped in that case.
//
// This set is `PATH_SEGMENT_ENCODE_SET` plus the single byte `\`.
// NOTE(review): the upper-case type name presumably mirrors the names of the
// other encode sets; rustc's `non_camel_case_types` lint warns on such a name
// unless it is explicitly allowed - confirm whether that is wanted here.
27+
#[derive(Clone)]
28+
struct SPECIAL_PATH_SEGMENT_ENCODE_SET;
29+
30+
impl EncodeSet for SPECIAL_PATH_SEGMENT_ENCODE_SET {
31+
/// Reports whether `byte` belongs to this set: always `true` for a
/// backslash, otherwise whatever `PATH_SEGMENT_ENCODE_SET.contains` answers.
#[inline]
32+
fn contains(&self, byte: u8) -> bool {
33+
match byte {
34+
// The one byte added on top of the base set.
b'\\' => true,
35+
// Every other byte defers to the ordinary path-segment set.
_ => PATH_SEGMENT_ENCODE_SET.contains(byte)
36+
}
37+
}
38+
}
39+
2540
pub type ParseResult<T> = Result<T, ParseError>;
2641

2742
macro_rules! simple_enum_error {
@@ -1011,8 +1026,13 @@ impl<'a> Parser<'a> {
10111026
_ => {
10121027
self.check_url_code_point(c, &input);
10131028
if self.context == Context::PathSegmentSetter {
1014-
self.serialization.extend(utf8_percent_encode(
1015-
utf8_c, PATH_SEGMENT_ENCODE_SET));
1029+
if scheme_type.is_special() {
1030+
self.serialization.extend(utf8_percent_encode(
1031+
utf8_c, SPECIAL_PATH_SEGMENT_ENCODE_SET));
1032+
} else {
1033+
self.serialization.extend(utf8_percent_encode(
1034+
utf8_c, PATH_SEGMENT_ENCODE_SET));
1035+
}
10161036
} else {
10171037
self.serialization.extend(utf8_percent_encode(
10181038
utf8_c, DEFAULT_ENCODE_SET));

tests/unit.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,17 @@ fn new_directory_paths() {
109109
}
110110
}
111111

112+
// Pushes the segment `foo\bar` onto two URLs through `path_segments_mut()`
// and checks how the backslash appears in the serialized result.
#[test]
113+
fn path_backslash_fun() {
114+
// `http` URL (named "special" here): the backslash is expected to come
// out percent-encoded as `%5C`.
let mut special_url = "http://foobar.com".parse::<Url>().unwrap();
115+
special_url.path_segments_mut().unwrap().push("foo\\bar");
116+
assert_eq!(special_url.as_str(), "http://foobar.com/foo%5Cbar");
117+
118+
// `thing` URL (named "nonspecial" here): the same push is expected to
// leave the backslash unescaped.
let mut nonspecial_url = "thing://foobar.com".parse::<Url>().unwrap();
119+
nonspecial_url.path_segments_mut().unwrap().push("foo\\bar");
120+
assert_eq!(nonspecial_url.as_str(), "thing://foobar.com/foo\\bar");
121+
}
122+
112123
#[test]
113124
fn from_str() {
114125
assert!("http://testing.com/this".parse::<Url>().is_ok());

0 commit comments

Comments
 (0)