Skip to content

Commit 267afa5

Browse files
authored
perf: improve to_file_path() (servo#1018)
* perf: improve to_file_path() * fix * make estimate more exact * use saturating_sub * better to try_reserve
1 parent 79ff014 commit 267afa5

File tree

2 files changed

+62
-13
lines changed

2 files changed

+62
-13
lines changed

url/benches/parse_url.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,19 @@ fn punycode_rtl(bench: &mut Bencher) {
9696
bench.iter(|| black_box(url).parse::<Url>().unwrap());
9797
}
9898

99+
fn url_to_file_path(bench: &mut Bencher) {
100+
let url = if cfg!(windows) {
101+
"file:///C:/dir/next_dir/sub_sub_dir/testing/testing.json"
102+
} else {
103+
"file:///data/dir/next_dir/sub_sub_dir/testing/testing.json"
104+
};
105+
let url = url.parse::<Url>().unwrap();
106+
107+
bench.iter(|| {
108+
black_box(url.to_file_path().unwrap());
109+
});
110+
}
111+
99112
benchmark_group!(
100113
benches,
101114
short,
@@ -111,5 +124,6 @@ benchmark_group!(
111124
punycode_ltr,
112125
unicode_rtl,
113126
punycode_rtl,
127+
url_to_file_path
114128
);
115129
benchmark_main!(benches);

url/src/lib.rs

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,7 +2720,26 @@ impl Url {
27202720
_ => return Err(()),
27212721
};
27222722

2723-
return file_url_segments_to_pathbuf(host, segments);
2723+
let str_len = self.as_str().len();
2724+
let estimated_capacity = if cfg!(target_os = "redox") {
2725+
let scheme_len = self.scheme().len();
2726+
let file_scheme_len = "file".len();
2727+
// remove only // because it still has file:
2728+
if scheme_len < file_scheme_len {
2729+
let scheme_diff = file_scheme_len - scheme_len;
2730+
(str_len + scheme_diff).saturating_sub(2)
2731+
} else {
2732+
let scheme_diff = scheme_len - file_scheme_len;
2733+
str_len.saturating_sub(scheme_diff + 2)
2734+
}
2735+
} else if cfg!(windows) {
2736+
// remove scheme: - has posssible \\ for hostname
2737+
str_len.saturating_sub(self.scheme().len() + 1)
2738+
} else {
2739+
// remove scheme://
2740+
str_len.saturating_sub(self.scheme().len() + 3)
2741+
};
2742+
return file_url_segments_to_pathbuf(estimated_capacity, host, segments);
27242743
}
27252744
Err(())
27262745
}
@@ -3030,6 +3049,7 @@ fn path_to_file_url_segments_windows(
30303049
any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
30313050
))]
30323051
fn file_url_segments_to_pathbuf(
3052+
estimated_capacity: usize,
30333053
host: Option<&str>,
30343054
segments: str::Split<'_, char>,
30353055
) -> Result<PathBuf, ()> {
@@ -3047,11 +3067,11 @@ fn file_url_segments_to_pathbuf(
30473067
return Err(());
30483068
}
30493069

3050-
let mut bytes = if cfg!(target_os = "redox") {
3051-
b"file:".to_vec()
3052-
} else {
3053-
Vec::new()
3054-
};
3070+
let mut bytes = Vec::new();
3071+
bytes.try_reserve(estimated_capacity).map_err(|_| ())?;
3072+
if cfg!(target_os = "redox") {
3073+
bytes.extend(b"file:");
3074+
}
30553075

30563076
for segment in segments {
30573077
bytes.push(b'/');
@@ -3083,22 +3103,27 @@ fn file_url_segments_to_pathbuf(
30833103

30843104
#[cfg(all(feature = "std", windows))]
30853105
fn file_url_segments_to_pathbuf(
3106+
estimated_capacity: usize,
30863107
host: Option<&str>,
30873108
segments: str::Split<char>,
30883109
) -> Result<PathBuf, ()> {
3089-
file_url_segments_to_pathbuf_windows(host, segments)
3110+
file_url_segments_to_pathbuf_windows(estimated_capacity, host, segments)
30903111
}
30913112

30923113
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
30933114
#[cfg(feature = "std")]
30943115
#[cfg_attr(not(windows), allow(dead_code))]
30953116
fn file_url_segments_to_pathbuf_windows(
3117+
estimated_capacity: usize,
30963118
host: Option<&str>,
30973119
mut segments: str::Split<'_, char>,
30983120
) -> Result<PathBuf, ()> {
3099-
use percent_encoding::percent_decode;
3100-
let mut string = if let Some(host) = host {
3101-
r"\\".to_owned() + host
3121+
use percent_encoding::percent_decode_str;
3122+
let mut string = String::new();
3123+
string.try_reserve(estimated_capacity).map_err(|_| ())?;
3124+
if let Some(host) = host {
3125+
string.push_str(r"\\");
3126+
string.push_str(host);
31023127
} else {
31033128
let first = segments.next().ok_or(())?;
31043129

@@ -3108,7 +3133,7 @@ fn file_url_segments_to_pathbuf_windows(
31083133
return Err(());
31093134
}
31103135

3111-
first.to_owned()
3136+
string.push_str(first);
31123137
}
31133138

31143139
4 => {
@@ -3120,7 +3145,8 @@ fn file_url_segments_to_pathbuf_windows(
31203145
return Err(());
31213146
}
31223147

3123-
first[0..1].to_owned() + ":"
3148+
string.push_str(&first[0..1]);
3149+
string.push(':');
31243150
}
31253151

31263152
_ => return Err(()),
@@ -3131,11 +3157,20 @@ fn file_url_segments_to_pathbuf_windows(
31313157
string.push('\\');
31323158

31333159
// Currently non-unicode windows paths cannot be represented
3134-
match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
3160+
match percent_decode_str(segment).decode_utf8() {
31353161
Ok(s) => string.push_str(&s),
31363162
Err(..) => return Err(()),
31373163
}
31383164
}
3165+
// ensure our estimated capacity was good
3166+
if cfg!(test) {
3167+
debug_assert!(
3168+
string.len() <= estimated_capacity,
3169+
"len: {}, capacity: {}",
3170+
string.len(),
3171+
estimated_capacity
3172+
);
3173+
}
31393174
let path = PathBuf::from(string);
31403175
debug_assert!(
31413176
path.is_absolute(),

0 commit comments

Comments
 (0)