Skip to content

Commit adb8660

Browse files
authored
perf: url encode path segments in longer string slices (#1026)
* perf: url encode path segments in longer string slices
* stable rust
* revert quote change
* Revert this too.

Signed-off-by: David Sherret <dsherret@users.noreply.github.com>

---------

Signed-off-by: David Sherret <dsherret@users.noreply.github.com>
1 parent 39a1201 commit adb8660

File tree

2 files changed

+75
-14
lines changed

2 files changed

+75
-14
lines changed

url/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3061,7 +3061,6 @@ fn file_url_segments_to_pathbuf(
30613061
use std::os::hermit::ffi::OsStrExt;
30623062
#[cfg(any(unix, target_os = "redox"))]
30633063
use std::os::unix::prelude::OsStrExt;
3064-
use std::path::PathBuf;
30653064

30663065
if host.is_some() {
30673066
return Err(());

url/src/parser.rs

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,32 +1191,96 @@ impl<'a> Parser<'a> {
11911191
path_start: usize,
11921192
mut input: Input<'i>,
11931193
) -> Input<'i> {
1194+
// it's much faster to call utf8_percent_encode in bulk
1195+
fn push_pending(
1196+
serialization: &mut String,
1197+
start_str: &str,
1198+
remaining_len: usize,
1199+
context: Context,
1200+
scheme_type: SchemeType,
1201+
) {
1202+
let text = &start_str[..start_str.len() - remaining_len];
1203+
if text.is_empty() {
1204+
return;
1205+
}
1206+
if context == Context::PathSegmentSetter {
1207+
if scheme_type.is_special() {
1208+
serialization.extend(utf8_percent_encode(text, SPECIAL_PATH_SEGMENT));
1209+
} else {
1210+
serialization.extend(utf8_percent_encode(text, PATH_SEGMENT));
1211+
}
1212+
} else {
1213+
serialization.extend(utf8_percent_encode(text, PATH));
1214+
}
1215+
}
1216+
11941217
// Relative path state
11951218
loop {
11961219
let mut segment_start = self.serialization.len();
11971220
let mut ends_with_slash = false;
1221+
let mut start_str = input.chars.as_str();
11981222
loop {
11991223
let input_before_c = input.clone();
1200-
let (c, utf8_c) = if let Some(x) = input.next_utf8() {
1201-
x
1224+
// bypass input.next() and manually handle ascii_tab_or_new_line
1225+
// in order to encode string slices in bulk
1226+
let c = if let Some(c) = input.chars.next() {
1227+
c
12021228
} else {
1229+
push_pending(
1230+
&mut self.serialization,
1231+
start_str,
1232+
0,
1233+
self.context,
1234+
scheme_type,
1235+
);
12031236
break;
12041237
};
12051238
match c {
1239+
ascii_tab_or_new_line_pattern!() => {
1240+
push_pending(
1241+
&mut self.serialization,
1242+
start_str,
1243+
input_before_c.chars.as_str().len(),
1244+
self.context,
1245+
scheme_type,
1246+
);
1247+
start_str = input.chars.as_str();
1248+
}
12061249
'/' if self.context != Context::PathSegmentSetter => {
1250+
push_pending(
1251+
&mut self.serialization,
1252+
start_str,
1253+
input_before_c.chars.as_str().len(),
1254+
self.context,
1255+
scheme_type,
1256+
);
12071257
self.serialization.push(c);
12081258
ends_with_slash = true;
12091259
break;
12101260
}
12111261
'\\' if self.context != Context::PathSegmentSetter
12121262
&& scheme_type.is_special() =>
12131263
{
1264+
push_pending(
1265+
&mut self.serialization,
1266+
start_str,
1267+
input_before_c.chars.as_str().len(),
1268+
self.context,
1269+
scheme_type,
1270+
);
12141271
self.log_violation(SyntaxViolation::Backslash);
12151272
self.serialization.push('/');
12161273
ends_with_slash = true;
12171274
break;
12181275
}
12191276
'?' | '#' if self.context == Context::UrlParser => {
1277+
push_pending(
1278+
&mut self.serialization,
1279+
start_str,
1280+
input_before_c.chars.as_str().len(),
1281+
self.context,
1282+
scheme_type,
1283+
);
12201284
input = input_before_c;
12211285
break;
12221286
}
@@ -1228,23 +1292,21 @@ impl<'a> Parser<'a> {
12281292
&self.serialization[path_start + 1..],
12291293
)
12301294
{
1295+
push_pending(
1296+
&mut self.serialization,
1297+
start_str,
1298+
input_before_c.chars.as_str().len(),
1299+
self.context,
1300+
scheme_type,
1301+
);
1302+
start_str = input_before_c.chars.as_str();
12311303
self.serialization.push('/');
12321304
segment_start += 1;
12331305
}
1234-
if self.context == Context::PathSegmentSetter {
1235-
if scheme_type.is_special() {
1236-
self.serialization
1237-
.extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
1238-
} else {
1239-
self.serialization
1240-
.extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
1241-
}
1242-
} else {
1243-
self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
1244-
}
12451306
}
12461307
}
12471308
}
1309+
12481310
let segment_before_slash = if ends_with_slash {
12491311
&self.serialization[segment_start..self.serialization.len() - 1]
12501312
} else {

0 commit comments

Comments
 (0)