-
Notifications
You must be signed in to change notification settings - Fork 123
Improvements to the speed of latin1_to_string. #587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Narfinger
wants to merge
3
commits into
servo:main
Choose a base branch
from
Narfinger:latin1-improvements
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
use criterion::measurement::WallTime; | ||
use criterion::{ | ||
criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, Throughput, | ||
}; | ||
use mozjs::conversions::jsstr_to_string; | ||
use mozjs::glue::{CreateJSExternalStringCallbacks, JSExternalStringCallbacksTraps}; | ||
use mozjs::jsapi::{ | ||
JSAutoRealm, JS_NewExternalStringLatin1, JS_NewGlobalObject, OnNewGlobalHookOption, | ||
}; | ||
use mozjs::rooted; | ||
use mozjs::rust::{JSEngine, RealmOptions, Runtime, SIMPLE_GLOBAL_CLASS}; | ||
use mozjs_sys::jsapi::JSContext; | ||
use std::ffi::c_void; | ||
use std::{iter, ptr}; | ||
|
||
// Todo make a trait for creating a latin1 str of a required length, so that we can | ||
// try different kinds of content. | ||
fn bench_str_repetition( | ||
group: &mut BenchmarkGroup<WallTime>, | ||
context: *mut JSContext, | ||
variant_name: &str, | ||
latin1str_16_bytes: &[u8], | ||
) { | ||
assert_eq!(latin1str_16_bytes.len(), 16); | ||
for repetitions in [1, 4, 16, 64, 256, 1024, 4096].iter() { | ||
let str_len = repetitions * latin1str_16_bytes.len(); | ||
let latin1_base = iter::repeat_n(latin1str_16_bytes, *repetitions).fold( | ||
Vec::with_capacity(str_len), | ||
|mut acc, x| { | ||
acc.extend_from_slice(x); | ||
acc | ||
}, | ||
); | ||
let latin1_boxed = latin1_base.into_boxed_slice(); | ||
let latin1_chars = Box::into_raw(latin1_boxed).cast::<u8>(); | ||
let callbacks = unsafe { | ||
CreateJSExternalStringCallbacks( | ||
&EXTERNAL_STRING_CALLBACKS_TRAPS, | ||
str_len as *mut c_void, | ||
) | ||
}; | ||
rooted!(in(context) let latin1_jsstr = unsafe { JS_NewExternalStringLatin1( | ||
context, | ||
latin1_chars, | ||
str_len, | ||
callbacks | ||
)}); | ||
group.throughput(Throughput::Bytes(str_len as u64)); | ||
group.bench_with_input( | ||
BenchmarkId::new(variant_name, str_len), | ||
&latin1_jsstr, | ||
|b, js_str| { | ||
b.iter(|| { | ||
unsafe { jsstr_to_string(context, js_str.get()) }; | ||
}) | ||
}, | ||
); | ||
} | ||
} | ||
fn external_string(c: &mut Criterion) { | ||
let engine = JSEngine::init().unwrap(); | ||
let runtime = Runtime::new(engine.handle()); | ||
let context = runtime.cx(); | ||
let h_option = OnNewGlobalHookOption::FireOnNewGlobalHook; | ||
let c_option = RealmOptions::default(); | ||
rooted!(in(context) let global = unsafe { JS_NewGlobalObject( | ||
context, | ||
&SIMPLE_GLOBAL_CLASS, | ||
ptr::null_mut(), | ||
h_option, | ||
&*c_option, | ||
)}); | ||
let _ac = JSAutoRealm::new(context, global.get()); | ||
|
||
let mut group = c.benchmark_group("Latin1 conversion"); | ||
|
||
let ascii_example = b"test latin-1 tes"; | ||
bench_str_repetition(&mut group, context, "ascii a-z", ascii_example); | ||
// fastpath for the first few characters, then slowpath for the remaining (long part) | ||
// todo: make generator functions, so we can define at which percentage of the size | ||
// the first high byte shows up (which forces the slow path). | ||
let ascii_with_high = b"test latin-1 \xD6\xC0\xFF"; | ||
bench_str_repetition(&mut group, context, "ascii with high", ascii_with_high); | ||
} | ||
|
||
static EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps = | ||
JSExternalStringCallbacksTraps { | ||
latin1Finalize: Some(latin1::finalize), | ||
latin1SizeOfBuffer: Some(latin1::size_of), | ||
utf16Finalize: Some(utf16::finalize), | ||
utf16SizeOfBuffer: Some(utf16::size_of), | ||
}; | ||
|
||
mod latin1 { | ||
use std::ffi::c_void; | ||
use std::slice; | ||
|
||
use mozjs::jsapi::mozilla::MallocSizeOf; | ||
|
||
pub unsafe extern "C" fn finalize(data: *const c_void, chars: *mut u8) { | ||
let slice = slice::from_raw_parts_mut(chars, data as usize); | ||
let _ = Box::from_raw(slice); | ||
} | ||
|
||
pub unsafe extern "C" fn size_of(data: *const c_void, _: *const u8, _: MallocSizeOf) -> usize { | ||
data as usize | ||
} | ||
} | ||
|
||
mod utf16 { | ||
use std::ffi::c_void; | ||
use std::slice; | ||
|
||
use mozjs::jsapi::mozilla::MallocSizeOf; | ||
|
||
pub unsafe extern "C" fn finalize(data: *const c_void, chars: *mut u16) { | ||
let slice = slice::from_raw_parts_mut(chars, data as usize); | ||
let _ = Box::from_raw(slice); | ||
} | ||
|
||
pub unsafe extern "C" fn size_of(data: *const c_void, _: *const u16, _: MallocSizeOf) -> usize { | ||
data as usize | ||
} | ||
} | ||
|
||
criterion_group!(benches, external_string); | ||
criterion_main!(benches); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is undefined behavior. See the safety requirements of `set_len`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is some existing discussion over at encoding_rs: hsivonen/encoding_rs#79.