Skip to content

Improvements to the speed of latin1_to_string. #587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions mozjs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,19 @@ profilemozjs = ['mozjs_sys/profilemozjs']
crown = ['mozjs_sys/crown']

[dependencies]
encoding_rs = "0.8.35"
libc.workspace = true
log = "0.4"
mozjs_sys = { path = "../mozjs-sys" }

[dev-dependencies]
criterion = "0.6"

[build-dependencies]
cc.workspace = true
bindgen.workspace = true


[[bench]]
name = "latin1_string_conversion"
harness = false
127 changes: 127 additions & 0 deletions mozjs/benches/latin1_string_conversion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
use criterion::measurement::WallTime;
use criterion::{
criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, Throughput,
};
use mozjs::conversions::jsstr_to_string;
use mozjs::glue::{CreateJSExternalStringCallbacks, JSExternalStringCallbacksTraps};
use mozjs::jsapi::{
JSAutoRealm, JS_NewExternalStringLatin1, JS_NewGlobalObject, OnNewGlobalHookOption,
};
use mozjs::rooted;
use mozjs::rust::{JSEngine, RealmOptions, Runtime, SIMPLE_GLOBAL_CLASS};
use mozjs_sys::jsapi::JSContext;
use std::ffi::c_void;
use std::{iter, ptr};

// TODO: introduce a trait for building a Latin-1 string of a required length, so that
// different kinds of content can be benchmarked.
/// Benchmarks `jsstr_to_string` on external Latin-1 strings made by repeating a
/// 16-byte pattern 1, 4, 16, ... 4096 times.
///
/// For every length a boxed byte buffer is turned into a raw pointer and wrapped in
/// an external Latin-1 `JSString`. The buffer length is stored in the callbacks'
/// data pointer, which is what `latin1::finalize` reads to rebuild and free the box.
/// The string length in bytes is reported to criterion as the throughput.
///
/// # Panics
///
/// Panics if `latin1str_16_bytes` is not exactly 16 bytes long.
fn bench_str_repetition(
    group: &mut BenchmarkGroup<WallTime>,
    context: *mut JSContext,
    variant_name: &str,
    latin1str_16_bytes: &[u8],
) {
    assert_eq!(latin1str_16_bytes.len(), 16);
    for repetitions in [1usize, 4, 16, 64, 256, 1024, 4096] {
        let str_len = repetitions * latin1str_16_bytes.len();
        // Concatenate `repetitions` copies of the pattern into one boxed slice.
        let latin1_boxed: Box<[u8]> = iter::repeat_n(latin1str_16_bytes, repetitions)
            .flatten()
            .copied()
            .collect();
        debug_assert_eq!(latin1_boxed.len(), str_len);
        let latin1_chars = Box::into_raw(latin1_boxed).cast::<u8>();
        // The length travels in the opaque data pointer of the callbacks.
        let callbacks = unsafe {
            CreateJSExternalStringCallbacks(
                &EXTERNAL_STRING_CALLBACKS_TRAPS,
                str_len as *mut c_void,
            )
        };
        rooted!(in(context) let latin1_jsstr = unsafe { JS_NewExternalStringLatin1(
            context,
            latin1_chars,
            str_len,
            callbacks
        )});
        group.throughput(Throughput::Bytes(str_len as u64));
        group.bench_with_input(
            BenchmarkId::new(variant_name, str_len),
            &latin1_jsstr,
            |b, js_str| {
                // Return the converted string instead of discarding it: criterion
                // black-boxes the routine's return value, which keeps the optimiser
                // from removing the conversion being measured.
                b.iter(|| unsafe { jsstr_to_string(context, js_str.get()) })
            },
        );
    }
}
/// Criterion entry point for the Latin-1 conversion benchmarks.
///
/// Initialises a JS engine and runtime, creates a global object and enters its
/// realm, then runs `bench_str_repetition` for an all-ASCII pattern and for a
/// pattern whose last three bytes are above 0x7F.
fn external_string(c: &mut Criterion) {
    let engine = JSEngine::init().unwrap();
    let runtime = Runtime::new(engine.handle());
    let context = runtime.cx();
    let h_option = OnNewGlobalHookOption::FireOnNewGlobalHook;
    let c_option = RealmOptions::default();
    rooted!(in(context) let global = unsafe { JS_NewGlobalObject(
        context,
        &SIMPLE_GLOBAL_CLASS,
        ptr::null_mut(),
        h_option,
        &*c_option,
    )});
    let _ac = JSAutoRealm::new(context, global.get());

    let mut group = c.benchmark_group("Latin1 conversion");

    let ascii_example = b"test latin-1 tes";
    bench_str_repetition(&mut group, context, "ascii a-z", ascii_example);
    // Fast path for the first few characters, then the slow path for the remaining
    // (long) part.
    // TODO: add generator functions so that the position of the first high byte
    // (which forces the slow path) can be chosen as a percentage of the length.
    let ascii_with_high = b"test latin-1 \xD6\xC0\xFF";
    bench_str_repetition(&mut group, context, "ascii with high", ascii_with_high);

    // Criterion recommends finishing a group explicitly rather than relying on drop.
    group.finish();
}

/// Callback table passed to `CreateJSExternalStringCallbacks`: it points the
/// Latin-1 and UTF-16 finalize / size-of hooks at the functions in the `latin1`
/// and `utf16` modules of this file.
static EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps =
JSExternalStringCallbacksTraps {
latin1Finalize: Some(latin1::finalize),
latin1SizeOfBuffer: Some(latin1::size_of),
utf16Finalize: Some(utf16::finalize),
utf16SizeOfBuffer: Some(utf16::size_of),
};

/// External-string callbacks for Latin-1 (`u8`) buffers.
///
/// In both callbacks `data` is not a real pointer: it carries the buffer length as
/// an integer.
mod latin1 {
    use std::ffi::c_void;
    use std::slice;

    use mozjs::jsapi::mozilla::MallocSizeOf;

    /// Rebuilds the boxed slice of `data as usize` bytes starting at `chars` and
    /// drops it, releasing the buffer.
    pub unsafe extern "C" fn finalize(data: *const c_void, chars: *mut u8) {
        let len = data as usize;
        let buffer: *mut [u8] = slice::from_raw_parts_mut(chars, len);
        drop(Box::from_raw(buffer));
    }

    /// Reports the length stored in `data`; the buffer pointer and the
    /// `MallocSizeOf` argument are ignored.
    pub unsafe extern "C" fn size_of(
        data: *const c_void,
        _chars: *const u8,
        _malloc_size_of: MallocSizeOf,
    ) -> usize {
        data as usize
    }
}

/// External-string callbacks for UTF-16 (`u16`) buffers.
///
/// In both callbacks `data` is not a real pointer: it carries the number of `u16`
/// elements in the buffer as an integer.
mod utf16 {
    use std::ffi::c_void;
    use std::slice;

    use mozjs::jsapi::mozilla::MallocSizeOf;

    /// Rebuilds the boxed slice of `data as usize` code units starting at `chars`
    /// and drops it, releasing the buffer.
    pub unsafe extern "C" fn finalize(data: *const c_void, chars: *mut u16) {
        let len = data as usize;
        let buffer: *mut [u16] = slice::from_raw_parts_mut(chars, len);
        drop(Box::from_raw(buffer));
    }

    /// Reports the value stored in `data`; the buffer pointer and the
    /// `MallocSizeOf` argument are ignored.
    ///
    /// NOTE(review): this is the element count, not a byte count. If the engine
    /// expects bytes here the value would need multiplying by the size of `u16` —
    /// confirm against the SpiderMonkey callback contract.
    pub unsafe extern "C" fn size_of(
        data: *const c_void,
        _chars: *const u16,
        _malloc_size_of: MallocSizeOf,
    ) -> usize {
        data as usize
    }
}

criterion_group!(benches, external_string);
criterion_main!(benches);
15 changes: 12 additions & 3 deletions mozjs/src/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ use log::debug;
use mozjs_sys::jsgc::Rooted;
use std::borrow::Cow;
use std::mem;
use std::mem::MaybeUninit;
use std::rc::Rc;
use std::{ptr, slice};

Expand Down Expand Up @@ -525,6 +526,16 @@ impl FromJSValConvertible for f64 {
}
}

/// Copies chars to the string
unsafe fn fast_copy(chars: &[u8]) -> String {
let mut v = Vec::with_capacity(chars.len() * 2);
v.set_len(chars.len() * 2);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is undefined behavior. See Safety requirements of set_len

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is some existing discussion over at encoding_rs hsivonen/encoding_rs#79

let real_size = encoding_rs::mem::convert_latin1_to_utf8(chars, v.as_mut_slice());

v.truncate(real_size);
String::from_utf8_unchecked(v)
}

/// Converts a `JSString`, encoded in "Latin1" (i.e. U+0000-U+00FF encoded as 0x00-0xFF) into a
/// `String`.
pub unsafe fn latin1_to_string(cx: *mut JSContext, s: *mut JSString) -> String {
Expand All @@ -535,9 +546,7 @@ pub unsafe fn latin1_to_string(cx: *mut JSContext, s: *mut JSString) -> String {
assert!(!chars.is_null());

let chars = slice::from_raw_parts(chars, length as usize);
let mut s = String::with_capacity(length as usize);
s.extend(chars.iter().map(|&c| c as char));
s
fast_copy(chars)
}

/// Converts a `JSString` into a `String`, regardless of used encoding.
Expand Down
89 changes: 76 additions & 13 deletions mozjs/tests/external_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,45 @@ fn external_string() {
));
let _ac = JSAutoRealm::new(context, global.get());

let latin1_base = "test latin-1";
let latin1_boxed = latin1_base.as_bytes().to_vec().into_boxed_slice();
let latin1_chars = Box::into_raw(latin1_boxed).cast::<u8>();

let callbacks = CreateJSExternalStringCallbacks(
&EXTERNAL_STRING_CALLBACKS_TRAPS,
latin1_base.len() as *mut c_void,
test_latin1_string(context, "test latin1");
test_latin1_string(context, "abcdefghijklmnop"); // exactly 16 bytes
test_latin1_string(context, "abcdefghijklmnopq"); // 17 bytes
test_latin1_string(context, "abcdefghijklmno"); // 15 bytes
test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcdef"); //32 bytes
test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcde"); //31 bytes
test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcdefg"); //33 bytes
//test_latin1_string(context, "test latin-1 Ö"); //testing whole latin1 range.
// whole latin1 table
test_latin1_string(context, " ! \" # $ % & ' ( ) * + , - . /");
test_latin1_string(context, "0 1 2 3 4 5 6 7 8 9 : ; < = > ?");
test_latin1_string(context, "@ A B C D E F G H I J K L M N O");
test_latin1_string(context, "P Q R S T U V W X Y Z [ \\ ] ^ _");
test_latin1_string(context, "` a b c d e f g h i j k l m n o");
test_latin1_string(context, "p q r s t u v w x y z { | } ~");
test_latin1_string_bytes(
context,
b"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF",
);
rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1(
test_latin1_string_bytes(
context,
latin1_chars,
latin1_base.len(),
callbacks
));
assert_eq!(jsstr_to_string(context, latin1_jsstr.get()), latin1_base);
b"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
);
test_latin1_string_bytes(
context,
b"\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF",
);
test_latin1_string_bytes(
context,
b"\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF",
);
test_latin1_string_bytes(
context,
b"\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF",
);
test_latin1_string_bytes(
context,
b"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF",
);

let utf16_base = "test utf-16 $€ \u{10437}\u{24B62}";
let utf16_boxed = utf16_base
Expand All @@ -74,6 +98,45 @@ fn external_string() {
}
}

/// Creates an external Latin-1 `JSString` from the bytes of `latin1_base` and
/// asserts that `jsstr_to_string` converts it back to the same text.
///
/// The UTF-8 bytes of the `&str` are handed over as Latin-1, which matches the
/// `&str` only for ASCII input; use `test_latin1_string_bytes` for bytes above 0x7F.
///
/// # Panics
///
/// Panics if `latin1_base` is not ASCII or if the round trip does not match.
#[cfg(test)]
unsafe fn test_latin1_string(context: *mut mozjs::jsapi::JSContext, latin1_base: &str) {
    assert!(
        latin1_base.is_ascii(),
        "test_latin1_string only supports ASCII input, got {:?}; use test_latin1_string_bytes",
        latin1_base
    );

    let latin1_boxed = latin1_base.as_bytes().to_vec().into_boxed_slice();
    let latin1_chars = Box::into_raw(latin1_boxed).cast::<u8>();

    // The buffer length is passed through the callbacks' data pointer.
    let callbacks = CreateJSExternalStringCallbacks(
        &EXTERNAL_STRING_CALLBACKS_TRAPS,
        latin1_base.len() as *mut c_void,
    );
    rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1(
        context,
        latin1_chars,
        latin1_base.len(),
        callbacks
    ));
    assert_eq!(jsstr_to_string(context, latin1_jsstr.get()), latin1_base);
}

/// Creates an external Latin-1 `JSString` from raw bytes and asserts that
/// `jsstr_to_string` yields the string in which every byte is mapped to the
/// Unicode code point with the same value.
///
/// The expected value is computed with the standard library only, so the check
/// does not depend on the crate the conversion under test is built on.
#[cfg(test)]
unsafe fn test_latin1_string_bytes(context: *mut mozjs::jsapi::JSContext, latin1_base: &[u8]) {
    let latin1_boxed = latin1_base.to_vec().into_boxed_slice();
    let latin1_chars = Box::into_raw(latin1_boxed).cast::<u8>();

    // The buffer length is passed through the callbacks' data pointer.
    let callbacks = CreateJSExternalStringCallbacks(
        &EXTERNAL_STRING_CALLBACKS_TRAPS,
        latin1_base.len() as *mut c_void,
    );
    rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1(
        context,
        latin1_chars,
        latin1_base.len(),
        callbacks
    ));

    // `char::from(u8)` maps 0x00..=0xFF to U+0000..=U+00FF, i.e. Latin-1 decoding.
    let expected: String = latin1_base.iter().map(|&byte| char::from(byte)).collect();
    assert_eq!(jsstr_to_string(context, latin1_jsstr.get()), expected);
}

static EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps =
JSExternalStringCallbacksTraps {
latin1Finalize: Some(latin1::finalize),
Expand Down
Loading