From 7cc078752669f5e0cc81acbafe7a46fd7cf00954 Mon Sep 17 00:00:00 2001 From: Narfinger Date: Thu, 26 Jun 2025 16:13:34 +0200 Subject: [PATCH 1/3] Improvements to the speed of latin1_to_string. On the benchmark it improves the encoding from around 9 microseconds to 6 microseconds. This PR also includes the benchmark, set up as a criterion benchmark. Signed-off-by: Narfinger --- mozjs/Cargo.toml | 9 +++ mozjs/benches/latin1_string_conversion.rs | 96 +++++++++++++++++++++++ mozjs/src/conversions.rs | 15 +++- mozjs/tests/external_string.rs | 40 ++++++---- 4 files changed, 142 insertions(+), 18 deletions(-) create mode 100644 mozjs/benches/latin1_string_conversion.rs diff --git a/mozjs/Cargo.toml b/mozjs/Cargo.toml index 51b0667b47..ad14435822 100644 --- a/mozjs/Cargo.toml +++ b/mozjs/Cargo.toml @@ -17,10 +17,19 @@ profilemozjs = ['mozjs_sys/profilemozjs'] crown = ['mozjs_sys/crown'] [dependencies] +encoding_rs = "0.8.35" libc.workspace = true log = "0.4" mozjs_sys = { path = "../mozjs-sys" } +[dev-dependencies] +criterion = "0.6" + [build-dependencies] cc.workspace = true bindgen.workspace = true + + +[[bench]] +name = "latin1_string_conversion" +harness = false diff --git a/mozjs/benches/latin1_string_conversion.rs b/mozjs/benches/latin1_string_conversion.rs new file mode 100644 index 0000000000..89aa788c61 --- /dev/null +++ b/mozjs/benches/latin1_string_conversion.rs @@ -0,0 +1,96 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use std::ffi::c_void; +use std::{iter, ptr}; + +use mozjs::conversions::jsstr_to_string; +use mozjs::glue::{CreateJSExternalStringCallbacks, JSExternalStringCallbacksTraps}; +use mozjs::jsapi::{ + JSAutoRealm, JS_NewExternalStringLatin1, JS_NewGlobalObject, OnNewGlobalHookOption, +}; +use mozjs::rooted; +use mozjs::rust::{JSEngine, RealmOptions, Runtime, SIMPLE_GLOBAL_CLASS}; + +fn external_string(c: &mut Criterion) { + unsafe { + let engine = JSEngine::init().unwrap(); + let runtime = Runtime::new(engine.handle()); + let 
context = runtime.cx(); + let h_option = OnNewGlobalHookOption::FireOnNewGlobalHook; + let c_option = RealmOptions::default(); + rooted!(in(context) let global = JS_NewGlobalObject( + context, + &SIMPLE_GLOBAL_CLASS, + ptr::null_mut(), + h_option, + &*c_option, + )); + let _ac = JSAutoRealm::new(context, global.get()); + + let latin1_base = + iter::repeat_n("test latin-1 test", 1_000_000).fold(String::new(), |mut acc, x| { + acc.push_str(x); + acc + }); + + let latin1_boxed = latin1_base.as_bytes().to_vec().into_boxed_slice(); + let latin1_chars = Box::into_raw(latin1_boxed).cast::(); + let callbacks = CreateJSExternalStringCallbacks( + &EXTERNAL_STRING_CALLBACKS_TRAPS, + latin1_base.len() as *mut c_void, + ); + rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1( + context, + latin1_chars, + latin1_base.len(), + callbacks + )); + c.bench_function("external_string_latin1", |b| { + b.iter(|| { + jsstr_to_string(context, latin1_jsstr.get()); + }) + }); + } +} + +static EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps = + JSExternalStringCallbacksTraps { + latin1Finalize: Some(latin1::finalize), + latin1SizeOfBuffer: Some(latin1::size_of), + utf16Finalize: Some(utf16::finalize), + utf16SizeOfBuffer: Some(utf16::size_of), + }; + +mod latin1 { + use std::ffi::c_void; + use std::slice; + + use mozjs::jsapi::mozilla::MallocSizeOf; + + pub unsafe extern "C" fn finalize(data: *const c_void, chars: *mut u8) { + let slice = slice::from_raw_parts_mut(chars, data as usize); + let _ = Box::from_raw(slice); + } + + pub unsafe extern "C" fn size_of(data: *const c_void, _: *const u8, _: MallocSizeOf) -> usize { + data as usize + } +} + +mod utf16 { + use std::ffi::c_void; + use std::slice; + + use mozjs::jsapi::mozilla::MallocSizeOf; + + pub unsafe extern "C" fn finalize(data: *const c_void, chars: *mut u16) { + let slice = slice::from_raw_parts_mut(chars, data as usize); + let _ = Box::from_raw(slice); + } + + pub unsafe extern "C" fn size_of(data: 
*const c_void, _: *const u16, _: MallocSizeOf) -> usize { + data as usize + } +} + +criterion_group!(benches, external_string); +criterion_main!(benches); diff --git a/mozjs/src/conversions.rs b/mozjs/src/conversions.rs index ebf00a52ba..1abcb6c88e 100644 --- a/mozjs/src/conversions.rs +++ b/mozjs/src/conversions.rs @@ -48,6 +48,7 @@ use log::debug; use mozjs_sys::jsgc::Rooted; use std::borrow::Cow; use std::mem; +use std::mem::MaybeUninit; use std::rc::Rc; use std::{ptr, slice}; @@ -525,6 +526,16 @@ impl FromJSValConvertible for f64 { } } +/// Copies chars to the string +unsafe fn fast_copy(chars: &[u8]) -> String { + let mut v = Vec::with_capacity(chars.len() * 2); + v.set_len(chars.len() * 2); + let real_size = encoding_rs::mem::convert_latin1_to_utf8(chars, v.as_mut_slice()); + + v.truncate(real_size); + String::from_utf8_unchecked(v) +} + /// Converts a `JSString`, encoded in "Latin1" (i.e. U+0000-U+00FF encoded as 0x00-0xFF) into a /// `String`. pub unsafe fn latin1_to_string(cx: *mut JSContext, s: *mut JSString) -> String { @@ -535,9 +546,7 @@ pub unsafe fn latin1_to_string(cx: *mut JSContext, s: *mut JSString) -> String { assert!(!chars.is_null()); let chars = slice::from_raw_parts(chars, length as usize); - let mut s = String::with_capacity(length as usize); - s.extend(chars.iter().map(|&c| c as char)); - s + fast_copy(chars) } /// Converts a `JSString` into a `String`, regardless of used encoding. 
diff --git a/mozjs/tests/external_string.rs b/mozjs/tests/external_string.rs index a202810b81..4a356e0123 100644 --- a/mozjs/tests/external_string.rs +++ b/mozjs/tests/external_string.rs @@ -36,21 +36,13 @@ fn external_string() { )); let _ac = JSAutoRealm::new(context, global.get()); - let latin1_base = "test latin-1"; - let latin1_boxed = latin1_base.as_bytes().to_vec().into_boxed_slice(); - let latin1_chars = Box::into_raw(latin1_boxed).cast::(); - - let callbacks = CreateJSExternalStringCallbacks( - &EXTERNAL_STRING_CALLBACKS_TRAPS, - latin1_base.len() as *mut c_void, - ); - rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1( - context, - latin1_chars, - latin1_base.len(), - callbacks - )); - assert_eq!(jsstr_to_string(context, latin1_jsstr.get()), latin1_base); + test_latin1_string(context, "test latin1"); + test_latin1_string(context, "abcdefghijklmnop"); // exactly 16 bytes + test_latin1_string(context, "abcdefghijklmnopq"); // 17 bytes + test_latin1_string(context, "abcdefghijklmno"); // 15 bytes + test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcdef"); //32 bytes + test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcde"); //31 bytes + test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcdefg"); //33 bytes let utf16_base = "test utf-16 $€ \u{10437}\u{24B62}"; let utf16_boxed = utf16_base @@ -74,6 +66,24 @@ fn external_string() { } } +#[cfg(test)] +unsafe fn test_latin1_string(context: *mut mozjs::jsapi::JSContext, latin1_base: &str) { + let latin1_boxed = latin1_base.as_bytes().to_vec().into_boxed_slice(); + let latin1_chars = Box::into_raw(latin1_boxed).cast::(); + + let callbacks = CreateJSExternalStringCallbacks( + &EXTERNAL_STRING_CALLBACKS_TRAPS, + latin1_base.len() as *mut c_void, + ); + rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1( + context, + latin1_chars, + latin1_base.len(), + callbacks + )); + assert_eq!(jsstr_to_string(context, latin1_jsstr.get()), latin1_base); +} + static 
EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps = JSExternalStringCallbacksTraps { latin1Finalize: Some(latin1::finalize), From f09f8cbf05f29652c0d322dd2d1f6056f44ddb9a Mon Sep 17 00:00:00 2001 From: Jonathan Schwender Date: Thu, 10 Jul 2025 11:35:29 +0800 Subject: [PATCH 2/3] latin1 benches: measure multiple input sizes Signed-off-by: Jonathan Schwender --- mozjs/benches/latin1_string_conversion.rs | 107 ++++++++++++++-------- 1 file changed, 69 insertions(+), 38 deletions(-) diff --git a/mozjs/benches/latin1_string_conversion.rs b/mozjs/benches/latin1_string_conversion.rs index 89aa788c61..553af43077 100644 --- a/mozjs/benches/latin1_string_conversion.rs +++ b/mozjs/benches/latin1_string_conversion.rs @@ -1,7 +1,7 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use std::ffi::c_void; -use std::{iter, ptr}; - +use criterion::measurement::WallTime; +use criterion::{ + criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, Throughput, +}; use mozjs::conversions::jsstr_to_string; use mozjs::glue::{CreateJSExternalStringCallbacks, JSExternalStringCallbacksTraps}; use mozjs::jsapi::{ @@ -9,48 +9,79 @@ use mozjs::jsapi::{ }; use mozjs::rooted; use mozjs::rust::{JSEngine, RealmOptions, Runtime, SIMPLE_GLOBAL_CLASS}; +use mozjs_sys::jsapi::JSContext; +use std::ffi::c_void; +use std::{iter, ptr}; -fn external_string(c: &mut Criterion) { - unsafe { - let engine = JSEngine::init().unwrap(); - let runtime = Runtime::new(engine.handle()); - let context = runtime.cx(); - let h_option = OnNewGlobalHookOption::FireOnNewGlobalHook; - let c_option = RealmOptions::default(); - rooted!(in(context) let global = JS_NewGlobalObject( - context, - &SIMPLE_GLOBAL_CLASS, - ptr::null_mut(), - h_option, - &*c_option, - )); - let _ac = JSAutoRealm::new(context, global.get()); - - let latin1_base = - iter::repeat_n("test latin-1 test", 1_000_000).fold(String::new(), |mut acc, x| { - acc.push_str(x); +// Todo make a trait for creating a latin1 
str of a required length, so that we can +// try different kinds of content. +fn bench_str_repetition( + group: &mut BenchmarkGroup, + context: *mut JSContext, + variant_name: &str, + latin1str_16_bytes: &[u8], +) { + assert_eq!(latin1str_16_bytes.len(), 16); + for repetitions in [1, 4, 16, 64, 256, 1024, 4096].iter() { + let str_len = repetitions * latin1str_16_bytes.len(); + let latin1_base = iter::repeat_n(latin1str_16_bytes, *repetitions).fold( + Vec::with_capacity(str_len), + |mut acc, x| { + acc.extend_from_slice(x); acc - }); - - let latin1_boxed = latin1_base.as_bytes().to_vec().into_boxed_slice(); - let latin1_chars = Box::into_raw(latin1_boxed).cast::(); - let callbacks = CreateJSExternalStringCallbacks( - &EXTERNAL_STRING_CALLBACKS_TRAPS, - latin1_base.len() as *mut c_void, + }, ); - rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1( + let latin1_boxed = latin1_base.into_boxed_slice(); + let latin1_chars = Box::into_raw(latin1_boxed).cast::(); + let callbacks = unsafe { + CreateJSExternalStringCallbacks( + &EXTERNAL_STRING_CALLBACKS_TRAPS, + str_len as *mut c_void, + ) + }; + rooted!(in(context) let latin1_jsstr = unsafe { JS_NewExternalStringLatin1( context, latin1_chars, - latin1_base.len(), + str_len, callbacks - )); - c.bench_function("external_string_latin1", |b| { - b.iter(|| { - jsstr_to_string(context, latin1_jsstr.get()); - }) - }); + )}); + group.throughput(Throughput::Bytes(str_len as u64)); + group.bench_with_input( + BenchmarkId::new(variant_name, str_len), + &latin1_jsstr, + |b, js_str| { + b.iter(|| { + unsafe { jsstr_to_string(context, js_str.get()) }; + }) + }, + ); } } +fn external_string(c: &mut Criterion) { + let engine = JSEngine::init().unwrap(); + let runtime = Runtime::new(engine.handle()); + let context = runtime.cx(); + let h_option = OnNewGlobalHookOption::FireOnNewGlobalHook; + let c_option = RealmOptions::default(); + rooted!(in(context) let global = unsafe { JS_NewGlobalObject( + context, + 
&SIMPLE_GLOBAL_CLASS, + ptr::null_mut(), + h_option, + &*c_option, + )}); + let _ac = JSAutoRealm::new(context, global.get()); + + let mut group = c.benchmark_group("Latin1 conversion"); + + let ascii_example = b"test latin-1 tes"; + bench_str_repetition(&mut group, context, "ascii a-z", ascii_example); + // fastpath for the first few characters, then slowpath for the remaining (long part) + // todo: make generator functions, so we can define at which percentage of the size + // the first high byte shows up (which forces the slow path). + let ascii_with_high = b"test latin-1 \xD6\xC0\xFF"; + bench_str_repetition(&mut group, context, "ascii with high", ascii_with_high); +} static EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps = JSExternalStringCallbacksTraps { From 0491952502555bb86e906d12a1049e86d0f8e386 Mon Sep 17 00:00:00 2001 From: Narfinger Date: Thu, 10 Jul 2025 10:41:23 +0200 Subject: [PATCH 3/3] Also test the higher parts of latin1 which are not the same. At the moment we have to use encoding_rs to do this encoding leading us to a bit of a cyclical testing. But tests should prepare for future refactoring, so I think this is fine. Signed-off-by: Narfinger --- mozjs/tests/external_string.rs | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/mozjs/tests/external_string.rs b/mozjs/tests/external_string.rs index 4a356e0123..76ed88e1b1 100644 --- a/mozjs/tests/external_string.rs +++ b/mozjs/tests/external_string.rs @@ -43,6 +43,38 @@ fn external_string() { test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcdef"); //32 bytes test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcde"); //31 bytes test_latin1_string(context, "abcdefghijklmnopqrstuvwxyzabcdefg"); //33 bytes + //test_latin1_string(context, "test latin-1 Ö"); //testing whole latin1 range. + // whole latin1 table + test_latin1_string(context, " ! \" # $ % & ' ( ) * + , - . 
/"); + test_latin1_string(context, "0 1 2 3 4 5 6 7 8 9 : ; < = > ?"); + test_latin1_string(context, "@ A B C D E F G H I J K L M N O"); + test_latin1_string(context, "P Q R S T U V W X Y Z [ \\ ] ^ _"); + test_latin1_string(context, "` a b c d e f g h i j k l m n o"); + test_latin1_string(context, "p q r s t u v w x y z { | } ~"); + test_latin1_string_bytes( + context, + b"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF", + ); + test_latin1_string_bytes( + context, + b"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", + ); + test_latin1_string_bytes( + context, + b"\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF", + ); + test_latin1_string_bytes( + context, + b"\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF", + ); + test_latin1_string_bytes( + context, + b"\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF", + ); + test_latin1_string_bytes( + context, + b"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF", + ); let utf16_base = "test utf-16 $€ \u{10437}\u{24B62}"; let utf16_boxed = utf16_base @@ -84,6 +116,27 @@ unsafe fn test_latin1_string(context: *mut mozjs::jsapi::JSContext, latin1_base: assert_eq!(jsstr_to_string(context, latin1_jsstr.get()), latin1_base); } +#[cfg(test)] +unsafe fn test_latin1_string_bytes(context: *mut mozjs::jsapi::JSContext, latin1_base: &[u8]) { + let latin1_boxed = latin1_base.to_vec().into_boxed_slice(); + let latin1_chars = Box::into_raw(latin1_boxed).cast::(); + + let callbacks = CreateJSExternalStringCallbacks( + &EXTERNAL_STRING_CALLBACKS_TRAPS, + latin1_base.len() as *mut c_void, + ); + rooted!(in(context) let latin1_jsstr = JS_NewExternalStringLatin1( + context, + latin1_chars, + latin1_base.len(), + callbacks + )); + assert_eq!( + jsstr_to_string(context, latin1_jsstr.get()), + encoding_rs::mem::decode_latin1(latin1_base) + ); +} + static EXTERNAL_STRING_CALLBACKS_TRAPS: JSExternalStringCallbacksTraps = 
JSExternalStringCallbacksTraps { latin1Finalize: Some(latin1::finalize),