Skip to content

Commit 9bc0fd9

Browse files
committed
Make unprefixed consistently override the system allocator
Signed-off-by: Mads Marquart <mads@marquart.dk>
1 parent 0d5deb2 commit 9bc0fd9

File tree

4 files changed

+114
-20
lines changed

4 files changed

+114
-20
lines changed

jemalloc-sys/README.md

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ This crate provides following cargo feature flags:
4848
* `stats` (configure `jemalloc` with `--enable-stats`): Enable statistics
4949
gathering functionality. See the `jemalloc`'s "`opt.stats_print`" option
5050
documentation for usage details.
51-
51+
5252
* `debug` (configure `jemalloc` with `--enable-debug`): Enable assertions and
5353
validation code. This incurs a substantial performance hit, but is very useful
5454
during application development.
55-
55+
5656
* `background_threads_runtime_support` (enabled by default): enables
5757
background-threads run-time support when building `jemalloc-sys` on some POSIX
5858
targets supported by `jemalloc`. Background threads are disabled at run-time
@@ -72,16 +72,25 @@ This crate provides following cargo feature flags:
7272
* `unprefixed_malloc_on_supported_platforms`: when disabled, configure
7373
`jemalloc` with `--with-jemalloc-prefix=_rjem_`. Enabling this causes symbols
7474
like `malloc` to be emitted without a prefix, overriding the ones defined by
75-
libc. This usually causes C and C++ code linked in the same program to use
76-
`jemalloc` as well. On some platforms prefixes are always used because
77-
unprefixing is known to cause segfaults due to allocator mismatches.
78-
75+
libc. This usually causes C, Objective-C and C++ code linked in the same
76+
program to use `jemalloc` as well. On some platforms prefixes are always used
77+
because unprefixing is known to cause segfaults due to allocator mismatches.
78+
79+
Note that to use this, the `jemalloc-sys` crate must actually be visible to
80+
`rustc` (it is not enough to only declare it in `Cargo.toml`). This can be
81+
done by adding:
82+
```rust
83+
use jemalloc_sys as _;
84+
```
85+
86+
to your `main.rs`.
87+
7988
* `disable_initial_exec_tls` (disabled by default): when enabled, jemalloc is
80-
built with the `--disable-initial-exec-tls` option. It disables the
81-
initial-exec TLS model for jemalloc's internal thread-local storage (on those
82-
platforms that support explicit settings). This can allow jemalloc to be
89+
built with the `--disable-initial-exec-tls` option. It disables the
90+
initial-exec TLS model for jemalloc's internal thread-local storage (on those
91+
platforms that support explicit settings). This can allow jemalloc to be
8392
dynamically loaded after program startup (e.g. using dlopen). If you encounter
84-
the error `yourlib.so: cannot allocate memory in static TLS block`, you'll
93+
the error `yourlib.so: cannot allocate memory in static TLS block`, you'll
8594
likely want to enable this.
8695

8796
* `disable_cache_oblivious` (disabled by default): when enabled, jemalloc is
@@ -104,7 +113,7 @@ hyphens `-` are replaced with underscores `_`(see
104113
variable, the `/etc/malloc.conf` symlink, and the `MALLOC_CONF` environment
105114
variable (note: this variable might be prefixed as `_RJEM_MALLOC_CONF`). For
106115
example, to change the default decay time for dirty pages to 30 seconds:
107-
116+
108117
```
109118
JEMALLOC_SYS_WITH_MALLOC_CONF=dirty_decay_ms:30000
110119
```
@@ -115,17 +124,17 @@ hyphens `-` are replaced with underscores `_`(see
115124
allocator page size equal to the system page size, so this option need not be
116125
specified unless the system page size may change between configuration and
117126
execution, e.g. when cross compiling.
118-
127+
119128
* `JEMALLOC_SYS_WITH_LG_HUGEPAGE=<lg-hugepage>`: Specify the base 2 log of the
120129
system huge page size. This option is useful when cross compiling, or when
121130
overriding the default for systems that do not explicitly support huge pages.
122-
123-
131+
132+
124133
* `JEMALLOC_SYS_WITH_LG_QUANTUM=<lg-quantum>`: Specify the base 2 log of the
125134
minimum allocation alignment. jemalloc needs to know the minimum alignment
126135
that meets the following C standard requirement (quoted from the April 12,
127136
2011 draft of the C11 standard):
128-
137+
129138
> The pointer returned if the allocation succeeds is suitably aligned so that
130139
> it may be assigned to a pointer to any type of object with a fundamental
131140
> alignment requirement and then used to access such an object or an array of

jemalloc-sys/build.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,15 @@ fn main() {
111111
.iter()
112112
.any(|i| target.contains(i))
113113
{
114-
warning!(
115-
"Unprefixed `malloc` requested on unsupported platform `{}` => using prefixed `malloc`",
116-
target
117-
);
114+
// Apple targets don't support unprefixed, but they do support
115+
// overriding (if you do the `zone_register` trick), so no need to
116+
// warn there.
117+
if !target.contains("apple") {
118+
warning!(
119+
"Unprefixed `malloc` requested on unsupported platform `{}` => using prefixed `malloc`",
120+
target
121+
);
122+
}
118123
use_prefix = true;
119124
}
120125

jemalloc-sys/src/env.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ pub static NO_BG_THREAD_TARGETS: &[&str] = &["musl"];
2121
// https://github.com/rust-lang/rust/commit/e3b414d8612314e74e2b0ebde1ed5c6997d28e8d
2222
// https://github.com/rust-lang/rust/commit/9f3de647326fbe50e0e283b9018ab7c41abccde3
2323
// https://github.com/rust-lang/rust/commit/ed015456a114ae907a36af80c06f81ea93182a24
24-
pub static NO_UNPREFIXED_MALLOC_TARGETS: &[&str] = &["android", "dragonfly", "darwin"];
24+
pub static NO_UNPREFIXED_MALLOC_TARGETS: &[&str] = &["android", "dragonfly", "apple"];

jemalloc-sys/src/lib.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,3 +890,83 @@ pub type extent_merge_t = unsafe extern "C" fn(
890890
mod env;
891891

892892
pub use env::*;
893+
894+
// When using the `"unprefixed_malloc_on_supported_platforms"` feature flag,
895+
// the user wants us to globally override the system allocator.
896+
//
897+
// However, since we build `jemalloc` as a static library (an archive), the
898+
// linker may decide to not care about our overrides if it can't directly see
899+
// references to the symbols, see the following link for details:
900+
// <https://maskray.me/blog/2021-06-20-symbol-processing#archive-processing>
901+
//
902+
// This is problematic if `jemalloc_sys` is used from a library that by itself
903+
// doesn't allocate, while invoking other shared libraries that do.
904+
//
905+
// Another especially problematic case would be something like the following:
906+
//
907+
// ```
908+
// // Call `malloc` whose symbol is looked up statically.
909+
// let ptr = libc::malloc(42);
910+
//
911+
// // But use a dynamically looked up `free`.
912+
// let free = libc::dlsym(null_mut(), c"free".as_ptr());
913+
// let free = transmute::<*mut c_void, unsafe extern "C" fn(*mut c_void)>(free);
914+
// free(ptr);
915+
// ```
916+
//
917+
// If the `malloc` and `free` provided by `jemalloc` end up in different
918+
// object files in the archive (NOTE: In practice, this is unlikely to be an
919+
// issue, since `jemalloc.c` contains all the implementations and is compiled
920+
// as a single object file), the linker would think that only `malloc` was
921+
// used, and would never load the `free` that we also want (and hence we'd end
922+
// up executing jemalloc's `malloc` and the system's `free`, which is UB).
923+
//
924+
// To avoid this problem, we make sure that all the allocator functions are
925+
// visible to the linker, such that it will always override all of them.
926+
//
927+
// We do this by referencing these symbols in `#[used]` statics, which makes
928+
// them known to `rustc`, which will reference them in a `symbols.o` stub file
929+
// that is later passed to the linker. See the following link for details on
930+
// how this works:
931+
// <https://github.com/rust-lang/rust/pull/95604>
932+
933+
#[cfg(not(prefixed))]
934+
mod set_up_statics {
935+
use super::*;
936+
937+
#[used]
938+
static USED_MALLOC: unsafe extern "C" fn(usize) -> *mut c_void = malloc;
939+
#[used]
940+
static USED_CALLOC: unsafe extern "C" fn(usize, usize) -> *mut c_void = calloc;
941+
#[used]
942+
static USED_POSIX_MEMALIGN: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int =
943+
posix_memalign;
944+
#[used]
945+
static USED_ALIGNED_ALLOC: unsafe extern "C" fn(usize, usize) -> *mut c_void = aligned_alloc;
946+
#[used]
947+
static USED_REALLOC: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = realloc;
948+
#[used]
949+
static USED_FREE: unsafe extern "C" fn(*mut c_void) = free;
950+
}
951+
952+
// On macOS, jemalloc doesn't directly override malloc/free, but instead
953+
// registers itself with the allocator's zone APIs in a ctor (`zone_register`
954+
// is marked with `__attribute__((constructor))`).
955+
//
956+
// Similarly to above though, for the Mach-O linker to actually consider ctors
957+
// as "used" when defined in an archive member in a static library, we need
958+
// to explicitly reference the function via Rust's `#[used]`.
959+
960+
#[cfg(all(
961+
feature = "unprefixed_malloc_on_supported_platforms",
962+
target_vendor = "apple"
963+
))]
964+
#[used]
965+
static USED_ZONE_REGISTER: unsafe extern "C" fn() = {
966+
extern "C" {
967+
#[cfg_attr(prefixed, link_name = "_rjem_je_zone_register")]
968+
#[cfg_attr(not(prefixed), link_name = "je_zone_register")]
969+
fn zone_register();
970+
}
971+
zone_register
972+
};

0 commit comments

Comments
 (0)