diff --git a/Cargo.lock b/Cargo.lock
index e1cf17e2c01ca..110d17142bb3f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -366,6 +366,26 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "capstone"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a"
+dependencies = [
+ "capstone-sys",
+ "libc",
+]
+
+[[package]]
+name = "capstone-sys"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0"
+dependencies = [
+ "cc",
+ "libc",
+]
+
 [[package]]
 name = "cargo-miri"
 version = "0.1.0"
@@ -738,7 +758,7 @@ dependencies = [
  "tracing-subscriber",
  "unified-diff",
  "walkdir",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -1592,7 +1612,7 @@ dependencies = [
  "js-sys",
  "log",
  "wasm-bindgen",
- "windows-core",
+ "windows-core 0.61.2",
 ]
 
 [[package]]
@@ -1916,6 +1936,25 @@ dependencies = [
  "unic-langid",
 ]
 
+[[package]]
+name = "ipc-channel"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fb8251fb7bcd9ccd3725ed8deae9fe7db8e586495c9eb5b0c52e6233e5e75ea"
+dependencies = [
+ "bincode",
+ "crossbeam-channel",
+ "fnv",
+ "lazy_static",
+ "libc",
+ "mio",
+ "rand 0.8.5",
+ "serde",
+ "tempfile",
+ "uuid",
+ "windows 0.58.0",
+]
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.1"
@@ -2301,6 +2340,18 @@ dependencies = [
  "adler2",
 ]
 
+[[package]]
+name = "mio"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
+dependencies = [
+ "libc",
+ "log",
+ "wasi 0.11.1+wasi-snapshot-preview1",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "miow"
 version = "0.6.0"
@@ -2316,18 +2367,22 @@ version = "0.1.0"
 dependencies = [
  "aes",
  "bitflags",
+ "capstone",
  "chrono",
  "chrono-tz",
  "colored",
  "directories",
  "getrandom 0.3.3",
+ "ipc-channel",
  "libc",
  "libffi",
  "libloading",
  "measureme",
+ "nix",
  "rand 0.9.1",
  "regex",
  "rustc_version",
+ "serde",
  "smallvec",
  "tempfile",
  "tikv-jemalloc-sys",
@@ -3519,7 +3574,7 @@ dependencies = [
  "thorin-dwp",
  "tracing",
  "wasm-encoder 0.219.2",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -3578,7 +3633,7 @@ dependencies = [
  "tempfile",
  "thin-vec",
  "tracing",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -3641,7 +3696,7 @@ dependencies = [
  "shlex",
  "stable_mir",
  "tracing",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -3696,7 +3751,7 @@ dependencies = [
  "termcolor",
  "termize",
  "tracing",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -4457,7 +4512,7 @@ dependencies = [
  "rustc_target",
  "termize",
  "tracing",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -5142,7 +5197,7 @@ dependencies = [
  "libc",
  "objc2-core-foundation",
  "objc2-io-kit",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -6034,6 +6089,16 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
+dependencies = [
+ "windows-core 0.58.0",
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows"
 version = "0.61.3"
@@ -6041,7 +6106,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
 dependencies = [
  "windows-collections",
- "windows-core",
+ "windows-core 0.61.2",
  "windows-future",
  "windows-link",
  "windows-numerics",
@@ -6064,7 +6129,20 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
 dependencies = [
- "windows-core",
+ "windows-core 0.61.2",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
+dependencies = [
+ "windows-implement 0.58.0",
+ "windows-interface 0.58.0",
+ "windows-result 0.2.0",
+ "windows-strings 0.1.0",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -6073,11 +6151,11 @@ version = "0.61.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
 dependencies = [
- "windows-implement",
- "windows-interface",
+ "windows-implement 0.60.0",
+ "windows-interface 0.59.1",
  "windows-link",
- "windows-result",
- "windows-strings",
+ "windows-result 0.3.4",
+ "windows-strings 0.4.2",
 ]
 
 [[package]]
@@ -6086,11 +6164,22 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
 dependencies = [
- "windows-core",
+ "windows-core 0.61.2",
  "windows-link",
  "windows-threading",
 ]
 
+[[package]]
+name = "windows-implement"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.103",
+]
+
 [[package]]
 name = "windows-implement"
 version = "0.60.0"
@@ -6102,6 +6191,17 @@ dependencies = [
  "syn 2.0.103",
 ]
 
+[[package]]
+name = "windows-interface"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.103",
+]
+
 [[package]]
 name = "windows-interface"
 version = "0.59.1"
@@ -6125,10 +6225,19 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
 dependencies = [
- "windows-core",
+ "windows-core 0.61.2",
  "windows-link",
 ]
 
+[[package]]
+name = "windows-result"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows-result"
 version = "0.3.4"
@@ -6138,6 +6247,16 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "windows-strings"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
+dependencies = [
+ "windows-result 0.2.0",
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows-strings"
 version = "0.4.2"
diff --git a/compiler/rustc_const_eval/src/util/caller_location.rs b/compiler/rustc_const_eval/src/util/caller_location.rs
index ab2de279ed8de..f489b05fbbd7e 100644
--- a/compiler/rustc_const_eval/src/util/caller_location.rs
+++ b/compiler/rustc_const_eval/src/util/caller_location.rs
@@ -21,13 +21,14 @@ fn alloc_caller_location<'tcx>(
     assert!(!filename.as_str().as_bytes().contains(&0));
 
     let loc_details = ecx.tcx.sess.opts.unstable_opts.location_detail;
-    let file_wide_ptr = {
+    let filename = {
         let filename = if loc_details.file { filename.as_str() } else { "<redacted>" };
         let filename_with_nul = filename.to_owned() + "\0";
         // This can fail if rustc runs out of memory right here. Trying to emit an error would be
         // pointless, since that would require allocating more memory than these short strings.
         let file_ptr = ecx.allocate_bytes_dedup(filename_with_nul.as_bytes()).unwrap();
-        Immediate::new_slice(file_ptr.into(), filename_with_nul.len().try_into().unwrap(), ecx)
+        let file_len = u64::try_from(filename.len()).unwrap();
+        Immediate::new_slice(file_ptr.into(), file_len, ecx)
     };
     let line = if loc_details.line { Scalar::from_u32(line) } else { Scalar::from_u32(0) };
     let col = if loc_details.column { Scalar::from_u32(col) } else { Scalar::from_u32(0) };
@@ -41,11 +42,8 @@ fn alloc_caller_location<'tcx>(
     let location = ecx.allocate(loc_layout, MemoryKind::CallerLocation).unwrap();
 
     // Initialize fields.
-    ecx.write_immediate(
-        file_wide_ptr,
-        &ecx.project_field(&location, FieldIdx::from_u32(0)).unwrap(),
-    )
-    .expect("writing to memory we just allocated cannot fail");
+    ecx.write_immediate(filename, &ecx.project_field(&location, FieldIdx::from_u32(0)).unwrap())
+        .expect("writing to memory we just allocated cannot fail");
     ecx.write_scalar(line, &ecx.project_field(&location, FieldIdx::from_u32(1)).unwrap())
         .expect("writing to memory we just allocated cannot fail");
     ecx.write_scalar(col, &ecx.project_field(&location, FieldIdx::from_u32(2)).unwrap())
diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs
index 2b00b7dd27aaf..78a4d47ca3346 100644
--- a/compiler/rustc_macros/src/symbols.rs
+++ b/compiler/rustc_macros/src/symbols.rs
@@ -190,17 +190,6 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
     let mut symbols_stream = quote! {};
     let mut prefill_stream = quote! {};
     let mut entries = Entries::with_capacity(input.keywords.len() + input.symbols.len() + 10);
-    let mut prev_key: Option<(Span, String)> = None;
-
-    let mut check_order = |span: Span, s: &str, errors: &mut Errors| {
-        if let Some((prev_span, ref prev_str)) = prev_key {
-            if s < prev_str {
-                errors.error(span, format!("Symbol `{s}` must precede `{prev_str}`"));
-                errors.error(prev_span, format!("location of previous symbol `{prev_str}`"));
-            }
-        }
-        prev_key = Some((span, s.to_string()));
-    };
 
     // Generate the listed keywords.
     for keyword in input.keywords.iter() {
@@ -219,7 +208,6 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
     // Generate the listed symbols.
     for symbol in input.symbols.iter() {
         let name = &symbol.name;
-        check_order(symbol.name.span(), &name.to_string(), &mut errors);
 
         let value = match &symbol.value {
             Value::SameAsName => name.to_string(),
diff --git a/compiler/rustc_macros/src/symbols/tests.rs b/compiler/rustc_macros/src/symbols/tests.rs
index 9c53453df5b54..f0a7a2106be49 100644
--- a/compiler/rustc_macros/src/symbols/tests.rs
+++ b/compiler/rustc_macros/src/symbols/tests.rs
@@ -84,18 +84,3 @@ fn check_dup_symbol_and_keyword() {
     };
     test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]);
 }
-
-#[test]
-fn check_symbol_order() {
-    let input = quote! {
-        Keywords {}
-        Symbols {
-            zebra,
-            aardvark,
-        }
-    };
-    test_symbols_macro(
-        input,
-        &["Symbol `aardvark` must precede `zebra`", "location of previous symbol `zebra`"],
-    );
-}
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 0045801c9f6d8..a51f4e3dc07f4 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -150,14 +150,10 @@ symbols! {
     // As well as the symbols listed, there are symbols for the strings
     // "0", "1", ..., "9", which are accessible via `sym::integer`.
     //
-    // The proc macro will abort if symbols are not in alphabetical order (as
-    // defined by `impl Ord for str`) or if any symbols are duplicated. Vim
-    // users can sort the list by selecting it and executing the command
-    // `:'<,'>!LC_ALL=C sort`.
-    //
     // There is currently no checking that all symbols are used; that would be
     // nice to have.
     Symbols {
+        // tidy-alphabetical-start
         Abi,
         AcqRel,
         Acquire,
@@ -175,18 +171,18 @@ symbols! {
         AsyncGenPending,
         AsyncGenReady,
         AtomicBool,
-        AtomicI128,
+        AtomicI8,
         AtomicI16,
         AtomicI32,
         AtomicI64,
-        AtomicI8,
+        AtomicI128,
         AtomicIsize,
         AtomicPtr,
-        AtomicU128,
+        AtomicU8,
         AtomicU16,
         AtomicU32,
         AtomicU64,
-        AtomicU8,
+        AtomicU128,
         AtomicUsize,
         BTreeEntry,
         BTreeMap,
@@ -607,10 +603,10 @@ symbols! {
         catch_unwind,
         cause,
         cdylib,
-        ceilf128,
         ceilf16,
         ceilf32,
         ceilf64,
+        ceilf128,
         cfg,
         cfg_accessible,
         cfg_attr,
@@ -747,10 +743,10 @@ symbols! {
         copy,
         copy_closures,
         copy_nonoverlapping,
-        copysignf128,
         copysignf16,
         copysignf32,
         copysignf64,
+        copysignf128,
         core,
         core_panic,
         core_panic_2015_macro,
@@ -763,10 +759,10 @@ symbols! {
         coroutine_state,
         coroutine_yield,
         coroutines,
-        cosf128,
         cosf16,
         cosf32,
         cosf64,
+        cosf128,
         count,
         coverage,
         coverage_attribute,
@@ -874,8 +870,8 @@ symbols! {
         dotdot_in_tuple_patterns,
         dotdoteq_in_patterns,
         dreg,
-        dreg_low16,
         dreg_low8,
+        dreg_low16,
         drop,
         drop_in_place,
         drop_types_in_const,
@@ -928,16 +924,16 @@ symbols! {
         exhaustive_integer_patterns,
         exhaustive_patterns,
         existential_type,
-        exp2f128,
         exp2f16,
         exp2f32,
         exp2f64,
+        exp2f128,
         expect,
         expected,
-        expf128,
         expf16,
         expf32,
         expf64,
+        expf128,
         explicit_extern_abis,
         explicit_generic_args_with_impl_trait,
         explicit_tail_calls,
@@ -958,9 +954,6 @@ symbols! {
         external,
         external_doc,
         f,
-        f128,
-        f128_epsilon,
-        f128_nan,
         f16,
         f16_epsilon,
         f16_nan,
@@ -999,10 +992,13 @@ symbols! {
         f64_legacy_const_neg_infinity,
         f64_legacy_const_radix,
         f64_nan,
-        fabsf128,
+        f128,
+        f128_epsilon,
+        f128_nan,
         fabsf16,
         fabsf32,
         fabsf64,
+        fabsf128,
         fadd_algebraic,
         fadd_fast,
         fake_variadic,
@@ -1024,22 +1020,22 @@ symbols! {
         flags,
         float,
         float_to_int_unchecked,
-        floorf128,
         floorf16,
         floorf32,
         floorf64,
-        fmaf128,
+        floorf128,
         fmaf16,
         fmaf32,
         fmaf64,
+        fmaf128,
         fmt,
         fmt_debug,
         fmul_algebraic,
         fmul_fast,
-        fmuladdf128,
         fmuladdf16,
         fmuladdf32,
         fmuladdf64,
+        fmuladdf128,
         fn_align,
         fn_body,
         fn_delegation,
@@ -1140,13 +1136,12 @@ symbols! {
         html_root_url,
         hwaddress,
         i,
-        i128,
-        i128_legacy_const_max,
-        i128_legacy_const_min,
-        i128_legacy_fn_max_value,
-        i128_legacy_fn_min_value,
-        i128_legacy_mod,
-        i128_type,
+        i8,
+        i8_legacy_const_max,
+        i8_legacy_const_min,
+        i8_legacy_fn_max_value,
+        i8_legacy_fn_min_value,
+        i8_legacy_mod,
         i16,
         i16_legacy_const_max,
         i16_legacy_const_min,
@@ -1165,12 +1160,13 @@ symbols! {
         i64_legacy_fn_max_value,
         i64_legacy_fn_min_value,
         i64_legacy_mod,
-        i8,
-        i8_legacy_const_max,
-        i8_legacy_const_min,
-        i8_legacy_fn_max_value,
-        i8_legacy_fn_min_value,
-        i8_legacy_mod,
+        i128,
+        i128_legacy_const_max,
+        i128_legacy_const_min,
+        i128_legacy_fn_max_value,
+        i128_legacy_fn_min_value,
+        i128_legacy_mod,
+        i128_type,
         ident,
         if_let,
         if_let_guard,
@@ -1292,19 +1288,19 @@ symbols! {
         loaded_from_disk,
         local,
         local_inner_macros,
-        log10f128,
-        log10f16,
-        log10f32,
-        log10f64,
-        log2f128,
         log2f16,
         log2f32,
         log2f64,
+        log2f128,
+        log10f16,
+        log10f32,
+        log10f64,
+        log10f128,
         log_syntax,
-        logf128,
         logf16,
         logf32,
         logf64,
+        logf128,
         loongarch_target_feature,
         loop_break_value,
         loop_match,
@@ -1334,14 +1330,14 @@ symbols! {
         match_beginning_vert,
         match_default_bindings,
         matches_macro,
-        maximumf128,
         maximumf16,
         maximumf32,
         maximumf64,
-        maxnumf128,
+        maximumf128,
         maxnumf16,
         maxnumf32,
         maxnumf64,
+        maxnumf128,
         may_dangle,
         may_unwind,
         maybe_uninit,
@@ -1372,14 +1368,14 @@ symbols! {
         min_generic_const_args,
         min_specialization,
         min_type_alias_impl_trait,
-        minimumf128,
         minimumf16,
         minimumf32,
         minimumf64,
-        minnumf128,
+        minimumf128,
         minnumf16,
         minnumf32,
         minnumf64,
+        minnumf128,
         mips_target_feature,
         mir_assume,
         mir_basic_block,
@@ -1634,14 +1630,14 @@ symbols! {
         post_dash_lto: "post-lto",
         postfix_match,
         powerpc_target_feature,
-        powf128,
         powf16,
         powf32,
         powf64,
-        powif128,
+        powf128,
         powif16,
         powif32,
         powif64,
+        powif128,
         pre_dash_lto: "pre-lto",
         precise_capturing,
         precise_capturing_in_traits,
@@ -1786,14 +1782,14 @@ symbols! {
         ropi_rwpi: "ropi-rwpi",
         rotate_left,
         rotate_right,
-        round_ties_even_f128,
         round_ties_even_f16,
         round_ties_even_f32,
         round_ties_even_f64,
-        roundf128,
+        round_ties_even_f128,
         roundf16,
         roundf32,
         roundf64,
+        roundf128,
         rt,
         rtm_target_feature,
         rust,
@@ -1973,8 +1969,8 @@ symbols! {
         simd_fexp2,
         simd_ffi,
         simd_flog,
-        simd_flog10,
         simd_flog2,
+        simd_flog10,
         simd_floor,
         simd_fma,
         simd_fmax,
@@ -2022,10 +2018,10 @@ symbols! {
         simd_with_exposed_provenance,
         simd_xor,
         since,
-        sinf128,
         sinf16,
         sinf32,
         sinf64,
+        sinf128,
         size,
         size_of,
         size_of_val,
@@ -2047,10 +2043,10 @@ symbols! {
         specialization,
         speed,
         spotlight,
-        sqrtf128,
         sqrtf16,
         sqrtf32,
         sqrtf64,
+        sqrtf128,
         sreg,
         sreg_low16,
         sse,
@@ -2128,10 +2124,10 @@ symbols! {
         target_has_atomic,
         target_has_atomic_equal_alignment,
         target_has_atomic_load_store,
-        target_has_reliable_f128,
-        target_has_reliable_f128_math,
         target_has_reliable_f16,
         target_has_reliable_f16_math,
+        target_has_reliable_f128,
+        target_has_reliable_f128_math,
         target_os,
         target_pointer_width,
         target_thread_local,
@@ -2174,10 +2170,10 @@ symbols! {
         transparent_enums,
         transparent_unions,
         trivial_bounds,
-        truncf128,
         truncf16,
         truncf32,
         truncf64,
+        truncf128,
         try_blocks,
         try_capture,
         try_from,
@@ -2206,12 +2202,12 @@ symbols! {
         type_name,
         type_privacy_lints,
         typed_swap_nonoverlapping,
-        u128,
-        u128_legacy_const_max,
-        u128_legacy_const_min,
-        u128_legacy_fn_max_value,
-        u128_legacy_fn_min_value,
-        u128_legacy_mod,
+        u8,
+        u8_legacy_const_max,
+        u8_legacy_const_min,
+        u8_legacy_fn_max_value,
+        u8_legacy_fn_min_value,
+        u8_legacy_mod,
         u16,
         u16_legacy_const_max,
         u16_legacy_const_min,
@@ -2230,12 +2226,12 @@ symbols! {
         u64_legacy_fn_max_value,
         u64_legacy_fn_min_value,
         u64_legacy_mod,
-        u8,
-        u8_legacy_const_max,
-        u8_legacy_const_min,
-        u8_legacy_fn_max_value,
-        u8_legacy_fn_min_value,
-        u8_legacy_mod,
+        u128,
+        u128_legacy_const_max,
+        u128_legacy_const_min,
+        u128_legacy_fn_max_value,
+        u128_legacy_fn_min_value,
+        u128_legacy_mod,
         ub_checks,
         unaligned_volatile_load,
         unaligned_volatile_store,
@@ -2388,6 +2384,7 @@ symbols! {
         zfh,
         zfhmin,
         zmm_reg,
+        // tidy-alphabetical-end
     }
 }
 
diff --git a/library/alloc/src/collections/btree/set.rs b/library/alloc/src/collections/btree/set.rs
index aa9e5fce1d4cc..d50ce02bda743 100644
--- a/library/alloc/src/collections/btree/set.rs
+++ b/library/alloc/src/collections/btree/set.rs
@@ -1517,9 +1517,7 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> {
 
         // use stable sort to preserve the insertion order.
         arr.sort();
-        let iter = IntoIterator::into_iter(arr).map(|k| (k, SetValZST::default()));
-        let map = BTreeMap::bulk_build_from_sorted_iter(iter, Global);
-        BTreeSet { map }
+        BTreeSet::from_sorted_iter(IntoIterator::into_iter(arr), Global)
     }
 }
 
diff --git a/library/core/src/panic/location.rs b/library/core/src/panic/location.rs
index f1eedede8aab9..972270208852a 100644
--- a/library/core/src/panic/location.rs
+++ b/library/core/src/panic/location.rs
@@ -1,5 +1,7 @@
 use crate::ffi::CStr;
 use crate::fmt;
+use crate::marker::PhantomData;
+use crate::ptr::NonNull;
 
 /// A struct containing information about the location of a panic.
 ///
@@ -33,14 +35,13 @@ use crate::fmt;
 #[derive(Copy, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
 #[stable(feature = "panic_hooks", since = "1.10.0")]
 pub struct Location<'a> {
-    // Note: this filename will have exactly one nul byte at its end, but otherwise
-    // it must never contain interior nul bytes. This is relied on for the conversion
-    // to `CStr` below.
-    //
-    // The prefix of the string without the trailing nul byte will be a regular UTF8 `str`.
-    file_bytes_with_nul: &'a [u8],
+    // A raw pointer is used rather than a reference because the pointer is valid for one more byte
+    // than the length stored in this pointer; the additional byte is the NUL-terminator used by
+    // `Location::file_with_nul`.
+    filename: NonNull<str>,
     line: u32,
     col: u32,
+    _filename: PhantomData<&'a str>,
 }
 
 #[stable(feature = "panic_hooks", since = "1.10.0")]
@@ -143,10 +144,8 @@ impl<'a> Location<'a> {
     #[stable(feature = "panic_hooks", since = "1.10.0")]
     #[rustc_const_stable(feature = "const_location_fields", since = "1.79.0")]
     pub const fn file(&self) -> &str {
-        let str_len = self.file_bytes_with_nul.len() - 1;
-        // SAFETY: `file_bytes_with_nul` without the trailing nul byte is guaranteed to be
-        // valid UTF8.
-        unsafe { crate::str::from_raw_parts(self.file_bytes_with_nul.as_ptr(), str_len) }
+        // SAFETY: The filename is valid.
+        unsafe { self.filename.as_ref() }
     }
 
     /// Returns the name of the source file as a nul-terminated `CStr`.
@@ -157,9 +156,17 @@ impl<'a> Location<'a> {
     #[unstable(feature = "file_with_nul", issue = "141727")]
     #[inline]
     pub const fn file_with_nul(&self) -> &CStr {
-        // SAFETY: `file_bytes_with_nul` is guaranteed to have a trailing nul byte and no
-        // interior nul bytes.
-        unsafe { CStr::from_bytes_with_nul_unchecked(self.file_bytes_with_nul) }
+        let filename = self.filename.as_ptr();
+
+        // SAFETY: The filename is valid for `filename_len+1` bytes, so this addition can't
+        // overflow.
+        let cstr_len = unsafe { crate::mem::size_of_val_raw(filename).unchecked_add(1) };
+
+        // SAFETY: The filename is valid for `filename_len+1` bytes.
+        let slice = unsafe { crate::slice::from_raw_parts(filename.cast(), cstr_len) };
+
+        // SAFETY: The filename is guaranteed to have a trailing nul byte and no interior nul bytes.
+        unsafe { CStr::from_bytes_with_nul_unchecked(slice) }
     }
 
     /// Returns the line number from which the panic originated.
@@ -220,3 +227,8 @@ impl fmt::Display for Location<'_> {
         write!(formatter, "{}:{}:{}", self.file(), self.line, self.col)
     }
 }
+
+#[stable(feature = "panic_hooks", since = "1.10.0")]
+unsafe impl Send for Location<'_> {}
+#[stable(feature = "panic_hooks", since = "1.10.0")]
+unsafe impl Sync for Location<'_> {}
diff --git a/library/std/src/os/illumos/mod.rs b/library/std/src/os/illumos/mod.rs
index e61926f89356a..5fbe352e7cfbd 100644
--- a/library/std/src/os/illumos/mod.rs
+++ b/library/std/src/os/illumos/mod.rs
@@ -3,4 +3,5 @@
 #![stable(feature = "raw_ext", since = "1.1.0")]
 
 pub mod fs;
+pub mod net;
 pub mod raw;
diff --git a/library/std/src/os/illumos/net.rs b/library/std/src/os/illumos/net.rs
new file mode 100644
index 0000000000000..5ef4e1ec89e36
--- /dev/null
+++ b/library/std/src/os/illumos/net.rs
@@ -0,0 +1,50 @@
+//! illumos-specific networking functionality.
+
+#![unstable(feature = "unix_socket_exclbind", issue = "123481")]
+
+use crate::io;
+use crate::os::unix::net;
+use crate::sealed::Sealed;
+use crate::sys_common::AsInner;
+
+/// illumos-specific functionality for `AF_UNIX` sockets [`UnixDatagram`]
+/// and [`UnixStream`].
+///
+/// [`UnixDatagram`]: net::UnixDatagram
+/// [`UnixStream`]: net::UnixStream
+#[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+pub trait UnixSocketExt: Sealed {
+    /// Enables exclusive binding on the socket.
+    ///
+    /// If true and if the socket had been set with `SO_REUSEADDR`,
+    /// it neutralises its effect.
+    /// See [`man 3 tcp`](https://docs.oracle.com/cd/E88353_01/html/E37843/setsockopt-3c.html)
+    #[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+    fn so_exclbind(&self, excl: bool) -> io::Result<()>;
+
+    /// Get the bind exclusivity bind state of the socket.
+    #[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+    fn exclbind(&self) -> io::Result<bool>;
+}
+
+#[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+impl UnixSocketExt for net::UnixDatagram {
+    fn exclbind(&self) -> io::Result<bool> {
+        self.as_inner().exclbind()
+    }
+
+    fn so_exclbind(&self, excl: bool) -> io::Result<()> {
+        self.as_inner().set_exclbind(excl)
+    }
+}
+
+#[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+impl UnixSocketExt for net::UnixStream {
+    fn exclbind(&self) -> io::Result<bool> {
+        self.as_inner().exclbind()
+    }
+
+    fn so_exclbind(&self, excl: bool) -> io::Result<()> {
+        self.as_inner().set_exclbind(excl)
+    }
+}
diff --git a/library/std/src/os/solaris/mod.rs b/library/std/src/os/solaris/mod.rs
index e4cfd53291a6e..b4e836208491c 100644
--- a/library/std/src/os/solaris/mod.rs
+++ b/library/std/src/os/solaris/mod.rs
@@ -3,4 +3,5 @@
 #![stable(feature = "raw_ext", since = "1.1.0")]
 
 pub mod fs;
+pub mod net;
 pub mod raw;
diff --git a/library/std/src/os/solaris/net.rs b/library/std/src/os/solaris/net.rs
new file mode 100644
index 0000000000000..ca841f15b0e72
--- /dev/null
+++ b/library/std/src/os/solaris/net.rs
@@ -0,0 +1,50 @@
+//! solaris-specific networking functionality.
+
+#![unstable(feature = "unix_socket_exclbind", issue = "123481")]
+
+use crate::io;
+use crate::os::unix::net;
+use crate::sealed::Sealed;
+use crate::sys_common::AsInner;
+
+/// solaris-specific functionality for `AF_UNIX` sockets [`UnixDatagram`]
+/// and [`UnixStream`].
+///
+/// [`UnixDatagram`]: net::UnixDatagram
+/// [`UnixStream`]: net::UnixStream
+#[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+pub trait UnixSocketExt: Sealed {
+    /// Enables exclusive binding on the socket.
+    ///
+    /// If true and if the socket had been set with `SO_REUSEADDR`,
+    /// it neutralises its effect.
+    /// See [`man 3 tcp`](https://docs.oracle.com/cd/E88353_01/html/E37843/setsockopt-3c.html)
+    #[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+    fn so_exclbind(&self, excl: bool) -> io::Result<()>;
+
+    /// Get the bind exclusivity bind state of the socket.
+    #[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+    fn exclbind(&self) -> io::Result<bool>;
+}
+
+#[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+impl UnixSocketExt for net::UnixDatagram {
+    fn exclbind(&self) -> io::Result<bool> {
+        self.as_inner().exclbind()
+    }
+
+    fn so_exclbind(&self, excl: bool) -> io::Result<()> {
+        self.as_inner().set_exclbind(excl)
+    }
+}
+
+#[unstable(feature = "unix_socket_exclbind", issue = "123481")]
+impl UnixSocketExt for net::UnixStream {
+    fn exclbind(&self) -> io::Result<bool> {
+        self.as_inner().exclbind()
+    }
+
+    fn so_exclbind(&self, excl: bool) -> io::Result<()> {
+        self.as_inner().set_exclbind(excl)
+    }
+}
diff --git a/library/std/src/sys/net/connection/socket/unix.rs b/library/std/src/sys/net/connection/socket/unix.rs
index b2a4961c3c536..cc111f3521bc1 100644
--- a/library/std/src/sys/net/connection/socket/unix.rs
+++ b/library/std/src/sys/net/connection/socket/unix.rs
@@ -522,6 +522,21 @@ impl Socket {
         Ok(name)
     }
 
+    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
+    pub fn set_exclbind(&self, excl: bool) -> io::Result<()> {
+        // not yet on libc crate
+        const SO_EXCLBIND: i32 = 0x1015;
+        setsockopt(self, libc::SOL_SOCKET, SO_EXCLBIND, excl)
+    }
+
+    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
+    pub fn exclbind(&self) -> io::Result<bool> {
+        // not yet on libc crate
+        const SO_EXCLBIND: i32 = 0x1015;
+        let raw: c_int = getsockopt(self, libc::SOL_SOCKET, SO_EXCLBIND)?;
+        Ok(raw != 0)
+    }
+
     #[cfg(any(target_os = "android", target_os = "linux",))]
     pub fn set_passcred(&self, passcred: bool) -> io::Result<()> {
         setsockopt(self, libc::SOL_SOCKET, libc::SO_PASSCRED, passcred as libc::c_int)
diff --git a/library/std/src/sys/process/unix/unix.rs b/library/std/src/sys/process/unix/unix.rs
index 1fe80c13b81c8..bbd03e2b0c4e9 100644
--- a/library/std/src/sys/process/unix/unix.rs
+++ b/library/std/src/sys/process/unix/unix.rs
@@ -968,8 +968,8 @@ impl Process {
     }
 
     pub(crate) fn send_signal(&self, signal: i32) -> io::Result<()> {
-        // If we've already waited on this process then the pid can be recycled
-        // and used for another process, and we probably shouldn't be signaling
+        // If we've already waited on this process then the pid can be recycled and
+        // used for another process, and we probably shouldn't be sending signals to
         // random processes, so return Ok because the process has exited already.
         if self.status.is_some() {
             return Ok(());
diff --git a/library/std/src/sys/process/unix/vxworks.rs b/library/std/src/sys/process/unix/vxworks.rs
index 51ae8c56bdb9b..2275cbb946a9c 100644
--- a/library/std/src/sys/process/unix/vxworks.rs
+++ b/library/std/src/sys/process/unix/vxworks.rs
@@ -151,8 +151,8 @@ impl Process {
     }
 
     pub fn send_signal(&self, signal: i32) -> io::Result<()> {
-        // If we've already waited on this process then the pid can be recycled
-        // and used for another process, and we probably shouldn't be killing
+        // If we've already waited on this process then the pid can be recycled and
+        // used for another process, and we probably shouldn't be sending signals to
         // random processes, so return Ok because the process has exited already.
         if self.status.is_some() {
             Ok(())
diff --git a/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh b/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh
index 064ac5b0a5e42..924bdbc761505 100755
--- a/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh
+++ b/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh
@@ -10,4 +10,7 @@ python3 ../x.py build --set rust.debug=true opt-dist
     build-manifest bootstrap
 
 # Use GCC for building GCC, as it seems to behave badly when built with Clang
-CC=/rustroot/bin/cc CXX=/rustroot/bin/c++ python3 ../x.py dist gcc
+# Only build GCC on full builds, not try builds
+if [ "${DIST_TRY_BUILD:-0}" == "0" ]; then
+    CC=/rustroot/bin/cc CXX=/rustroot/bin/c++ python3 ../x.py dist gcc
+fi
diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs
index e16acc9622fc1..e33bdc0db32f8 100644
--- a/src/librustdoc/html/render/print_item.rs
+++ b/src/librustdoc/html/render/print_item.rs
@@ -1703,6 +1703,7 @@ fn render_enum_fields(
                 if v.is_stripped() {
                     continue;
                 }
+                write!(w, "{}", render_attributes_in_pre(v, TAB, cx))?;
                 w.write_str(TAB)?;
                 match v.kind {
                     clean::VariantItem(ref var) => match var.kind {
diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs
index 4c53ea42793cd..0520eff0fa277 100644
--- a/src/tools/build-manifest/src/main.rs
+++ b/src/tools/build-manifest/src/main.rs
@@ -14,6 +14,7 @@ use crate::versions::{PkgType, Versions};
 
 static HOSTS: &[&str] = &[
     "aarch64-apple-darwin",
+    "aarch64-pc-windows-gnullvm",
     "aarch64-pc-windows-msvc",
     "aarch64-unknown-linux-gnu",
     "aarch64-unknown-linux-musl",
@@ -44,6 +45,7 @@ static HOSTS: &[&str] = &[
     "x86_64-apple-darwin",
     "x86_64-pc-solaris",
     "x86_64-pc-windows-gnu",
+    "x86_64-pc-windows-gnullvm",
     "x86_64-pc-windows-msvc",
     "x86_64-unknown-freebsd",
     "x86_64-unknown-illumos",
@@ -470,7 +472,7 @@ impl Builder {
                 }
                 // so is rust-mingw if it's available for the target
                 PkgType::RustMingw => {
-                    if host.contains("pc-windows-gnu") {
+                    if host.ends_with("pc-windows-gnu") {
                         components.push(host_component(pkg));
                     }
                 }
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index 192d4f444c2ff..d3123caaa4788 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -80,6 +80,15 @@ dependencies = [
  "rustc-demangle",
 ]
 
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "bitflags"
 version = "2.9.0"
@@ -112,6 +121,26 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "capstone"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a"
+dependencies = [
+ "capstone-sys",
+ "libc",
+]
+
+[[package]]
+name = "capstone-sys"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0"
+dependencies = [
+ "cc",
+ "libc",
+]
+
 [[package]]
 name = "cargo-platform"
 version = "0.1.9"
@@ -150,6 +179,12 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
 [[package]]
 name = "chrono"
 version = "0.4.40"
@@ -224,7 +259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
 dependencies = [
  "lazy_static",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -243,7 +278,7 @@ dependencies = [
  "libc",
  "once_cell",
  "unicode-width 0.2.0",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -298,7 +333,7 @@ dependencies = [
  "libc",
  "option-ext",
  "redox_users",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -314,7 +349,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e"
 dependencies = [
  "libc",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -333,6 +368,12 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -400,6 +441,25 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "ipc-channel"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fb8251fb7bcd9ccd3725ed8deae9fe7db8e586495c9eb5b0c52e6233e5e75ea"
+dependencies = [
+ "bincode",
+ "crossbeam-channel",
+ "fnv",
+ "lazy_static",
+ "libc",
+ "mio",
+ "rand 0.8.5",
+ "serde",
+ "tempfile",
+ "uuid",
+ "windows",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.15"
@@ -533,30 +593,58 @@ dependencies = [
  "adler",
 ]
 
+[[package]]
+name = "mio"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
+dependencies = [
+ "libc",
+ "log",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "miri"
 version = "0.1.0"
 dependencies = [
  "aes",
  "bitflags",
+ "capstone",
  "chrono",
  "chrono-tz",
  "colored",
  "directories",
  "getrandom 0.3.2",
+ "ipc-channel",
  "libc",
  "libffi",
  "libloading",
  "measureme",
+ "nix",
  "rand 0.9.0",
  "regex",
  "rustc_version",
+ "serde",
  "smallvec",
  "tempfile",
  "tikv-jemalloc-sys",
  "ui_test",
 ]
 
+[[package]]
+name = "nix"
+version = "0.30.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
+dependencies = [
+ "bitflags",
+ "cfg-if",
+ "cfg_aliases",
+ "libc",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.19"
@@ -748,6 +836,8 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
  "rand_core 0.6.4",
 ]
 
@@ -757,11 +847,21 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
 dependencies = [
- "rand_chacha",
+ "rand_chacha 0.9.0",
  "rand_core 0.9.3",
  "zerocopy",
 ]
 
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
+]
+
 [[package]]
 name = "rand_chacha"
 version = "0.9.0"
@@ -777,6 +877,9 @@ name = "rand_core"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.15",
+]
 
 [[package]]
 name = "rand_core"
@@ -879,7 +982,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -993,7 +1096,7 @@ dependencies = [
  "getrandom 0.3.2",
  "once_cell",
  "rustix",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -1147,6 +1250,15 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
 
+[[package]]
+name = "uuid"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
+dependencies = [
+ "getrandom 0.3.2",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.1"
@@ -1241,6 +1353,79 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "windows"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
+dependencies = [
+ "windows-core",
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-result",
+ "windows-strings",
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.58.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
+dependencies = [
+ "windows-result",
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.59.0"
diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml
index a314488bb2533..0b4b8e26bb8d0 100644
--- a/src/tools/miri/Cargo.toml
+++ b/src/tools/miri/Cargo.toml
@@ -40,6 +40,12 @@ libc = "0.2"
 libffi = "4.0.0"
 libloading = "0.8"
 
+[target.'cfg(target_os = "linux")'.dependencies]
+nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] }
+ipc-channel = "0.19.0"
+serde = { version = "1.0.219", features = ["derive"] }
+capstone = "0.13"
+
 [dev-dependencies]
 ui_test = "0.29.1"
 colored = "2"
diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md
index 5554c7975ff3b..e609fb9b6f9e4 100644
--- a/src/tools/miri/README.md
+++ b/src/tools/miri/README.md
@@ -419,6 +419,9 @@ to Miri failing to detect cases of undefined behavior in a program.
   Finally, the flag is **unsound** in the sense that Miri stops tracking details such as
   initialization and provenance on memory shared with native code, so it is easily possible to write
   code that has UB which is missed by Miri.
+* `-Zmiri-native-lib-enable-tracing` enables the WIP detailed tracing mode for invoking native code.
+  Note that this flag is only meaningful on Linux systems; other Unixes (currently) do not support
+  tracing mode.
 * `-Zmiri-measureme=<name>` enables `measureme` profiling for the interpreted program.
    This can be used to find which parts of your program are executing slowly under Miri.
    The profile is written out to a file inside a directory called `<name>`, and can be processed
@@ -592,7 +595,7 @@ Definite bugs found:
 * [Occasional memory leak in `std::mpsc` channels](https://github.com/rust-lang/rust/issues/121582) (original code in [crossbeam](https://github.com/crossbeam-rs/crossbeam/pull/1084))
 * [Weak-memory-induced memory leak in Windows thread-local storage](https://github.com/rust-lang/rust/pull/124281)
 * [A bug in the new `RwLock::downgrade` implementation](https://rust-lang.zulipchat.com/#narrow/channel/269128-miri/topic/Miri.20error.20library.20test) (caught by Miri before it landed in the Rust repo)
-* [Mockall reading unintialized memory when mocking `std::io::Read::read`, even if all expectations are satisfied](https://github.com/asomers/mockall/issues/647) (caught by Miri running Tokio's test suite)
+* [Mockall reading uninitialized memory when mocking `std::io::Read::read`, even if all expectations are satisfied](https://github.com/asomers/mockall/issues/647) (caught by Miri running Tokio's test suite)
 * [`ReentrantLock` not correctly dealing with reuse of addresses for TLS storage of different threads](https://github.com/rust-lang/rust/pull/141248)
 
 Violations of [Stacked Borrows] found that are likely bugs (but Stacked Borrows is currently just an experiment):
diff --git a/src/tools/miri/miri-script/src/commands.rs b/src/tools/miri/miri-script/src/commands.rs
index 86362145d4759..d18e9c7791fc2 100644
--- a/src/tools/miri/miri-script/src/commands.rs
+++ b/src/tools/miri/miri-script/src/commands.rs
@@ -32,6 +32,7 @@ impl MiriEnv {
         &mut self,
         quiet: bool,
         target: Option<impl AsRef<OsStr>>,
+        features: &[String],
     ) -> Result<PathBuf> {
         if let Some(miri_sysroot) = self.sh.var_os("MIRI_SYSROOT") {
             // Sysroot already set, use that.
@@ -39,8 +40,8 @@ impl MiriEnv {
         }
 
         // Make sure everything is built. Also Miri itself.
-        self.build(".", &[], quiet)?;
-        self.build("cargo-miri", &[], quiet)?;
+        self.build(".", features, &[], quiet)?;
+        self.build("cargo-miri", &[], &[], quiet)?;
 
         let target_flag = if let Some(target) = &target {
             vec![OsStr::new("--target"), target.as_ref()]
@@ -58,7 +59,7 @@ impl MiriEnv {
         }
 
         let mut cmd = self
-            .cargo_cmd("cargo-miri", "run")
+            .cargo_cmd("cargo-miri", "run", &[])
             .arg("--quiet")
             .arg("--")
             .args(&["miri", "setup", "--print-sysroot"])
@@ -90,7 +91,9 @@ impl Command {
             Self::fmt(vec![])?;
         }
         if auto_clippy {
-            Self::clippy(vec![])?;
+            // no features for auto actions, see
+            // https://github.com/rust-lang/miri/pull/4396#discussion_r2149654845
+            Self::clippy(vec![], vec![])?;
         }
 
         Ok(())
@@ -175,16 +178,16 @@ impl Command {
         }
         // Then run the actual command.
         match self {
-            Command::Install { flags } => Self::install(flags),
-            Command::Build { flags } => Self::build(flags),
-            Command::Check { flags } => Self::check(flags),
-            Command::Test { bless, flags, target, coverage } =>
-                Self::test(bless, flags, target, coverage),
-            Command::Run { dep, verbose, target, edition, flags } =>
-                Self::run(dep, verbose, target, edition, flags),
-            Command::Doc { flags } => Self::doc(flags),
+            Command::Install { features, flags } => Self::install(features, flags),
+            Command::Build { features, flags } => Self::build(features, flags),
+            Command::Check { features, flags } => Self::check(features, flags),
+            Command::Test { bless, target, coverage, features, flags } =>
+                Self::test(bless, target, coverage, features, flags),
+            Command::Run { dep, verbose, target, edition, features, flags } =>
+                Self::run(dep, verbose, target, edition, features, flags),
+            Command::Doc { features, flags } => Self::doc(features, flags),
             Command::Fmt { flags } => Self::fmt(flags),
-            Command::Clippy { flags } => Self::clippy(flags),
+            Command::Clippy { features, flags } => Self::clippy(features, flags),
             Command::Bench { target, no_install, save_baseline, load_baseline, benches } =>
                 Self::bench(target, no_install, save_baseline, load_baseline, benches),
             Command::Toolchain { flags } => Self::toolchain(flags),
@@ -494,7 +497,7 @@ impl Command {
 
         if !no_install {
             // Make sure we have an up-to-date Miri installed and selected the right toolchain.
-            Self::install(vec![])?;
+            Self::install(vec![], vec![])?;
         }
         let results_json_dir = if save_baseline.is_some() || load_baseline.is_some() {
             Some(TempDir::new()?)
@@ -601,47 +604,48 @@ impl Command {
         Ok(())
     }
 
-    fn install(flags: Vec<String>) -> Result<()> {
+    fn install(features: Vec<String>, flags: Vec<String>) -> Result<()> {
         let e = MiriEnv::new()?;
-        e.install_to_sysroot(e.miri_dir.clone(), &flags)?;
-        e.install_to_sysroot(path!(e.miri_dir / "cargo-miri"), &flags)?;
+        e.install_to_sysroot(".", &features, &flags)?;
+        e.install_to_sysroot("cargo-miri", &[], &flags)?;
         Ok(())
     }
 
-    fn build(flags: Vec<String>) -> Result<()> {
+    fn build(features: Vec<String>, flags: Vec<String>) -> Result<()> {
         let e = MiriEnv::new()?;
-        e.build(".", &flags, /* quiet */ false)?;
-        e.build("cargo-miri", &flags, /* quiet */ false)?;
+        e.build(".", &features, &flags, /* quiet */ false)?;
+        e.build("cargo-miri", &[], &flags, /* quiet */ false)?;
         Ok(())
     }
 
-    fn check(flags: Vec<String>) -> Result<()> {
+    fn check(features: Vec<String>, flags: Vec<String>) -> Result<()> {
         let e = MiriEnv::new()?;
-        e.check(".", &flags)?;
-        e.check("cargo-miri", &flags)?;
+        e.check(".", &features, &flags)?;
+        e.check("cargo-miri", &[], &flags)?;
         Ok(())
     }
 
-    fn doc(flags: Vec<String>) -> Result<()> {
+    fn doc(features: Vec<String>, flags: Vec<String>) -> Result<()> {
         let e = MiriEnv::new()?;
-        e.doc(".", &flags)?;
-        e.doc("cargo-miri", &flags)?;
+        e.doc(".", &features, &flags)?;
+        e.doc("cargo-miri", &[], &flags)?;
         Ok(())
     }
 
-    fn clippy(flags: Vec<String>) -> Result<()> {
+    fn clippy(features: Vec<String>, flags: Vec<String>) -> Result<()> {
         let e = MiriEnv::new()?;
-        e.clippy(".", &flags)?;
-        e.clippy("cargo-miri", &flags)?;
-        e.clippy("miri-script", &flags)?;
+        e.clippy(".", &features, &flags)?;
+        e.clippy("cargo-miri", &[], &flags)?;
+        e.clippy("miri-script", &[], &flags)?;
         Ok(())
     }
 
     fn test(
         bless: bool,
-        mut flags: Vec<String>,
         target: Option<String>,
         coverage: bool,
+        features: Vec<String>,
+        mut flags: Vec<String>,
     ) -> Result<()> {
         let mut e = MiriEnv::new()?;
 
@@ -652,7 +656,7 @@ impl Command {
         }
 
         // Prepare a sysroot. (Also builds cargo-miri, which we need.)
-        e.build_miri_sysroot(/* quiet */ false, target.as_deref())?;
+        e.build_miri_sysroot(/* quiet */ false, target.as_deref(), &features)?;
 
         // Forward information to test harness.
         if bless {
@@ -672,10 +676,10 @@ impl Command {
 
         // Then test, and let caller control flags.
         // Only in root project as `cargo-miri` has no tests.
-        e.test(".", &flags)?;
+        e.test(".", &features, &flags)?;
 
         if let Some(coverage) = &coverage {
-            coverage.show_coverage_report(&e)?;
+            coverage.show_coverage_report(&e, &features)?;
         }
 
         Ok(())
@@ -686,14 +690,17 @@ impl Command {
         verbose: bool,
         target: Option<String>,
         edition: Option<String>,
+        features: Vec<String>,
         flags: Vec<String>,
     ) -> Result<()> {
         let mut e = MiriEnv::new()?;
 
         // Preparation: get a sysroot, and get the miri binary.
-        let miri_sysroot = e.build_miri_sysroot(/* quiet */ !verbose, target.as_deref())?;
-        let miri_bin =
-            e.build_get_binary(".").context("failed to get filename of miri executable")?;
+        let miri_sysroot =
+            e.build_miri_sysroot(/* quiet */ !verbose, target.as_deref(), &features)?;
+        let miri_bin = e
+            .build_get_binary(".", &features)
+            .context("failed to get filename of miri executable")?;
 
         // More flags that we will pass before `flags`
         // (because `flags` may contain `--`).
@@ -718,7 +725,7 @@ impl Command {
         // The basic command that executes the Miri driver.
         let mut cmd = if dep {
             // We invoke the test suite as that has all the logic for running with dependencies.
-            e.cargo_cmd(".", "test")
+            e.cargo_cmd(".", "test", &features)
                 .args(&["--test", "ui"])
                 .args(quiet_flag)
                 .arg("--")
diff --git a/src/tools/miri/miri-script/src/coverage.rs b/src/tools/miri/miri-script/src/coverage.rs
index 8cafcea0d16f3..cdf2bbb93304f 100644
--- a/src/tools/miri/miri-script/src/coverage.rs
+++ b/src/tools/miri/miri-script/src/coverage.rs
@@ -49,7 +49,7 @@ impl CoverageReport {
 
     /// show_coverage_report will print coverage information using the artifact
     /// files in `self.path`.
-    pub fn show_coverage_report(&self, e: &MiriEnv) -> Result<()> {
+    pub fn show_coverage_report(&self, e: &MiriEnv, features: &[String]) -> Result<()> {
         let profraw_files = self.profraw_files()?;
 
         let profdata_bin = path!(e.libdir / ".." / "bin" / "llvm-profdata");
@@ -63,8 +63,9 @@ impl CoverageReport {
 
         // Create the coverage report.
         let cov_bin = path!(e.libdir / ".." / "bin" / "llvm-cov");
-        let miri_bin =
-            e.build_get_binary(".").context("failed to get filename of miri executable")?;
+        let miri_bin = e
+            .build_get_binary(".", features)
+            .context("failed to get filename of miri executable")?;
         cmd!(
             e.sh,
             "{cov_bin} report --instr-profile={merged_file} --object {miri_bin} --sources src/"
diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs
index 6aab2f79bd78c..673d658cf1d5d 100644
--- a/src/tools/miri/miri-script/src/main.rs
+++ b/src/tools/miri/miri-script/src/main.rs
@@ -14,24 +14,40 @@ pub enum Command {
     /// Sets up the rpath such that the installed binary should work in any
     /// working directory.
     Install {
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to `cargo install`.
         #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
         flags: Vec<String>,
     },
     /// Build Miri.
     Build {
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to `cargo build`.
         #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
         flags: Vec<String>,
     },
     /// Check Miri.
     Check {
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to `cargo check`.
         #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
         flags: Vec<String>,
     },
     /// Check Miri with Clippy.
     Clippy {
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to `cargo clippy`.
         #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
         flags: Vec<String>,
@@ -47,6 +63,10 @@ pub enum Command {
         /// Produce coverage report.
         #[arg(long)]
         coverage: bool,
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to the test harness.
         #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
         flags: Vec<String>,
@@ -67,6 +87,10 @@ pub enum Command {
         /// The Rust edition.
         #[arg(long)]
         edition: Option<String>,
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to `miri`.
         ///
         /// The flags set in `MIRIFLAGS` are added in front of these flags.
@@ -75,6 +99,10 @@ pub enum Command {
     },
     /// Build documentation.
     Doc {
+        /// Pass features to cargo invocations on the "miri" crate in the root. This option does
+        /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri".
+        #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)]
+        features: Vec<String>,
         /// Flags that are passed through to `cargo doc`.
         #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
         flags: Vec<String>,
@@ -144,13 +172,13 @@ impl Command {
         }
 
         match self {
-            Self::Install { flags }
-            | Self::Build { flags }
-            | Self::Check { flags }
-            | Self::Doc { flags }
+            Self::Install { flags, .. }
+            | Self::Build { flags, .. }
+            | Self::Check { flags, .. }
+            | Self::Doc { flags, .. }
             | Self::Fmt { flags }
             | Self::Toolchain { flags }
-            | Self::Clippy { flags }
+            | Self::Clippy { flags, .. }
             | Self::Run { flags, .. }
             | Self::Test { flags, .. } => {
                 flags.extend(remainder);
diff --git a/src/tools/miri/miri-script/src/util.rs b/src/tools/miri/miri-script/src/util.rs
index 5c2a055990fdd..c100cf195ba49 100644
--- a/src/tools/miri/miri-script/src/util.rs
+++ b/src/tools/miri/miri-script/src/util.rs
@@ -26,6 +26,12 @@ pub fn flagsplit(flags: &str) -> Vec<String> {
     flags.split(' ').map(str::trim).filter(|s| !s.is_empty()).map(str::to_string).collect()
 }
 
+/// Turns a list of features into a list of arguments to pass to cargo invocations.
+/// Each feature will go in its own argument, e.g. "--features feat1 --features feat2".
+fn features_to_args(features: &[String]) -> impl IntoIterator<Item = &str> {
+    features.iter().flat_map(|feat| ["--features", feat])
+}
+
 /// Some extra state we track for building Miri, such as the right RUSTFLAGS.
 pub struct MiriEnv {
     /// miri_dir is the root of the miri repository checkout we are working in.
@@ -116,44 +122,70 @@ impl MiriEnv {
         Ok(MiriEnv { miri_dir, toolchain, sh, sysroot, cargo_extra_flags, libdir })
     }
 
-    pub fn cargo_cmd(&self, crate_dir: impl AsRef<OsStr>, cmd: &str) -> Cmd<'_> {
+    /// Make sure the `features` you pass here exist for the specified `crate_dir`. For example, the
+    /// "--features" parameter of [crate::Command]s is intended only for the "miri" root crate.
+    pub fn cargo_cmd(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        cmd: &str,
+        features: &[String],
+    ) -> Cmd<'_> {
         let MiriEnv { toolchain, cargo_extra_flags, .. } = self;
         let manifest_path = path!(self.miri_dir / crate_dir.as_ref() / "Cargo.toml");
+        let features = features_to_args(features);
         cmd!(
             self.sh,
-            "cargo +{toolchain} {cmd} {cargo_extra_flags...} --manifest-path {manifest_path}"
+            "cargo +{toolchain} {cmd} {cargo_extra_flags...} --manifest-path {manifest_path} {features...}"
         )
     }
 
+    /// Make sure the `features` you pass here exist for the specified `crate_dir`. For example, the
+    /// "--features" parameter of [crate::Command]s is intended only for the "miri" root crate.
     pub fn install_to_sysroot(
         &self,
-        path: impl AsRef<OsStr>,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
         args: impl IntoIterator<Item = impl AsRef<OsStr>>,
     ) -> Result<()> {
         let MiriEnv { sysroot, toolchain, cargo_extra_flags, .. } = self;
-        let path = path!(self.miri_dir / path.as_ref());
+        let path = path!(self.miri_dir / crate_dir.as_ref());
+        let features = features_to_args(features);
         // Install binaries to the miri toolchain's `sysroot` so they do not interact with other toolchains.
         // (Not using `cargo_cmd` as `install` is special and doesn't use `--manifest-path`.)
-        cmd!(self.sh, "cargo +{toolchain} install {cargo_extra_flags...} --path {path} --force --root {sysroot} {args...}").run()?;
+        cmd!(self.sh, "cargo +{toolchain} install {cargo_extra_flags...} --path {path} --force --root {sysroot} {features...} {args...}").run()?;
         Ok(())
     }
 
-    pub fn build(&self, crate_dir: impl AsRef<OsStr>, args: &[String], quiet: bool) -> Result<()> {
+    pub fn build(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
+        args: &[String],
+        quiet: bool,
+    ) -> Result<()> {
         let quiet_flag = if quiet { Some("--quiet") } else { None };
         // We build all targets, since building *just* the bin target doesnot include
         // `dev-dependencies` and that changes feature resolution. This also gets us more
         // parallelism in `./miri test` as we build Miri and its tests together.
-        let mut cmd =
-            self.cargo_cmd(crate_dir, "build").args(&["--all-targets"]).args(quiet_flag).args(args);
+        let mut cmd = self
+            .cargo_cmd(crate_dir, "build", features)
+            .args(&["--all-targets"])
+            .args(quiet_flag)
+            .args(args);
         cmd.set_quiet(quiet);
         cmd.run()?;
         Ok(())
     }
 
     /// Returns the path to the main crate binary. Assumes that `build` has been called before.
-    pub fn build_get_binary(&self, crate_dir: impl AsRef<OsStr>) -> Result<PathBuf> {
-        let cmd =
-            self.cargo_cmd(crate_dir, "build").args(&["--all-targets", "--message-format=json"]);
+    pub fn build_get_binary(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
+    ) -> Result<PathBuf> {
+        let cmd = self
+            .cargo_cmd(crate_dir, "build", features)
+            .args(&["--all-targets", "--message-format=json"]);
         let output = cmd.output()?;
         let mut bin = None;
         for line in output.stdout.lines() {
@@ -174,23 +206,43 @@ impl MiriEnv {
         bin.ok_or_else(|| anyhow!("found no binary in cargo output"))
     }
 
-    pub fn check(&self, crate_dir: impl AsRef<OsStr>, args: &[String]) -> Result<()> {
-        self.cargo_cmd(crate_dir, "check").arg("--all-targets").args(args).run()?;
+    pub fn check(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
+        args: &[String],
+    ) -> Result<()> {
+        self.cargo_cmd(crate_dir, "check", features).arg("--all-targets").args(args).run()?;
         Ok(())
     }
 
-    pub fn doc(&self, crate_dir: impl AsRef<OsStr>, args: &[String]) -> Result<()> {
-        self.cargo_cmd(crate_dir, "doc").args(args).run()?;
+    pub fn doc(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
+        args: &[String],
+    ) -> Result<()> {
+        self.cargo_cmd(crate_dir, "doc", features).args(args).run()?;
         Ok(())
     }
 
-    pub fn clippy(&self, crate_dir: impl AsRef<OsStr>, args: &[String]) -> Result<()> {
-        self.cargo_cmd(crate_dir, "clippy").arg("--all-targets").args(args).run()?;
+    pub fn clippy(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
+        args: &[String],
+    ) -> Result<()> {
+        self.cargo_cmd(crate_dir, "clippy", features).arg("--all-targets").args(args).run()?;
         Ok(())
     }
 
-    pub fn test(&self, crate_dir: impl AsRef<OsStr>, args: &[String]) -> Result<()> {
-        self.cargo_cmd(crate_dir, "test").args(args).run()?;
+    pub fn test(
+        &self,
+        crate_dir: impl AsRef<OsStr>,
+        features: &[String],
+        args: &[String],
+    ) -> Result<()> {
+        self.cargo_cmd(crate_dir, "test", features).args(args).run()?;
         Ok(())
     }
 
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 90c6e31e9d119..b2ab17fd76ad0 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-0cbc0764380630780a275c437260e4d4d5f28c92
+d41e12f1f4e4884c356f319b881921aa37040de5
diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs
index 3a7879f372abc..a7bb9b4da7580 100644
--- a/src/tools/miri/src/alloc/isolated_alloc.rs
+++ b/src/tools/miri/src/alloc/isolated_alloc.rs
@@ -1,5 +1,7 @@
-use std::alloc::{self, Layout};
+use std::alloc::Layout;
+use std::ptr::NonNull;
 
+use nix::sys::mman;
 use rustc_index::bit_set::DenseBitSet;
 
 /// How many bytes of memory each bit in the bitset represents.
@@ -11,8 +13,8 @@ const COMPRESSION_FACTOR: usize = 4;
 pub struct IsolatedAlloc {
     /// Pointers to page-aligned memory that has been claimed by the allocator.
     /// Every pointer here must point to a page-sized allocation claimed via
-    /// the global allocator. These pointers are used for "small" allocations.
-    page_ptrs: Vec<*mut u8>,
+    /// mmap. These pointers are used for "small" allocations.
+    page_ptrs: Vec<NonNull<u8>>,
     /// Metadata about which bytes have been allocated on each page. The length
     /// of this vector must be the same as that of `page_ptrs`, and the domain
     /// size of the bitset must be exactly `page_size / COMPRESSION_FACTOR`.
@@ -24,7 +26,7 @@ pub struct IsolatedAlloc {
     page_infos: Vec<DenseBitSet<usize>>,
     /// Pointers to multiple-page-sized allocations. These must also be page-aligned,
     /// with their size stored as the second element of the vector.
-    huge_ptrs: Vec<(*mut u8, usize)>,
+    huge_ptrs: Vec<(NonNull<u8>, usize)>,
     /// The host (not emulated) page size.
     page_size: usize,
 }
@@ -52,20 +54,26 @@ impl IsolatedAlloc {
         Layout::from_size_align(size, align).unwrap()
     }
 
-    /// Returns the layout used to allocate the pages that hold small allocations.
+    /// For greater-than-page-sized allocations, returns the allocation size we need to request
+    /// including the slack we need to satisfy the alignment request.
     #[inline]
-    fn page_layout(&self) -> Layout {
-        Layout::from_size_align(self.page_size, self.page_size).unwrap()
-    }
-
-    /// If the allocation is greater than a page, then round to the nearest page #.
-    #[inline]
-    fn huge_normalized_layout(layout: Layout, page_size: usize) -> Layout {
+    fn huge_normalized_layout(&self, layout: Layout) -> usize {
         // Allocate in page-sized chunks
-        let size = layout.size().next_multiple_of(page_size);
+        let size = layout.size().next_multiple_of(self.page_size);
         // And make sure the align is at least one page
-        let align = std::cmp::max(layout.align(), page_size);
-        Layout::from_size_align(size, align).unwrap()
+        let align = std::cmp::max(layout.align(), self.page_size);
+        // pg_count gives us the # of pages needed to satisfy the size. For
+        // align > page_size where align = n * page_size, a sufficently-aligned
+        // address must exist somewhere in the range of
+        // some_page_aligned_address..some_page_aligned_address + (n-1) * page_size
+        // (since if some_page_aligned_address + n * page_size is sufficently aligned,
+        // then so is some_page_aligned_address itself per the definition of n, so we
+        // can avoid using that 1 extra page).
+        // Thus we allocate n-1 extra pages
+        let pg_count = size.div_ceil(self.page_size);
+        let extra_pages = align.strict_div(self.page_size).saturating_sub(1);
+
+        pg_count.strict_add(extra_pages).strict_mul(self.page_size)
     }
 
     /// Determined whether a given normalized (size, align) should be sent to
@@ -78,7 +86,7 @@ impl IsolatedAlloc {
     /// Allocates memory as described in `Layout`. This memory should be deallocated
     /// by calling `dealloc` on this same allocator.
     ///
-    /// SAFETY: See `alloc::alloc()`
+    /// SAFETY: See `alloc::alloc()`.
     pub unsafe fn alloc(&mut self, layout: Layout) -> *mut u8 {
         // SAFETY: Upheld by caller
         unsafe { self.allocate(layout, false) }
@@ -86,7 +94,7 @@ impl IsolatedAlloc {
 
     /// Same as `alloc`, but zeroes out the memory.
     ///
-    /// SAFETY: See `alloc::alloc_zeroed()`
+    /// SAFETY: See `alloc::alloc_zeroed()`.
     pub unsafe fn alloc_zeroed(&mut self, layout: Layout) -> *mut u8 {
         // SAFETY: Upheld by caller
         unsafe { self.allocate(layout, true) }
@@ -95,14 +103,13 @@ impl IsolatedAlloc {
     /// Abstracts over the logic of `alloc_zeroed` vs `alloc`, as determined by
     /// the `zeroed` argument.
     ///
-    /// SAFETY: See `alloc::alloc()`, with the added restriction that `page_size`
-    /// corresponds to the host pagesize.
+    /// SAFETY: See `alloc::alloc()`.
     unsafe fn allocate(&mut self, layout: Layout, zeroed: bool) -> *mut u8 {
         let layout = IsolatedAlloc::normalized_layout(layout);
         if self.is_huge_alloc(&layout) {
             // SAFETY: Validity of `layout` upheld by caller; we checked that
             // the size and alignment are appropriate for being a huge alloc
-            unsafe { self.alloc_huge(layout, zeroed) }
+            unsafe { self.alloc_huge(layout) }
         } else {
             for (&mut page, pinfo) in std::iter::zip(&mut self.page_ptrs, &mut self.page_infos) {
                 // SAFETY: The value in `self.page_size` is used to allocate
@@ -132,7 +139,7 @@ impl IsolatedAlloc {
     unsafe fn alloc_small(
         page_size: usize,
         layout: Layout,
-        page: *mut u8,
+        page: NonNull<u8>,
         pinfo: &mut DenseBitSet<usize>,
         zeroed: bool,
     ) -> Option<*mut u8> {
@@ -159,7 +166,7 @@ impl IsolatedAlloc {
                         // zero out, even if we allocated more
                         ptr.write_bytes(0, layout.size());
                     }
-                    return Some(ptr);
+                    return Some(ptr.as_ptr());
                 }
             }
         }
@@ -167,26 +174,50 @@ impl IsolatedAlloc {
     }
 
     /// Expands the available memory pool by adding one page.
-    fn add_page(&mut self) -> (*mut u8, &mut DenseBitSet<usize>) {
-        // SAFETY: The system page size, which is the layout size, cannot be 0
-        let page_ptr = unsafe { alloc::alloc(self.page_layout()) };
+    fn add_page(&mut self) -> (NonNull<u8>, &mut DenseBitSet<usize>) {
+        // SAFETY: mmap is always safe to call when requesting anonymous memory
+        let page_ptr = unsafe {
+            libc::mmap(
+                std::ptr::null_mut(),
+                self.page_size,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+                -1,
+                0,
+            )
+            .cast::<u8>()
+        };
+        assert_ne!(page_ptr.addr(), usize::MAX, "mmap failed");
         // `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page.
-        assert!(self.page_size % COMPRESSION_FACTOR == 0);
+        assert!(self.page_size.is_multiple_of(COMPRESSION_FACTOR));
         self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR));
-        self.page_ptrs.push(page_ptr);
-        (page_ptr, self.page_infos.last_mut().unwrap())
+        self.page_ptrs.push(NonNull::new(page_ptr).unwrap());
+        (NonNull::new(page_ptr).unwrap(), self.page_infos.last_mut().unwrap())
     }
 
     /// Allocates in multiples of one page on the host system.
+    /// Will always be zeroed.
     ///
     /// SAFETY: Same as `alloc()`.
-    unsafe fn alloc_huge(&mut self, layout: Layout, zeroed: bool) -> *mut u8 {
-        let layout = IsolatedAlloc::huge_normalized_layout(layout, self.page_size);
-        // SAFETY: Upheld by caller
-        let ret =
-            unsafe { if zeroed { alloc::alloc_zeroed(layout) } else { alloc::alloc(layout) } };
-        self.huge_ptrs.push((ret, layout.size()));
-        ret
+    unsafe fn alloc_huge(&mut self, layout: Layout) -> *mut u8 {
+        let size = self.huge_normalized_layout(layout);
+        // SAFETY: mmap is always safe to call when requesting anonymous memory
+        let ret = unsafe {
+            libc::mmap(
+                std::ptr::null_mut(),
+                size,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+                -1,
+                0,
+            )
+            .cast::<u8>()
+        };
+        assert_ne!(ret.addr(), usize::MAX, "mmap failed");
+        self.huge_ptrs.push((NonNull::new(ret).unwrap(), size));
+        // huge_normalized_layout ensures that we've overallocated enough space
+        // for this to be valid.
+        ret.map_addr(|a| a.next_multiple_of(layout.align()))
     }
 
     /// Deallocates a pointer from this allocator.
@@ -215,15 +246,15 @@ impl IsolatedAlloc {
                 let page_ptr = self.page_ptrs.remove(idx);
                 // SAFETY: We checked that there are no outstanding allocations
                 // from us pointing to this page, and we know it was allocated
-                // with this layout
+                // in add_page as exactly a single page.
                 unsafe {
-                    alloc::dealloc(page_ptr, self.page_layout());
+                    assert_eq!(libc::munmap(page_ptr.as_ptr().cast(), self.page_size), 0);
                 }
             }
         }
     }
 
-    /// Returns the index of the page that this was deallocated from
+    /// Returns the index of the page that this was deallocated from.
     ///
     /// SAFETY: the pointer must have been allocated with `alloc_small`.
     unsafe fn dealloc_small(&mut self, ptr: *mut u8, layout: Layout) -> usize {
@@ -236,7 +267,7 @@ impl IsolatedAlloc {
         // This could be made faster if the list was sorted -- the allocator isn't fully optimized at the moment.
         let pinfo = std::iter::zip(&mut self.page_ptrs, &mut self.page_infos)
             .enumerate()
-            .find(|(_, (page, _))| page.addr() == page_addr);
+            .find(|(_, (page, _))| page.addr().get() == page_addr);
         let Some((idx_of_pinfo, (_, pinfo))) = pinfo else {
             panic!("Freeing in an unallocated page: {ptr:?}\nHolding pages {:?}", self.page_ptrs)
         };
@@ -252,19 +283,72 @@ impl IsolatedAlloc {
     /// SAFETY: Same as `dealloc()` with the added requirement that `layout`
     /// must ask for a size larger than the host pagesize.
     unsafe fn dealloc_huge(&mut self, ptr: *mut u8, layout: Layout) {
-        let layout = IsolatedAlloc::huge_normalized_layout(layout, self.page_size);
-        // Find the pointer matching in address with the one we got
+        let size = self.huge_normalized_layout(layout);
+        // Find the huge allocation containing `ptr`.
         let idx = self
             .huge_ptrs
             .iter()
-            .position(|pg| ptr.addr() == pg.0.addr())
+            .position(|&(pg, size)| {
+                pg.addr().get() <= ptr.addr() && ptr.addr() < pg.addr().get().strict_add(size)
+            })
             .expect("Freeing unallocated pages");
         // And kick it from the list
-        self.huge_ptrs.remove(idx);
-        // SAFETY: Caller ensures validity of the layout
+        let (un_offset_ptr, size2) = self.huge_ptrs.remove(idx);
+        assert_eq!(size, size2, "got wrong layout in dealloc");
+        // SAFETY: huge_ptrs contains allocations made with mmap with the size recorded there.
         unsafe {
-            alloc::dealloc(ptr, layout);
+            let ret = libc::munmap(un_offset_ptr.as_ptr().cast(), size);
+            assert_eq!(ret, 0);
+        }
+    }
+
+    /// Returns a vector of page addresses managed by the allocator.
+    pub fn pages(&self) -> Vec<usize> {
+        let mut pages: Vec<usize> =
+            self.page_ptrs.iter().map(|p| p.expose_provenance().get()).collect();
+        for (ptr, size) in self.huge_ptrs.iter() {
+            for i in 0..size / self.page_size {
+                pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size));
+            }
+        }
+        pages
+    }
+
+    /// Protects all owned memory as `PROT_NONE`, preventing accesses.
+    ///
+    /// SAFETY: Accessing memory after this point will result in a segfault
+    /// unless it is first unprotected.
+    pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> {
+        let prot = mman::ProtFlags::PROT_NONE;
+        unsafe { self.mprotect(prot) }
+    }
+
+    /// Deprotects all owned memory by setting it to RW. Erroring here is very
+    /// likely unrecoverable, so it may panic if applying those permissions
+    /// fails.
+    pub fn unprep_ffi(&mut self) {
+        let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
+        unsafe {
+            self.mprotect(prot).unwrap();
+        }
+    }
+
+    /// Applies `prot` to every page managed by the allocator.
+    ///
+    /// SAFETY: Accessing memory in violation of the protection flags will
+    /// trigger a segfault.
+    unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> {
+        for &pg in &self.page_ptrs {
+            unsafe {
+                mman::mprotect(pg.cast(), self.page_size, prot)?;
+            }
+        }
+        for &(hpg, size) in &self.huge_ptrs {
+            unsafe {
+                mman::mprotect(hpg.cast(), size.next_multiple_of(self.page_size), prot)?;
+            }
         }
+        Ok(())
     }
 }
 
@@ -347,12 +431,15 @@ mod tests {
         sizes.append(&mut vec![256; 12]);
         // Give it some multi-page ones too
         sizes.append(&mut vec![32 * 1024; 4]);
+        sizes.push(4 * 1024);
 
         // Matching aligns for the sizes
         let mut aligns = vec![16; 12];
         aligns.append(&mut vec![256; 2]);
         aligns.append(&mut vec![64; 12]);
         aligns.append(&mut vec![4096; 4]);
+        // And one that requests align > page_size
+        aligns.push(64 * 1024);
 
         // Make sure we didn't mess up in the test itself!
         assert_eq!(sizes.len(), aligns.len());
diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs
index ab6aaed5e3e1c..b6cc017f77230 100644
--- a/src/tools/miri/src/alloc_addresses/reuse_pool.rs
+++ b/src/tools/miri/src/alloc_addresses/reuse_pool.rs
@@ -129,7 +129,7 @@ impl ReusePool {
         let idx = rng.random_range(begin..end);
         // Remove it from the pool and return.
         let (chosen_addr, chosen_size, chosen_thread, clock) = subpool.remove(idx);
-        debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
+        debug_assert!(chosen_size >= size && chosen_addr.is_multiple_of(align.bytes()));
         debug_assert!(cross_thread_reuse || chosen_thread == thread);
         // No synchronization needed if we reused from the current thread.
         Some((chosen_addr, if chosen_thread == thread { None } else { Some(clock) }))
diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs
index 72057f812f962..b80327d9d1a44 100644
--- a/src/tools/miri/src/bin/miri.rs
+++ b/src/tools/miri/src/bin/miri.rs
@@ -227,10 +227,11 @@ impl rustc_driver::Callbacks for MiriCompilerCalls {
         } else {
             let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None)
                 .unwrap_or_else(|| {
+                    #[cfg(target_os = "linux")]
+                    miri::native_lib::register_retcode_sv(rustc_driver::EXIT_FAILURE);
                     tcx.dcx().abort_if_errors();
                     rustc_driver::EXIT_FAILURE
                 });
-
             std::process::exit(return_code);
         }
 
@@ -723,6 +724,8 @@ fn main() {
             } else {
                 show_error!("-Zmiri-native-lib `{}` does not exist", filename);
             }
+        } else if arg == "-Zmiri-native-lib-enable-tracing" {
+            miri_config.native_lib_enable_tracing = true;
         } else if let Some(param) = arg.strip_prefix("-Zmiri-num-cpus=") {
             let num_cpus = param
                 .parse::<u32>()
@@ -793,6 +796,16 @@ fn main() {
 
     debug!("rustc arguments: {:?}", rustc_args);
     debug!("crate arguments: {:?}", miri_config.args);
+    #[cfg(target_os = "linux")]
+    if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing {
+        // FIXME: This should display a diagnostic / warning on error
+        // SAFETY: If any other threads exist at this point (namely for the ctrlc
+        // handler), they will not interact with anything on the main rustc/Miri
+        // thread in an async-signal-unsafe way such as by accessing shared
+        // semaphores, etc.; the handler only calls `sleep()` and `exit()`, which
+        // are async-signal-safe, as is accessing atomics
+        let _ = unsafe { miri::native_lib::init_sv() };
+    }
     run_compiler_and_exit(
         &rustc_args,
         &mut MiriCompilerCalls::new(miri_config, many_seeds, genmc_config),
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
index 99171b0349ed3..a0761cb07a1da 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
@@ -468,10 +468,8 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // - when `extern type` is involved we use the size of the known prefix,
         // - if the pointer is not reborrowed (raw pointer) then we override the size
         //   to do a zero-length reborrow.
-        let reborrow_size = this
-            .size_and_align_of_val(place)?
-            .map(|(size, _)| size)
-            .unwrap_or(place.layout.size);
+        let reborrow_size =
+            this.size_and_align_of_val(place)?.map(|(size, _)| size).unwrap_or(place.layout.size);
         trace!("Creating new permission: {:?} with size {:?}", new_perm, reborrow_size);
 
         // This new tag is not guaranteed to actually be used.
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs
index dcd5a6cb02305..ad0a565dfd857 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs
@@ -327,7 +327,7 @@ mod tests {
         for i in 0..1000 {
             i.hash(&mut hasher);
             let rng = hasher.finish();
-            let op = rng % 3 == 0;
+            let op = rng.is_multiple_of(3);
             let key = (rng / 2) % 50;
             let val = (rng / 100) % 1000;
             if op {
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index 38b5d4c0f06ec..c8a408fd8ace8 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -582,88 +582,6 @@ impl<'tcx> ThreadManager<'tcx> {
         interp_ok(())
     }
 
-    /// Mark that the active thread tries to join the thread with `joined_thread_id`.
-    fn join_thread(
-        &mut self,
-        joined_thread_id: ThreadId,
-        data_race_handler: &mut GlobalDataRaceHandler,
-    ) -> InterpResult<'tcx> {
-        if self.threads[joined_thread_id].join_status == ThreadJoinStatus::Detached {
-            // On Windows this corresponds to joining on a closed handle.
-            throw_ub_format!("trying to join a detached thread");
-        }
-
-        fn after_join<'tcx>(
-            threads: &mut ThreadManager<'_>,
-            joined_thread_id: ThreadId,
-            data_race_handler: &mut GlobalDataRaceHandler,
-        ) -> InterpResult<'tcx> {
-            match data_race_handler {
-                GlobalDataRaceHandler::None => {}
-                GlobalDataRaceHandler::Vclocks(data_race) =>
-                    data_race.thread_joined(threads, joined_thread_id),
-                GlobalDataRaceHandler::Genmc(genmc_ctx) =>
-                    genmc_ctx.handle_thread_join(threads.active_thread, joined_thread_id)?,
-            }
-            interp_ok(())
-        }
-
-        // Mark the joined thread as being joined so that we detect if other
-        // threads try to join it.
-        self.threads[joined_thread_id].join_status = ThreadJoinStatus::Joined;
-        if !self.threads[joined_thread_id].state.is_terminated() {
-            trace!(
-                "{:?} blocked on {:?} when trying to join",
-                self.active_thread, joined_thread_id
-            );
-            // The joined thread is still running, we need to wait for it.
-            // Unce we get unblocked, perform the appropriate synchronization.
-            self.block_thread(
-                BlockReason::Join(joined_thread_id),
-                None,
-                callback!(
-                    @capture<'tcx> {
-                        joined_thread_id: ThreadId,
-                    }
-                    |this, unblock: UnblockKind| {
-                        assert_eq!(unblock, UnblockKind::Ready);
-                        after_join(&mut this.machine.threads, joined_thread_id, &mut this.machine.data_race)
-                    }
-                ),
-            );
-        } else {
-            // The thread has already terminated - establish happens-before
-            after_join(self, joined_thread_id, data_race_handler)?;
-        }
-        interp_ok(())
-    }
-
-    /// Mark that the active thread tries to exclusively join the thread with `joined_thread_id`.
-    /// If the thread is already joined by another thread, it will throw UB
-    fn join_thread_exclusive(
-        &mut self,
-        joined_thread_id: ThreadId,
-        data_race_handler: &mut GlobalDataRaceHandler,
-    ) -> InterpResult<'tcx> {
-        if self.threads[joined_thread_id].join_status == ThreadJoinStatus::Joined {
-            throw_ub_format!("trying to join an already joined thread");
-        }
-
-        if joined_thread_id == self.active_thread {
-            throw_ub_format!("trying to join itself");
-        }
-
-        // Sanity check `join_status`.
-        assert!(
-            self.threads
-                .iter()
-                .all(|thread| { !thread.state.is_blocked_on(BlockReason::Join(joined_thread_id)) }),
-            "this thread already has threads waiting for its termination"
-        );
-
-        self.join_thread(joined_thread_id, data_race_handler)
-    }
-
     /// Set the name of the given thread.
     pub fn set_thread_name(&mut self, thread: ThreadId, new_thread_name: Vec<u8>) {
         self.threads[thread].thread_name = Some(new_thread_name);
@@ -1114,20 +1032,102 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         this.machine.threads.detach_thread(thread_id, allow_terminated_joined)
     }
 
-    #[inline]
-    fn join_thread(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> {
+    /// Mark that the active thread tries to join the thread with `joined_thread_id`.
+    ///
+    /// When the join is successful (immediately, or as soon as the joined thread finishes), `success_retval` will be written to `return_dest`.
+    fn join_thread(
+        &mut self,
+        joined_thread_id: ThreadId,
+        success_retval: Scalar,
+        return_dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        this.machine.threads.join_thread(joined_thread_id, &mut this.machine.data_race)?;
+        let thread_mgr = &mut this.machine.threads;
+        if thread_mgr.threads[joined_thread_id].join_status == ThreadJoinStatus::Detached {
+            // On Windows this corresponds to joining on a closed handle.
+            throw_ub_format!("trying to join a detached thread");
+        }
+
+        fn after_join<'tcx>(
+            this: &mut InterpCx<'tcx, MiriMachine<'tcx>>,
+            joined_thread_id: ThreadId,
+            success_retval: Scalar,
+            return_dest: &MPlaceTy<'tcx>,
+        ) -> InterpResult<'tcx> {
+            let threads = &this.machine.threads;
+            match &mut this.machine.data_race {
+                GlobalDataRaceHandler::None => {}
+                GlobalDataRaceHandler::Vclocks(data_race) =>
+                    data_race.thread_joined(threads, joined_thread_id),
+                GlobalDataRaceHandler::Genmc(genmc_ctx) =>
+                    genmc_ctx.handle_thread_join(threads.active_thread, joined_thread_id)?,
+            }
+            this.write_scalar(success_retval, return_dest)?;
+            interp_ok(())
+        }
+
+        // Mark the joined thread as being joined so that we detect if other
+        // threads try to join it.
+        thread_mgr.threads[joined_thread_id].join_status = ThreadJoinStatus::Joined;
+        if !thread_mgr.threads[joined_thread_id].state.is_terminated() {
+            trace!(
+                "{:?} blocked on {:?} when trying to join",
+                thread_mgr.active_thread, joined_thread_id
+            );
+            // The joined thread is still running, we need to wait for it.
+            // Once we get unblocked, perform the appropriate synchronization and write the return value.
+            let dest = return_dest.clone();
+            thread_mgr.block_thread(
+                BlockReason::Join(joined_thread_id),
+                None,
+                callback!(
+                    @capture<'tcx> {
+                        joined_thread_id: ThreadId,
+                        dest: MPlaceTy<'tcx>,
+                        success_retval: Scalar,
+                    }
+                    |this, unblock: UnblockKind| {
+                        assert_eq!(unblock, UnblockKind::Ready);
+                        after_join(this, joined_thread_id, success_retval, &dest)
+                    }
+                ),
+            );
+        } else {
+            // The thread has already terminated - establish happens-before and write the return value.
+            after_join(this, joined_thread_id, success_retval, return_dest)?;
+        }
         interp_ok(())
     }
 
-    #[inline]
-    fn join_thread_exclusive(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> {
+    /// Mark that the active thread tries to exclusively join the thread with `joined_thread_id`.
+    /// If the thread is already joined by another thread, it will throw UB.
+    ///
+    /// When the join is successful (immediately, or as soon as the joined thread finishes), `success_retval` will be written to `return_dest`.
+    fn join_thread_exclusive(
+        &mut self,
+        joined_thread_id: ThreadId,
+        success_retval: Scalar,
+        return_dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        this.machine
-            .threads
-            .join_thread_exclusive(joined_thread_id, &mut this.machine.data_race)?;
-        interp_ok(())
+        let threads = &this.machine.threads.threads;
+        if threads[joined_thread_id].join_status == ThreadJoinStatus::Joined {
+            throw_ub_format!("trying to join an already joined thread");
+        }
+
+        if joined_thread_id == this.machine.threads.active_thread {
+            throw_ub_format!("trying to join itself");
+        }
+
+        // Sanity check `join_status`.
+        assert!(
+            threads
+                .iter()
+                .all(|thread| { !thread.state.is_blocked_on(BlockReason::Join(joined_thread_id)) }),
+            "this thread already has threads waiting for its termination"
+        );
+
+        this.join_thread(joined_thread_id, success_retval, return_dest)
     }
 
     #[inline]
diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs
index 54a7c1407eaa2..9ecbd31c5b9f9 100644
--- a/src/tools/miri/src/diagnostics.rs
+++ b/src/tools/miri/src/diagnostics.rs
@@ -132,7 +132,9 @@ pub enum NonHaltingDiagnostic {
     Int2Ptr {
         details: bool,
     },
-    NativeCallSharedMem,
+    NativeCallSharedMem {
+        tracing: bool,
+    },
     WeakMemoryOutdatedLoad {
         ptr: Pointer,
     },
@@ -627,7 +629,7 @@ impl<'tcx> MiriMachine<'tcx> {
             RejectedIsolatedOp(_) =>
                 ("operation rejected by isolation".to_string(), DiagLevel::Warning),
             Int2Ptr { .. } => ("integer-to-pointer cast".to_string(), DiagLevel::Warning),
-            NativeCallSharedMem =>
+            NativeCallSharedMem { .. } =>
                 ("sharing memory with a native function".to_string(), DiagLevel::Warning),
             ExternTypeReborrow =>
                 ("reborrow of reference to `extern type`".to_string(), DiagLevel::Warning),
@@ -663,7 +665,8 @@ impl<'tcx> MiriMachine<'tcx> {
             ProgressReport { .. } =>
                 format!("progress report: current operation being executed is here"),
             Int2Ptr { .. } => format!("integer-to-pointer cast"),
-            NativeCallSharedMem => format!("sharing memory with a native function called via FFI"),
+            NativeCallSharedMem { .. } =>
+                format!("sharing memory with a native function called via FFI"),
             WeakMemoryOutdatedLoad { ptr } =>
                 format!("weak memory emulation: outdated value returned from load at {ptr}"),
             ExternTypeReborrow =>
@@ -709,22 +712,41 @@ impl<'tcx> MiriMachine<'tcx> {
                 }
                 v
             }
-            NativeCallSharedMem => {
-                vec![
-                    note!(
-                        "when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory"
-                    ),
-                    note!(
-                        "in particular, Miri assumes that the native call initializes all memory it has access to"
-                    ),
-                    note!(
-                        "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory"
-                    ),
-                    note!(
-                        "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free"
-                    ),
-                ]
-            }
+            NativeCallSharedMem { tracing } =>
+                if *tracing {
+                    vec![
+                        note!(
+                            "when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis"
+                        ),
+                        note!(
+                            "in particular, Miri assumes that the native call initializes all memory it has written to"
+                        ),
+                        note!(
+                            "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory"
+                        ),
+                        note!(
+                            "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free"
+                        ),
+                        note!(
+                            "tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here"
+                        ),
+                    ]
+                } else {
+                    vec![
+                        note!(
+                            "when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory"
+                        ),
+                        note!(
+                            "in particular, Miri assumes that the native call initializes all memory it has access to"
+                        ),
+                        note!(
+                            "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory"
+                        ),
+                        note!(
+                            "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free"
+                        ),
+                    ]
+                },
             ExternTypeReborrow => {
                 assert!(self.borrow_tracker.as_ref().is_some_and(|b| {
                     matches!(
diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs
index 04fe08b30ee36..44612dae1ee54 100644
--- a/src/tools/miri/src/eval.rs
+++ b/src/tools/miri/src/eval.rs
@@ -150,6 +150,8 @@ pub struct MiriConfig {
     pub retag_fields: RetagFields,
     /// The location of the shared object files to load when calling external functions
     pub native_lib: Vec<PathBuf>,
+    /// Whether to enable the new native lib tracing system.
+    pub native_lib_enable_tracing: bool,
     /// Run a garbage collector for BorTags every N basic blocks.
     pub gc_interval: u32,
     /// The number of CPUs to be reported by miri.
@@ -199,6 +201,7 @@ impl Default for MiriConfig {
             report_progress: None,
             retag_fields: RetagFields::Yes,
             native_lib: vec![],
+            native_lib_enable_tracing: false,
             gc_interval: 10_000,
             num_cpus: 1,
             page_size: None,
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index ea56582ee482c..af0dff4bd0352 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -1,4 +1,5 @@
 #![cfg_attr(bootstrap, feature(cfg_match))]
+#![cfg_attr(bootstrap, feature(nonnull_provenance))]
 #![cfg_attr(not(bootstrap), feature(cfg_select))]
 #![feature(rustc_private)]
 #![feature(float_gamma)]
@@ -99,6 +100,11 @@ pub use rustc_const_eval::interpret::{self, AllocMap, Provenance as _};
 use rustc_middle::{bug, span_bug};
 use tracing::{info, trace};
 
+#[cfg(target_os = "linux")]
+pub mod native_lib {
+    pub use crate::shims::{init_sv, register_retcode_sv};
+}
+
 // Type aliases that set the provenance parameter.
 pub type Pointer = interpret::Pointer<Option<machine::Provenance>>;
 pub type StrictPointer = interpret::Pointer<machine::Provenance>;
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index b4d7db34efa7b..3a748c4c68726 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1056,7 +1056,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
             // What's the offset between us and the promised alignment?
             let distance = offset.bytes().wrapping_sub(promised_offset.bytes());
             // That must also be aligned.
-            if distance % align.bytes() == 0 {
+            if distance.is_multiple_of(align.bytes()) {
                 // All looking good!
                 None
             } else {
@@ -1612,7 +1612,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
         ecx.machine.since_gc += 1;
         // Possibly report our progress. This will point at the terminator we are about to execute.
         if let Some(report_progress) = ecx.machine.report_progress {
-            if ecx.machine.basic_block_count % u64::from(report_progress) == 0 {
+            if ecx.machine.basic_block_count.is_multiple_of(u64::from(report_progress)) {
                 ecx.emit_diagnostic(NonHaltingDiagnostic::ProgressReport {
                     block_count: ecx.machine.basic_block_count,
                 });
diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs
index b498551ace34c..44e145dd5d7ca 100644
--- a/src/tools/miri/src/shims/mod.rs
+++ b/src/tools/miri/src/shims/mod.rs
@@ -21,6 +21,8 @@ pub mod time;
 pub mod tls;
 
 pub use self::files::FdTable;
+#[cfg(target_os = "linux")]
+pub use self::native_lib::trace::{init_sv, register_retcode_sv};
 pub use self::unix::{DirTable, EpollInterestTable};
 
 /// What needs to be done after emulating an item (a shim or an intrinsic) is done.
diff --git a/src/tools/miri/src/shims/native_lib.rs b/src/tools/miri/src/shims/native_lib/mod.rs
similarity index 67%
rename from src/tools/miri/src/shims/native_lib.rs
rename to src/tools/miri/src/shims/native_lib/mod.rs
index 030c2e3672169..f6f4766144e66 100644
--- a/src/tools/miri/src/shims/native_lib.rs
+++ b/src/tools/miri/src/shims/native_lib/mod.rs
@@ -1,4 +1,8 @@
 //! Implements calling functions from a native library.
+
+#[cfg(target_os = "linux")]
+pub mod trace;
+
 use std::ops::Deref;
 
 use libffi::high::call as ffi;
@@ -8,8 +12,15 @@ use rustc_middle::mir::interpret::Pointer;
 use rustc_middle::ty::{self as ty, IntTy, UintTy};
 use rustc_span::Symbol;
 
+#[cfg(target_os = "linux")]
+use self::trace::Supervisor;
 use crate::*;
 
+#[cfg(target_os = "linux")]
+type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<self::trace::messages::MemEvents>)>;
+#[cfg(not(target_os = "linux"))]
+type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<!>)>;
+
 impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
 trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
     /// Call native host function and return the output as an immediate.
@@ -19,71 +30,93 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
         dest: &MPlaceTy<'tcx>,
         ptr: CodePtr,
         libffi_args: Vec<libffi::high::Arg<'a>>,
-    ) -> InterpResult<'tcx, ImmTy<'tcx>> {
+    ) -> CallResult<'tcx> {
         let this = self.eval_context_mut();
+        #[cfg(target_os = "linux")]
+        let alloc = this.machine.allocator.as_ref().unwrap();
+
+        // SAFETY: We don't touch the machine memory past this point.
+        #[cfg(target_os = "linux")]
+        let (guard, stack_ptr) = unsafe { Supervisor::start_ffi(alloc) };
 
         // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
         // as the specified primitive integer type
-        let scalar = match dest.layout.ty.kind() {
-            // ints
-            ty::Int(IntTy::I8) => {
-                // Unsafe because of the call to native code.
-                // Because this is calling a C function it is not necessarily sound,
-                // but there is no way around this and we've checked as much as we can.
-                let x = unsafe { ffi::call::<i8>(ptr, libffi_args.as_slice()) };
-                Scalar::from_i8(x)
-            }
-            ty::Int(IntTy::I16) => {
-                let x = unsafe { ffi::call::<i16>(ptr, libffi_args.as_slice()) };
-                Scalar::from_i16(x)
-            }
-            ty::Int(IntTy::I32) => {
-                let x = unsafe { ffi::call::<i32>(ptr, libffi_args.as_slice()) };
-                Scalar::from_i32(x)
-            }
-            ty::Int(IntTy::I64) => {
-                let x = unsafe { ffi::call::<i64>(ptr, libffi_args.as_slice()) };
-                Scalar::from_i64(x)
-            }
-            ty::Int(IntTy::Isize) => {
-                let x = unsafe { ffi::call::<isize>(ptr, libffi_args.as_slice()) };
-                Scalar::from_target_isize(x.try_into().unwrap(), this)
-            }
-            // uints
-            ty::Uint(UintTy::U8) => {
-                let x = unsafe { ffi::call::<u8>(ptr, libffi_args.as_slice()) };
-                Scalar::from_u8(x)
-            }
-            ty::Uint(UintTy::U16) => {
-                let x = unsafe { ffi::call::<u16>(ptr, libffi_args.as_slice()) };
-                Scalar::from_u16(x)
-            }
-            ty::Uint(UintTy::U32) => {
-                let x = unsafe { ffi::call::<u32>(ptr, libffi_args.as_slice()) };
-                Scalar::from_u32(x)
-            }
-            ty::Uint(UintTy::U64) => {
-                let x = unsafe { ffi::call::<u64>(ptr, libffi_args.as_slice()) };
-                Scalar::from_u64(x)
-            }
-            ty::Uint(UintTy::Usize) => {
-                let x = unsafe { ffi::call::<usize>(ptr, libffi_args.as_slice()) };
-                Scalar::from_target_usize(x.try_into().unwrap(), this)
-            }
-            // Functions with no declared return type (i.e., the default return)
-            // have the output_type `Tuple([])`.
-            ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
-                unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
-                return interp_ok(ImmTy::uninit(dest.layout));
-            }
-            ty::RawPtr(..) => {
-                let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
-                let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
-                Scalar::from_pointer(ptr, this)
-            }
-            _ => throw_unsup_format!("unsupported return type for native call: {:?}", link_name),
+        let res = 'res: {
+            let scalar = match dest.layout.ty.kind() {
+                // ints
+                ty::Int(IntTy::I8) => {
+                    // Unsafe because of the call to native code.
+                    // Because this is calling a C function it is not necessarily sound,
+                    // but there is no way around this and we've checked as much as we can.
+                    let x = unsafe { ffi::call::<i8>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_i8(x)
+                }
+                ty::Int(IntTy::I16) => {
+                    let x = unsafe { ffi::call::<i16>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_i16(x)
+                }
+                ty::Int(IntTy::I32) => {
+                    let x = unsafe { ffi::call::<i32>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_i32(x)
+                }
+                ty::Int(IntTy::I64) => {
+                    let x = unsafe { ffi::call::<i64>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_i64(x)
+                }
+                ty::Int(IntTy::Isize) => {
+                    let x = unsafe { ffi::call::<isize>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_target_isize(x.try_into().unwrap(), this)
+                }
+                // uints
+                ty::Uint(UintTy::U8) => {
+                    let x = unsafe { ffi::call::<u8>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_u8(x)
+                }
+                ty::Uint(UintTy::U16) => {
+                    let x = unsafe { ffi::call::<u16>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_u16(x)
+                }
+                ty::Uint(UintTy::U32) => {
+                    let x = unsafe { ffi::call::<u32>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_u32(x)
+                }
+                ty::Uint(UintTy::U64) => {
+                    let x = unsafe { ffi::call::<u64>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_u64(x)
+                }
+                ty::Uint(UintTy::Usize) => {
+                    let x = unsafe { ffi::call::<usize>(ptr, libffi_args.as_slice()) };
+                    Scalar::from_target_usize(x.try_into().unwrap(), this)
+                }
+                // Functions with no declared return type (i.e., the default return)
+                // have the output_type `Tuple([])`.
+                ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
+                    unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
+                    break 'res interp_ok(ImmTy::uninit(dest.layout));
+                }
+                ty::RawPtr(..) => {
+                    let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
+                    let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
+                    Scalar::from_pointer(ptr, this)
+                }
+                _ =>
+                    break 'res Err(err_unsup_format!(
+                        "unsupported return type for native call: {:?}",
+                        link_name
+                    ))
+                    .into(),
+            };
+            interp_ok(ImmTy::from_scalar(scalar, dest.layout))
         };
-        interp_ok(ImmTy::from_scalar(scalar, dest.layout))
+
+        // SAFETY: We got the guard and stack pointer from start_ffi, and
+        // the allocator is the same
+        #[cfg(target_os = "linux")]
+        let events = unsafe { Supervisor::end_ffi(alloc, guard, stack_ptr) };
+        #[cfg(not(target_os = "linux"))]
+        let events = None;
+
+        interp_ok((res?, events))
     }
 
     /// Get the pointer to the function of the specified name in the shared object file,
@@ -179,7 +212,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // The first time this happens, print a warning.
                 if !this.machine.native_call_mem_warned.replace(true) {
                     // Newly set, so first time we get here.
-                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem);
+                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem {
+                        #[cfg(target_os = "linux")]
+                        tracing: self::trace::Supervisor::is_enabled(),
+                        #[cfg(not(target_os = "linux"))]
+                        tracing: false,
+                    });
                 }
 
                 this.expose_provenance(prov)?;
@@ -196,7 +234,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             .collect::<Vec<libffi::high::Arg<'_>>>();
 
         // Call the function and store output, depending on return type in the function signature.
-        let ret = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
+        let (ret, maybe_memevents) =
+            this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
+
+        if cfg!(target_os = "linux")
+            && let Some(events) = maybe_memevents
+        {
+            trace!("Registered FFI events:\n{events:#0x?}");
+        }
+
         this.write_immediate(*ret, dest)?;
         interp_ok(true)
     }
diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs
new file mode 100644
index 0000000000000..4961e875c7751
--- /dev/null
+++ b/src/tools/miri/src/shims/native_lib/trace/child.rs
@@ -0,0 +1,251 @@
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use ipc_channel::ipc;
+use nix::sys::{ptrace, signal};
+use nix::unistd;
+
+use super::CALLBACK_STACK_SIZE;
+use super::messages::{Confirmation, MemEvents, StartFfiInfo, TraceRequest};
+use super::parent::{ChildListener, sv_loop};
+use crate::alloc::isolated_alloc::IsolatedAlloc;
+
+static SUPERVISOR: std::sync::Mutex<Option<Supervisor>> = std::sync::Mutex::new(None);
+
+/// The main means of communication between the child and parent process,
+/// allowing the former to send requests and get info from the latter.
+pub struct Supervisor {
+    /// Sender for FFI-mode-related requests.
+    message_tx: ipc::IpcSender<TraceRequest>,
+    /// Used for synchronisation, allowing us to receive confirmation that the
+    /// parent process has handled the request from `message_tx`.
+    confirm_rx: ipc::IpcReceiver<Confirmation>,
+    /// Receiver for memory acceses that ocurred during the FFI call.
+    event_rx: ipc::IpcReceiver<MemEvents>,
+}
+
+/// Marker representing that an error occurred during creation of the supervisor.
+#[derive(Debug)]
+pub struct SvInitError;
+
+impl Supervisor {
+    /// Returns `true` if the supervisor process exists, and `false` otherwise.
+    pub fn is_enabled() -> bool {
+        SUPERVISOR.lock().unwrap().is_some()
+    }
+
+    /// Begins preparations for doing an FFI call. This should be called at
+    /// the last possible moment before entering said call. `alloc` points to
+    /// the allocator which handed out the memory used for this machine.
+    ///
+    /// As this locks the supervisor via a mutex, no other threads may enter FFI
+    /// until this one returns and its guard is dropped via `end_ffi`. The
+    /// pointer returned should be passed to `end_ffi` to avoid a memory leak.
+    ///
+    /// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately
+    /// after the desired call has concluded.
+    pub unsafe fn start_ffi(
+        alloc: &Rc<RefCell<IsolatedAlloc>>,
+    ) -> (std::sync::MutexGuard<'static, Option<Supervisor>>, Option<*mut [u8; CALLBACK_STACK_SIZE]>)
+    {
+        let mut sv_guard = SUPERVISOR.lock().unwrap();
+        // If the supervisor is not initialised for whatever reason, fast-fail.
+        // This might be desired behaviour, as even on platforms where ptracing
+        // is not implemented it enables us to enforce that only one FFI call
+        // happens at a time.
+        let Some(sv) = sv_guard.take() else {
+            return (sv_guard, None);
+        };
+
+        // Get pointers to all the pages the supervisor must allow accesses in
+        // and prepare the callback stack.
+        let page_ptrs = alloc.borrow().pages();
+        let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] =
+            Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast();
+        let stack_ptr = raw_stack_ptr.expose_provenance();
+        let start_info = StartFfiInfo { page_ptrs, stack_ptr };
+
+        // SAFETY: We do not access machine memory past this point until the
+        // supervisor is ready to allow it.
+        unsafe {
+            if alloc.borrow_mut().prepare_ffi().is_err() {
+                // Don't mess up unwinding by maybe leaving the memory partly protected
+                alloc.borrow_mut().unprep_ffi();
+                panic!("Cannot protect memory for FFI call!");
+            }
+        }
+
+        // Send over the info.
+        // NB: if we do not wait to receive a blank confirmation response, it is
+        // possible that the supervisor is alerted of the SIGSTOP *before* it has
+        // actually received the start_info, thus deadlocking! This way, we can
+        // enforce an ordering for these events.
+        sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
+        sv.confirm_rx.recv().unwrap();
+        *sv_guard = Some(sv);
+        // We need to be stopped for the supervisor to be able to make certain
+        // modifications to our memory - simply waiting on the recv() doesn't
+        // count.
+        signal::raise(signal::SIGSTOP).unwrap();
+        (sv_guard, Some(raw_stack_ptr))
+    }
+
+    /// Undoes FFI-related preparations, allowing Miri to continue as normal, then
+    /// gets the memory accesses and changes performed during the FFI call. Note
+    /// that this may include some spurious accesses done by `libffi` itself in
+    /// the process of executing the function call.
+    ///
+    /// SAFETY: The `sv_guard` and `raw_stack_ptr` passed must be the same ones
+    /// received by a prior call to `start_ffi`, and the allocator must be the
+    /// one passed to it also.
+    pub unsafe fn end_ffi(
+        alloc: &Rc<RefCell<IsolatedAlloc>>,
+        mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
+        raw_stack_ptr: Option<*mut [u8; CALLBACK_STACK_SIZE]>,
+    ) -> Option<MemEvents> {
+        // We can't use IPC channels here to signal that FFI mode has ended,
+        // since they might allocate memory which could get us stuck in a SIGTRAP
+        // with no easy way out! While this could be worked around, it is much
+        // simpler and more robust to simply use the signals which are left for
+        // arbitrary usage. Since this will block until we are continued by the
+        // supervisor, we can assume past this point that everything is back to
+        // normal.
+        signal::raise(signal::SIGUSR1).unwrap();
+
+        // This is safe! It just sets memory to normal expected permissions.
+        alloc.borrow_mut().unprep_ffi();
+
+        // If this is `None`, then `raw_stack_ptr` is None and does not need to
+        // be deallocated (and there's no need to worry about the guard, since
+        // it contains nothing).
+        let sv = sv_guard.take()?;
+        // SAFETY: Caller upholds that this pointer was allocated as a box with
+        // this type.
+        unsafe {
+            drop(Box::from_raw(raw_stack_ptr.unwrap()));
+        }
+        // On the off-chance something really weird happens, don't block forever.
+        let ret = sv
+            .event_rx
+            .try_recv_timeout(std::time::Duration::from_secs(5))
+            .map_err(|e| {
+                match e {
+                    ipc::TryRecvError::IpcError(_) => (),
+                    ipc::TryRecvError::Empty =>
+                        eprintln!("Waiting for accesses from supervisor timed out!"),
+                }
+            })
+            .ok();
+        // Do *not* leave the supervisor empty, or else we might get another fork...
+        *sv_guard = Some(sv);
+        ret
+    }
+}
+
+/// Initialises the supervisor process. If this function errors, then the
+/// supervisor process could not be created successfully; else, the caller
+/// is now the child process and can communicate via `start_ffi`/`end_ffi`,
+/// receiving back events through `get_events`.
+///
+/// # Safety
+/// The invariants for `fork()` must be upheld by the caller.
+pub unsafe fn init_sv() -> Result<(), SvInitError> {
+    // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
+    // relevant missing bits.
+
+    // On Linux, this will check whether ptrace is fully disabled by the Yama module.
+    // If Yama isn't running or we're not on Linux, we'll still error later, but
+    // this saves a very expensive fork call.
+    let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope");
+    if let Ok(stat) = ptrace_status {
+        if let Some(stat) = stat.chars().next() {
+            // Fast-error if ptrace is fully disabled on the system.
+            if stat == '3' {
+                return Err(SvInitError);
+            }
+        }
+    }
+
+    // Initialise the supervisor if it isn't already, placing it into SUPERVISOR.
+    let mut lock = SUPERVISOR.lock().unwrap();
+    if lock.is_some() {
+        return Ok(());
+    }
+
+    // Prepare the IPC channels we need.
+    let (message_tx, message_rx) = ipc::channel().unwrap();
+    let (confirm_tx, confirm_rx) = ipc::channel().unwrap();
+    let (event_tx, event_rx) = ipc::channel().unwrap();
+    // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error.
+    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap();
+    super::parent::PAGE_SIZE.store(page_size, std::sync::atomic::Ordering::Relaxed);
+
+    unsafe {
+        // TODO: Maybe use clone3() instead for better signalling of when the child exits?
+        // SAFETY: Caller upholds that only one thread exists.
+        match unistd::fork().unwrap() {
+            unistd::ForkResult::Parent { child } => {
+                // If somehow another thread does exist, prevent it from accessing the lock
+                // and thus breaking our safety invariants.
+                std::mem::forget(lock);
+                // The child process is free to unwind, so we won't to avoid doubly freeing
+                // system resources.
+                let init = std::panic::catch_unwind(|| {
+                    let listener =
+                        ChildListener { message_rx, attached: false, override_retcode: None };
+                    // Trace as many things as possible, to be able to handle them as needed.
+                    let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
+                        | ptrace::Options::PTRACE_O_TRACECLONE
+                        | ptrace::Options::PTRACE_O_TRACEFORK;
+                    // Attach to the child process without stopping it.
+                    match ptrace::seize(child, options) {
+                        // Ptrace works :D
+                        Ok(_) => {
+                            let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err();
+                            // If a return code of 0 is not explicitly given, assume something went
+                            // wrong and return 1.
+                            std::process::exit(code.0.unwrap_or(1))
+                        }
+                        // Ptrace does not work and we failed to catch that.
+                        Err(_) => {
+                            // If we can't ptrace, Miri continues being the parent.
+                            signal::kill(child, signal::SIGKILL).unwrap();
+                            SvInitError
+                        }
+                    }
+                });
+                match init {
+                    // The "Ok" case means that we couldn't ptrace.
+                    Ok(e) => return Err(e),
+                    Err(p) => {
+                        eprintln!("Supervisor process panicked!\n{p:?}");
+                        std::process::exit(1);
+                    }
+                }
+            }
+            unistd::ForkResult::Child => {
+                // Make sure we never get orphaned and stuck in SIGSTOP or similar
+                // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call.
+                let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
+                assert_eq!(ret, 0);
+                // First make sure the parent succeeded with ptracing us!
+                signal::raise(signal::SIGSTOP).unwrap();
+                // If we're the child process, save the supervisor info.
+                *lock = Some(Supervisor { message_tx, confirm_rx, event_rx });
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Instruct the supervisor process to return a particular code. Useful if for
+/// whatever reason this code fails to be intercepted normally. In the case of
+/// `abort_if_errors()` used in `bin/miri.rs`, the return code is erroneously
+/// given as a 0 if this is not used.
+pub fn register_retcode_sv(code: i32) {
+    let mut sv_guard = SUPERVISOR.lock().unwrap();
+    if let Some(sv) = sv_guard.take() {
+        sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap();
+        *sv_guard = Some(sv);
+    }
+}
diff --git a/src/tools/miri/src/shims/native_lib/trace/messages.rs b/src/tools/miri/src/shims/native_lib/trace/messages.rs
new file mode 100644
index 0000000000000..8a83dab5c09d1
--- /dev/null
+++ b/src/tools/miri/src/shims/native_lib/trace/messages.rs
@@ -0,0 +1,80 @@
+//! Houses the types that are directly sent across the IPC channels.
+//!
+//! The overall structure of a traced FFI call, from the child process's POV, is
+//! as follows:
+//! ```
+//! message_tx.send(TraceRequest::StartFfi);
+//! confirm_rx.recv();
+//! raise(SIGSTOP);
+//! /* do ffi call */
+//! raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi"
+//! let events = event_rx.recv();
+//! ```
+//! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including
+//! before or after all of them.
+//!
+//! NB: sending these events out of order, skipping steps, etc. will result in
+//! unspecified behaviour from the supervisor process, so use the abstractions
+//! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is
+//! trivially easy to cause a deadlock or crash by messing this up!
+
+use std::ops::Range;
+
+/// An IPC request sent by the child process to the parent.
+///
+/// The sender for this channel should live on the child process.
+#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
+pub enum TraceRequest {
+    /// Requests that tracing begins. Following this being sent, the child must
+    /// wait to receive a `Confirmation` on the respective channel and then
+    /// `raise(SIGSTOP)`.
+    ///
+    /// To avoid possible issues while allocating memory for IPC channels, ending
+    /// the tracing is instead done via `raise(SIGUSR1)`.
+    StartFfi(StartFfiInfo),
+    /// Manually overrides the code that the supervisor will return upon exiting.
+    /// Once set, it is permanent. This can be called again to change the value.
+    OverrideRetcode(i32),
+}
+
+/// Information needed to begin tracing.
+#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
+pub struct StartFfiInfo {
+    /// A vector of page addresses. These should have been automatically obtained
+    /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`.
+    pub page_ptrs: Vec<usize>,
+    /// The address of an allocation that can serve as a temporary stack.
+    /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int.
+    pub stack_ptr: usize,
+}
+
+/// A marker type confirming that the supervisor has received the request to begin
+/// tracing and is now waiting for a `SIGSTOP`.
+///
+/// The sender for this channel should live on the parent process.
+#[derive(serde::Serialize, serde::Deserialize, Debug)]
+pub struct Confirmation;
+
+/// The final results of an FFI trace, containing every relevant event detected
+/// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal.
+///
+/// The sender for this channel should live on the parent process.
+#[derive(serde::Serialize, serde::Deserialize, Debug)]
+pub struct MemEvents {
+    /// An ordered list of memory accesses that occurred. These should be assumed
+    /// to be overcautious; that is, if the size of an access is uncertain it is
+    /// pessimistically rounded up, and if the type (read/write/both) is uncertain
+    /// it is reported as whatever would be safest to assume; i.e. a read + maybe-write
+    /// becomes a read + write, etc.
+    pub acc_events: Vec<AccessEvent>,
+}
+
+/// A single memory access, conservatively overestimated
+/// in case of ambiguity.
+#[derive(serde::Serialize, serde::Deserialize, Debug)]
+pub enum AccessEvent {
+    /// A read may have occurred on no more than the specified address range.
+    Read(Range<usize>),
+    /// A write may have occurred on no more than the specified address range.
+    Write(Range<usize>),
+}
diff --git a/src/tools/miri/src/shims/native_lib/trace/mod.rs b/src/tools/miri/src/shims/native_lib/trace/mod.rs
new file mode 100644
index 0000000000000..174b06b3ac56c
--- /dev/null
+++ b/src/tools/miri/src/shims/native_lib/trace/mod.rs
@@ -0,0 +1,8 @@
+mod child;
+pub mod messages;
+mod parent;
+
+pub use self::child::{Supervisor, init_sv, register_retcode_sv};
+
+/// The size of the temporary stack we use for callbacks that the server executes in the client.
+const CALLBACK_STACK_SIZE: usize = 1024;
diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs
new file mode 100644
index 0000000000000..1d968b7a974eb
--- /dev/null
+++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs
@@ -0,0 +1,691 @@
+use std::sync::atomic::{AtomicPtr, AtomicUsize};
+
+use ipc_channel::ipc;
+use nix::sys::{ptrace, signal, wait};
+use nix::unistd;
+
+use super::CALLBACK_STACK_SIZE;
+use super::messages::{AccessEvent, Confirmation, MemEvents, StartFfiInfo, TraceRequest};
+
+/// The flags to use when calling `waitid()`.
+/// Since bitwise or on the nix version of these flags is implemented as a trait,
+/// this cannot be const directly so we do it this way.
+const WAIT_FLAGS: wait::WaitPidFlag =
+    wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED);
+
+/// Arch-specific maximum size a single access might perform. x86 value is set
+/// assuming nothing bigger than AVX-512 is available.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+const ARCH_MAX_ACCESS_SIZE: usize = 64;
+/// The largest arm64 simd instruction operates on 16 bytes.
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+const ARCH_MAX_ACCESS_SIZE: usize = 16;
+/// The max riscv vector instruction can access 8 consecutive 32-bit values.
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+const ARCH_MAX_ACCESS_SIZE: usize = 32;
+
+/// The default word size on a given platform, in bytes.
+#[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "riscv32"))]
+const ARCH_WORD_SIZE: usize = 4;
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "riscv64"))]
+const ARCH_WORD_SIZE: usize = 8;
+
+/// The address of the page set to be edited, initialised to a sentinel null
+/// pointer.
+static PAGE_ADDR: AtomicPtr<u8> = AtomicPtr::new(std::ptr::null_mut());
+/// The host pagesize, initialised to a sentinel zero value.
+pub static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);
+/// How many consecutive pages to unprotect. 1 by default, unlikely to be set
+/// higher than 2.
+static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1);
+
+/// Allows us to get common arguments from the `user_regs_t` across architectures.
+/// Normally this would land us ABI hell, but thankfully all of our usecases
+/// consist of functions with a small number of register-sized integer arguments.
+/// See <https://man7.org/linux/man-pages/man2/syscall.2.html> for sources.
+trait ArchIndependentRegs {
+    /// Gets the address of the instruction pointer.
+    fn ip(&self) -> usize;
+    /// Set the instruction pointer; remember to also set the stack pointer, or
+    /// else the stack might get messed up!
+    fn set_ip(&mut self, ip: usize);
+    /// Set the stack pointer, ideally to a zeroed-out area.
+    fn set_sp(&mut self, sp: usize);
+}
+
+// It's fine / desirable behaviour for values to wrap here, we care about just
+// preserving the bit pattern.
+#[cfg(target_arch = "x86_64")]
+#[expect(clippy::as_conversions)]
+#[rustfmt::skip]
+impl ArchIndependentRegs for libc::user_regs_struct {
+    #[inline]
+    fn ip(&self) -> usize { self.rip as _ }
+    #[inline]
+    fn set_ip(&mut self, ip: usize) { self.rip = ip as _ }
+    #[inline]
+    fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ }
+}
+
+#[cfg(target_arch = "x86")]
+#[expect(clippy::as_conversions)]
+#[rustfmt::skip]
+impl ArchIndependentRegs for libc::user_regs_struct {
+    #[inline]
+    fn ip(&self) -> usize { self.eip as _ }
+    #[inline]
+    fn set_ip(&mut self, ip: usize) { self.eip = ip as _ }
+    #[inline]
+    fn set_sp(&mut self, sp: usize) { self.esp = sp as _ }
+}
+
+#[cfg(target_arch = "aarch64")]
+#[expect(clippy::as_conversions)]
+#[rustfmt::skip]
+impl ArchIndependentRegs for libc::user_regs_struct {
+    #[inline]
+    fn ip(&self) -> usize { self.pc as _ }
+    #[inline]
+    fn set_ip(&mut self, ip: usize) { self.pc = ip as _ }
+    #[inline]
+    fn set_sp(&mut self, sp: usize) { self.sp = sp as _ }
+}
+
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+#[expect(clippy::as_conversions)]
+#[rustfmt::skip]
+impl ArchIndependentRegs for libc::user_regs_struct {
+    #[inline]
+    fn ip(&self) -> usize { self.pc as _ }
+    #[inline]
+    fn set_ip(&mut self, ip: usize) { self.pc = ip as _ }
+    #[inline]
+    fn set_sp(&mut self, sp: usize) { self.sp = sp as _ }
+}
+
+/// A unified event representing something happening on the child process. Wraps
+/// `nix`'s `WaitStatus` and our custom signals so it can all be done with one
+/// `match` statement.
+pub enum ExecEvent {
+    /// Child process requests that we begin monitoring it.
+    Start(StartFfiInfo),
+    /// Child requests that we stop monitoring and pass over the events we
+    /// detected.
+    End,
+    /// The child process with the specified pid was stopped by the given signal.
+    Status(unistd::Pid, signal::Signal),
+    /// The child process with the specified pid entered or existed a syscall.
+    Syscall(unistd::Pid),
+    /// A child process exited or was killed; if we have a return code, it is
+    /// specified.
+    Died(Option<i32>),
+}
+
+/// A listener for the FFI start info channel along with relevant state.
+pub struct ChildListener {
+    /// The matching channel for the child's `Supervisor` struct.
+    pub message_rx: ipc::IpcReceiver<TraceRequest>,
+    /// Whether an FFI call is currently ongoing.
+    pub attached: bool,
+    /// If `Some`, overrides the return code with the given value.
+    pub override_retcode: Option<i32>,
+}
+
+impl Iterator for ChildListener {
+    type Item = ExecEvent;
+
+    // Allows us to monitor the child process by just iterating over the listener.
+    // NB: This should never return None!
+    fn next(&mut self) -> Option<Self::Item> {
+        // Do not block if the child has nothing to report for `waitid`.
+        let opts = WAIT_FLAGS | wait::WaitPidFlag::WNOHANG;
+        loop {
+            // Listen to any child, not just the main one. Important if we want
+            // to allow the C code to fork further, along with being a bit of
+            // defensive programming since Linux sometimes assigns threads of
+            // the same process different PIDs with unpredictable rules...
+            match wait::waitid(wait::Id::All, opts) {
+                Ok(stat) =>
+                    match stat {
+                        // Child exited normally with a specific code set.
+                        wait::WaitStatus::Exited(_, code) => {
+                            let code = self.override_retcode.unwrap_or(code);
+                            return Some(ExecEvent::Died(Some(code)));
+                        }
+                        // Child was killed by a signal, without giving a code.
+                        wait::WaitStatus::Signaled(_, _, _) =>
+                            return Some(ExecEvent::Died(self.override_retcode)),
+                        // Child entered a syscall. Since we're always technically
+                        // tracing, only pass this along if we're actively
+                        // monitoring the child.
+                        wait::WaitStatus::PtraceSyscall(pid) =>
+                            if self.attached {
+                                return Some(ExecEvent::Syscall(pid));
+                            },
+                        // Child with the given pid was stopped by the given signal.
+                        // It's somewhat dubious when this is returned instead of
+                        // WaitStatus::Stopped, but for our purposes they are the
+                        // same thing.
+                        wait::WaitStatus::PtraceEvent(pid, signal, _) =>
+                            if self.attached {
+                                // This is our end-of-FFI signal!
+                                if signal == signal::SIGUSR1 {
+                                    self.attached = false;
+                                    return Some(ExecEvent::End);
+                                } else {
+                                    return Some(ExecEvent::Status(pid, signal));
+                                }
+                            } else {
+                                // Just pass along the signal.
+                                ptrace::cont(pid, signal).unwrap();
+                            },
+                        // Child was stopped at the given signal. Same logic as for
+                        // WaitStatus::PtraceEvent.
+                        wait::WaitStatus::Stopped(pid, signal) =>
+                            if self.attached {
+                                if signal == signal::SIGUSR1 {
+                                    self.attached = false;
+                                    return Some(ExecEvent::End);
+                                } else {
+                                    return Some(ExecEvent::Status(pid, signal));
+                                }
+                            } else {
+                                ptrace::cont(pid, signal).unwrap();
+                            },
+                        _ => (),
+                    },
+                // This case should only trigger if all children died and we
+                // somehow missed that, but it's best we not allow any room
+                // for deadlocks.
+                Err(_) => return Some(ExecEvent::Died(None)),
+            }
+
+            // Similarly, do a non-blocking poll of the IPC channel.
+            if let Ok(req) = self.message_rx.try_recv() {
+                match req {
+                    TraceRequest::StartFfi(info) =>
+                    // Should never trigger - but better to panic explicitly than deadlock!
+                        if self.attached {
+                            panic!("Attempting to begin FFI multiple times!");
+                        } else {
+                            self.attached = true;
+                            return Some(ExecEvent::Start(info));
+                        },
+                    TraceRequest::OverrideRetcode(code) => self.override_retcode = Some(code),
+                }
+            }
+
+            // Not ideal, but doing anything else might sacrifice performance.
+            std::thread::yield_now();
+        }
+    }
+}
+
+/// An error came up while waiting on the child process to do something.
+/// It likely died, with this return code if we have one.
+#[derive(Debug)]
+pub struct ExecEnd(pub Option<i32>);
+
+/// This is the main loop of the supervisor process. It runs in a separate
+/// process from the rest of Miri (but because we fork, addresses for anything
+/// created before the fork - like statics - are the same).
+pub fn sv_loop(
+    listener: ChildListener,
+    init_pid: unistd::Pid,
+    event_tx: ipc::IpcSender<MemEvents>,
+    confirm_tx: ipc::IpcSender<Confirmation>,
+) -> Result<!, ExecEnd> {
+    // Get the pagesize set and make sure it isn't still on the zero sentinel value!
+    let page_size = PAGE_SIZE.load(std::sync::atomic::Ordering::Relaxed);
+    assert_ne!(page_size, 0);
+
+    // Things that we return to the child process.
+    let mut acc_events = Vec::new();
+
+    // Memory allocated for the MiriMachine.
+    let mut ch_pages = Vec::new();
+    let mut ch_stack = None;
+
+    // An instance of the Capstone disassembler, so we don't spawn one on every access.
+    let cs = get_disasm();
+
+    // The pid of the last process we interacted with, used by default if we don't have a
+    // reason to use a different one.
+    let mut curr_pid = init_pid;
+
+    // There's an initial sigstop we need to deal with.
+    wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?;
+    ptrace::cont(curr_pid, None).unwrap();
+
+    for evt in listener {
+        match evt {
+            // start_ffi was called by the child, so prep memory.
+            ExecEvent::Start(ch_info) => {
+                // All the pages that the child process is "allowed to" access.
+                ch_pages = ch_info.page_ptrs;
+                // And the temporary callback stack it allocated for us to use later.
+                ch_stack = Some(ch_info.stack_ptr);
+
+                // We received the signal and are no longer in the main listener loop,
+                // so we can let the child move on to the end of start_ffi where it will
+                // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in
+                // order to do most ptrace operations!
+                confirm_tx.send(Confirmation).unwrap();
+                // We can't trust simply calling `Pid::this()` in the child process to give the right
+                // PID for us, so we get it this way.
+                curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap();
+
+                ptrace::syscall(curr_pid, None).unwrap();
+            }
+            // end_ffi was called by the child.
+            ExecEvent::End => {
+                // Hand over the access info we traced.
+                event_tx.send(MemEvents { acc_events }).unwrap();
+                // And reset our values.
+                acc_events = Vec::new();
+                ch_stack = None;
+
+                // No need to monitor syscalls anymore, they'd just be ignored.
+                ptrace::cont(curr_pid, None).unwrap();
+            }
+            // Child process was stopped by a signal
+            ExecEvent::Status(pid, signal) =>
+                match signal {
+                    // If it was a segfault, check if it was an artificial one
+                    // caused by it trying to access the MiriMachine memory.
+                    signal::SIGSEGV =>
+                        handle_segfault(
+                            pid,
+                            &ch_pages,
+                            ch_stack.unwrap(),
+                            page_size,
+                            &cs,
+                            &mut acc_events,
+                        )?,
+                    // Something weird happened.
+                    _ => {
+                        eprintln!("Process unexpectedly got {signal}; continuing...");
+                        // In case we're not tracing
+                        if ptrace::syscall(pid, None).is_err() {
+                            // If *this* fails too, something really weird happened
+                            // and it's probably best to just panic.
+                            signal::kill(pid, signal::SIGCONT).unwrap();
+                        }
+                    }
+                },
+            // Child entered a syscall; we wait for exits inside of this, so it
+            // should never trigger on return from a syscall we care about.
+            ExecEvent::Syscall(pid) => {
+                ptrace::syscall(pid, None).unwrap();
+            }
+            ExecEvent::Died(code) => {
+                return Err(ExecEnd(code));
+            }
+        }
+    }
+
+    unreachable!()
+}
+
+/// Spawns a Capstone disassembler for the host architecture.
+#[rustfmt::skip]
+fn get_disasm() -> capstone::Capstone {
+    use capstone::prelude::*;
+    let cs_pre = Capstone::new();
+    {
+        #[cfg(target_arch = "x86_64")]
+        {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)}
+        #[cfg(target_arch = "x86")]
+        {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)}
+        #[cfg(target_arch = "aarch64")]
+        {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)}
+        #[cfg(target_arch = "arm")]
+        {cs_pre.arm().mode(arch::arm::ArchMode::Arm)}
+        #[cfg(target_arch = "riscv64")]
+        {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV64)}
+        #[cfg(target_arch = "riscv32")]
+        {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV32)}
+    }
+    .detail(true)
+    .build()
+    .unwrap()
+}
+
+/// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`.
+/// We want to avoid that in some cases, like at the beginning of FFI.
+///
+/// If `pid` is `None`, only one wait will be done and `init_cont` should be false.
+fn wait_for_signal(
+    pid: Option<unistd::Pid>,
+    wait_signal: signal::Signal,
+    init_cont: bool,
+) -> Result<unistd::Pid, ExecEnd> {
+    if init_cont {
+        ptrace::cont(pid.unwrap(), None).unwrap();
+    }
+    // Repeatedly call `waitid` until we get the signal we want, or the process dies.
+    loop {
+        let wait_id = match pid {
+            Some(pid) => wait::Id::Pid(pid),
+            None => wait::Id::All,
+        };
+        let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecEnd(None))?;
+        let (signal, pid) = match stat {
+            // Report the cause of death, if we know it.
+            wait::WaitStatus::Exited(_, code) => {
+                return Err(ExecEnd(Some(code)));
+            }
+            wait::WaitStatus::Signaled(_, _, _) => return Err(ExecEnd(None)),
+            wait::WaitStatus::Stopped(pid, signal) => (signal, pid),
+            wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid),
+            // This covers PtraceSyscall and variants that are impossible with
+            // the flags set (e.g. WaitStatus::StillAlive).
+            _ => {
+                ptrace::cont(pid.unwrap(), None).unwrap();
+                continue;
+            }
+        };
+        if signal == wait_signal {
+            return Ok(pid);
+        } else {
+            ptrace::cont(pid, signal).map_err(|_| ExecEnd(None))?;
+        }
+    }
+}
+
+/// Grabs the access that caused a segfault and logs it down if it's to our memory,
+/// or kills the child and returns the appropriate error otherwise.
+fn handle_segfault(
+    pid: unistd::Pid,
+    ch_pages: &[usize],
+    ch_stack: usize,
+    page_size: usize,
+    cs: &capstone::Capstone,
+    acc_events: &mut Vec<AccessEvent>,
+) -> Result<(), ExecEnd> {
+    /// This is just here to not pollute the main namespace with `capstone::prelude::*`.
+    #[inline]
+    fn capstone_disassemble(
+        instr: &[u8],
+        addr: usize,
+        cs: &capstone::Capstone,
+        acc_events: &mut Vec<AccessEvent>,
+    ) -> capstone::CsResult<()> {
+        use capstone::prelude::*;
+
+        // The arch_detail is what we care about, but it relies on these temporaries
+        // that we can't drop. 0x1000 is the default base address for Captsone, and
+        // we're expecting 1 instruction.
+        let insns = cs.disasm_count(instr, 0x1000, 1)?;
+        let ins_detail = cs.insn_detail(&insns[0])?;
+        let arch_detail = ins_detail.arch_detail();
+
+        for op in arch_detail.operands() {
+            match op {
+                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+                arch::ArchOperand::X86Operand(x86_operand) => {
+                    match x86_operand.op_type {
+                        // We only care about memory accesses
+                        arch::x86::X86OperandType::Mem(_) => {
+                            let push = addr..addr.strict_add(usize::from(x86_operand.size));
+                            // It's called a "RegAccessType" but it also applies to memory
+                            let acc_ty = x86_operand.access.unwrap();
+                            if acc_ty.is_readable() {
+                                acc_events.push(AccessEvent::Read(push.clone()));
+                            }
+                            if acc_ty.is_writable() {
+                                acc_events.push(AccessEvent::Write(push));
+                            }
+                        }
+                        _ => (),
+                    }
+                }
+                #[cfg(target_arch = "aarch64")]
+                arch::ArchOperand::Arm64Operand(arm64_operand) => {
+                    // Annoyingly, we don't always get the size here, so just be pessimistic for now.
+                    match arm64_operand.op_type {
+                        arch::arm64::Arm64OperandType::Mem(_) => {
+                            // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes.
+                            let size = match arm64_operand.vas {
+                                // Not an fp/simd instruction.
+                                arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE,
+                                // 1 byte.
+                                arch::arm64::Arm64Vas::ARM64_VAS_1B => 1,
+                                // 2 bytes.
+                                arch::arm64::Arm64Vas::ARM64_VAS_1H => 2,
+                                // 4 bytes.
+                                arch::arm64::Arm64Vas::ARM64_VAS_4B
+                                | arch::arm64::Arm64Vas::ARM64_VAS_2H
+                                | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4,
+                                // 8 bytes.
+                                arch::arm64::Arm64Vas::ARM64_VAS_8B
+                                | arch::arm64::Arm64Vas::ARM64_VAS_4H
+                                | arch::arm64::Arm64Vas::ARM64_VAS_2S
+                                | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8,
+                                // 16 bytes.
+                                arch::arm64::Arm64Vas::ARM64_VAS_16B
+                                | arch::arm64::Arm64Vas::ARM64_VAS_8H
+                                | arch::arm64::Arm64Vas::ARM64_VAS_4S
+                                | arch::arm64::Arm64Vas::ARM64_VAS_2D
+                                | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16,
+                            };
+                            let push = addr..addr.strict_add(size);
+                            // FIXME: This now has access type info in the latest
+                            // git version of capstone because this pissed me off
+                            // and I added it. Change this when it updates.
+                            acc_events.push(AccessEvent::Read(push.clone()));
+                            acc_events.push(AccessEvent::Write(push));
+                        }
+                        _ => (),
+                    }
+                }
+                #[cfg(target_arch = "arm")]
+                arch::ArchOperand::ArmOperand(arm_operand) =>
+                    match arm_operand.op_type {
+                        arch::arm::ArmOperandType::Mem(_) => {
+                            // We don't get info on the size of the access, but
+                            // we're at least told if it's a vector instruction.
+                            let size = if arm_operand.vector_index.is_some() {
+                                ARCH_MAX_ACCESS_SIZE
+                            } else {
+                                ARCH_WORD_SIZE
+                            };
+                            let push = addr..addr.strict_add(size);
+                            let acc_ty = arm_operand.access.unwrap();
+                            if acc_ty.is_readable() {
+                                acc_events.push(AccessEvent::Read(push.clone()));
+                            }
+                            if acc_ty.is_writable() {
+                                acc_events.push(AccessEvent::Write(push));
+                            }
+                        }
+                        _ => (),
+                    },
+                #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+                arch::ArchOperand::RiscVOperand(risc_voperand) => {
+                    match risc_voperand {
+                        arch::riscv::RiscVOperand::Mem(_) => {
+                            // We get basically no info here.
+                            let push = addr..addr.strict_add(size);
+                            acc_events.push(AccessEvent::Read(push.clone()));
+                            acc_events.push(AccessEvent::Write(push));
+                        }
+                        _ => (),
+                    }
+                }
+                _ => unimplemented!(),
+            }
+        }
+
+        Ok(())
+    }
+
+    // Get information on what caused the segfault. This contains the address
+    // that triggered it.
+    let siginfo = ptrace::getsiginfo(pid).unwrap();
+    // All x86, ARM, etc. instructions only have at most one memory operand
+    // (thankfully!)
+    // SAFETY: si_addr is safe to call.
+    let addr = unsafe { siginfo.si_addr().addr() };
+    let page_addr = addr.strict_sub(addr.strict_rem(page_size));
+
+    if !ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) {
+        // This was a real segfault (not one of the Miri memory pages), so print some debug info and
+        // quit.
+        let regs = ptrace::getregs(pid).unwrap();
+        eprintln!("Segfault occurred during FFI at {addr:#018x}");
+        eprintln!("Expected access on pages: {ch_pages:#018x?}");
+        eprintln!("Register dump: {regs:#x?}");
+        ptrace::kill(pid).unwrap();
+        return Err(ExecEnd(None));
+    }
+
+    // Overall structure:
+    // - Get the address that caused the segfault
+    // - Unprotect the memory: we force the child to execute `mempr_off`, passing parameters via
+    //   global atomic variables. This is what we use the temporary callback stack for.
+    // - Step 1 instruction
+    // - Parse executed code to estimate size & type of access
+    // - Reprotect the memory by executing `mempr_on` in the child.
+    // - Continue
+
+    // Ensure the stack is properly zeroed out!
+    for a in (ch_stack..ch_stack.strict_add(CALLBACK_STACK_SIZE)).step_by(ARCH_WORD_SIZE) {
+        ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap();
+    }
+
+    // Guard against both architectures with upwards and downwards-growing stacks.
+    let stack_ptr = ch_stack.strict_add(CALLBACK_STACK_SIZE / 2);
+    let regs_bak = ptrace::getregs(pid).unwrap();
+    let mut new_regs = regs_bak;
+    let ip_prestep = regs_bak.ip();
+
+    // Move the instr ptr into the deprotection code.
+    #[expect(clippy::as_conversions)]
+    new_regs.set_ip(mempr_off as usize);
+    // Don't mess up the stack by accident!
+    new_regs.set_sp(stack_ptr);
+
+    // Modify the PAGE_ADDR global on the child process to point to the page
+    // that we want unprotected.
+    ptrace::write(
+        pid,
+        (&raw const PAGE_ADDR).cast_mut().cast(),
+        libc::c_long::try_from(page_addr).unwrap(),
+    )
+    .unwrap();
+
+    // Check if we also own the next page, and if so unprotect it in case
+    // the access spans the page boundary.
+    let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { 2 } else { 1 };
+    ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), flag).unwrap();
+
+    ptrace::setregs(pid, new_regs).unwrap();
+
+    // Our mempr_* functions end with a raise(SIGSTOP).
+    wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
+
+    // Step 1 instruction.
+    ptrace::setregs(pid, regs_bak).unwrap();
+    ptrace::step(pid, None).unwrap();
+    // Don't use wait_for_signal here since 1 instruction doesn't give room
+    // for any uncertainty + we don't want it `cont()`ing randomly by accident
+    // Also, don't let it continue with unprotected memory if something errors!
+    let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecEnd(None))?;
+
+    // Zero out again to be safe
+    for a in (ch_stack..ch_stack.strict_add(CALLBACK_STACK_SIZE)).step_by(ARCH_WORD_SIZE) {
+        ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap();
+    }
+
+    // Save registers and grab the bytes that were executed. This would
+    // be really nasty if it was a jump or similar but those thankfully
+    // won't do memory accesses and so can't trigger this!
+    let regs_bak = ptrace::getregs(pid).unwrap();
+    new_regs = regs_bak;
+    let ip_poststep = regs_bak.ip();
+    // We need to do reads/writes in word-sized chunks.
+    let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE);
+    let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| {
+        // This only needs to be a valid pointer in the child process, not ours.
+        ret.append(
+            &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip))
+                .unwrap()
+                .to_ne_bytes()
+                .to_vec(),
+        );
+        ret
+    });
+
+    // Now figure out the size + type of access and log it down.
+    // This will mark down e.g. the same area being read multiple times,
+    // since it's more efficient to compress the accesses at the end.
+    if capstone_disassemble(&instr, addr, cs, acc_events).is_err() {
+        // Read goes first because we need to be pessimistic.
+        acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
+        acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
+    }
+
+    // Reprotect everything and continue.
+    #[expect(clippy::as_conversions)]
+    new_regs.set_ip(mempr_on as usize);
+    new_regs.set_sp(stack_ptr);
+    ptrace::setregs(pid, new_regs).unwrap();
+    wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
+
+    ptrace::setregs(pid, regs_bak).unwrap();
+    ptrace::syscall(pid, None).unwrap();
+    Ok(())
+}
+
+// We only get dropped into these functions via offsetting the instr pointer
+// manually, so we *must not ever* unwind from them.
+
+/// Disables protections on the page whose address is currently in `PAGE_ADDR`.
+///
+/// SAFETY: `PAGE_ADDR` should be set to a page-aligned pointer to an owned page,
+/// `PAGE_SIZE` should be the host pagesize, and the range from `PAGE_ADDR` to
+/// `PAGE_SIZE` * `PAGE_COUNT` must be owned and allocated memory. No other threads
+/// should be running.
+pub unsafe extern "C" fn mempr_off() {
+    use std::sync::atomic::Ordering;
+
+    // Again, cannot allow unwinds to happen here.
+    let len = PAGE_SIZE.load(Ordering::Relaxed).saturating_mul(PAGE_COUNT.load(Ordering::Relaxed));
+    // SAFETY: Upheld by "caller".
+    unsafe {
+        // It's up to the caller to make sure this doesn't actually overflow, but
+        // we mustn't unwind from here, so...
+        if libc::mprotect(
+            PAGE_ADDR.load(Ordering::Relaxed).cast(),
+            len,
+            libc::PROT_READ | libc::PROT_WRITE,
+        ) != 0
+        {
+            // Can't return or unwind, but we can do this.
+            std::process::exit(-1);
+        }
+    }
+    // If this fails somehow we're doomed.
+    if signal::raise(signal::SIGSTOP).is_err() {
+        std::process::exit(-1);
+    }
+}
+
+/// Reenables protection on the page set by `PAGE_ADDR`.
+///
+/// SAFETY: See `mempr_off()`.
+pub unsafe extern "C" fn mempr_on() {
+    use std::sync::atomic::Ordering;
+
+    let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed));
+    // SAFETY: Upheld by "caller".
+    unsafe {
+        if libc::mprotect(PAGE_ADDR.load(Ordering::Relaxed).cast(), len, libc::PROT_NONE) != 0 {
+            std::process::exit(-1);
+        }
+    }
+    if signal::raise(signal::SIGSTOP).is_err() {
+        std::process::exit(-1);
+    }
+}
diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index 14ebb22dc2bc3..b3c58397a02bd 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -945,8 +945,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             }
             "pthread_join" => {
                 let [thread, retval] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
-                let res = this.pthread_join(thread, retval)?;
-                this.write_scalar(res, dest)?;
+                this.pthread_join(thread, retval, dest)?;
             }
             "pthread_detach" => {
                 let [thread] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
diff --git a/src/tools/miri/src/shims/unix/linux/mem.rs b/src/tools/miri/src/shims/unix/linux/mem.rs
index 8e5a3021b1c03..47732f811f1a3 100644
--- a/src/tools/miri/src/shims/unix/linux/mem.rs
+++ b/src/tools/miri/src/shims/unix/linux/mem.rs
@@ -22,8 +22,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let flags = this.read_scalar(flags)?.to_i32()?;
 
         // old_address must be a multiple of the page size
-        #[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero
-        if old_address.addr().bytes() % this.machine.page_size != 0 || new_size == 0 {
+        if !old_address.addr().bytes().is_multiple_of(this.machine.page_size) || new_size == 0 {
             this.set_last_error(LibcError("EINVAL"))?;
             return interp_ok(this.eval_libc("MAP_FAILED"));
         }
diff --git a/src/tools/miri/src/shims/unix/mem.rs b/src/tools/miri/src/shims/unix/mem.rs
index aefeee6f7a3a3..4bbbbc69c08a3 100644
--- a/src/tools/miri/src/shims/unix/mem.rs
+++ b/src/tools/miri/src/shims/unix/mem.rs
@@ -130,8 +130,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
         // addr must be a multiple of the page size, but apart from that munmap is just implemented
         // as a dealloc.
-        #[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero
-        if addr.addr().bytes() % this.machine.page_size != 0 {
+        if !addr.addr().bytes().is_multiple_of(this.machine.page_size) {
             return this.set_last_error_and_return_i32(LibcError("EINVAL"));
         }
 
diff --git a/src/tools/miri/src/shims/unix/sync.rs b/src/tools/miri/src/shims/unix/sync.rs
index eee2bbcb903da..50eb4d922891b 100644
--- a/src/tools/miri/src/shims/unix/sync.rs
+++ b/src/tools/miri/src/shims/unix/sync.rs
@@ -17,7 +17,7 @@ fn bytewise_equal_atomic_relaxed<'tcx>(
 
     // We do this in chunks of 4, so that we are okay to race with (sufficiently aligned)
     // 4-byte atomic accesses.
-    assert!(size.bytes() % 4 == 0);
+    assert!(size.bytes().is_multiple_of(4));
     for i in 0..(size.bytes() / 4) {
         let offset = Size::from_bytes(i.strict_mul(4));
         let load = |place: &MPlaceTy<'tcx>| {
diff --git a/src/tools/miri/src/shims/unix/thread.rs b/src/tools/miri/src/shims/unix/thread.rs
index 4b6615b3ea82c..a438e71a41d91 100644
--- a/src/tools/miri/src/shims/unix/thread.rs
+++ b/src/tools/miri/src/shims/unix/thread.rs
@@ -41,7 +41,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         &mut self,
         thread: &OpTy<'tcx>,
         retval: &OpTy<'tcx>,
-    ) -> InterpResult<'tcx, Scalar> {
+        return_dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
 
         if !this.ptr_is_null(this.read_pointer(retval)?)? {
@@ -51,12 +52,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
         let thread = this.read_scalar(thread)?.to_int(this.libc_ty_layout("pthread_t").size)?;
         let Ok(thread) = this.thread_id_try_from(thread) else {
-            return interp_ok(this.eval_libc("ESRCH"));
+            this.write_scalar(this.eval_libc("ESRCH"), return_dest)?;
+            return interp_ok(());
         };
 
-        this.join_thread_exclusive(thread)?;
-
-        interp_ok(Scalar::from_u32(0))
+        this.join_thread_exclusive(
+            thread,
+            /* success_retval */ Scalar::from_u32(0),
+            return_dest,
+        )
     }
 
     fn pthread_detach(&mut self, thread: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
diff --git a/src/tools/miri/src/shims/windows/foreign_items.rs b/src/tools/miri/src/shims/windows/foreign_items.rs
index 10f6df67ad47a..de10357f5faee 100644
--- a/src/tools/miri/src/shims/windows/foreign_items.rs
+++ b/src/tools/miri/src/shims/windows/foreign_items.rs
@@ -573,8 +573,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             "WaitForSingleObject" => {
                 let [handle, timeout] = this.check_shim(abi, sys_conv, link_name, args)?;
 
-                let ret = this.WaitForSingleObject(handle, timeout)?;
-                this.write_scalar(ret, dest)?;
+                this.WaitForSingleObject(handle, timeout, dest)?;
             }
             "GetCurrentProcess" => {
                 let [] = this.check_shim(abi, sys_conv, link_name, args)?;
diff --git a/src/tools/miri/src/shims/windows/thread.rs b/src/tools/miri/src/shims/windows/thread.rs
index 56ae88bb1a178..a155ec65a51fd 100644
--- a/src/tools/miri/src/shims/windows/thread.rs
+++ b/src/tools/miri/src/shims/windows/thread.rs
@@ -58,13 +58,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         &mut self,
         handle_op: &OpTy<'tcx>,
         timeout_op: &OpTy<'tcx>,
-    ) -> InterpResult<'tcx, Scalar> {
+        return_dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
 
         let handle = this.read_handle(handle_op, "WaitForSingleObject")?;
         let timeout = this.read_scalar(timeout_op)?.to_u32()?;
 
-        let thread = match handle {
+        let joined_thread_id = match handle {
             Handle::Thread(thread) => thread,
             // Unlike on posix, the outcome of joining the current thread is not documented.
             // On current Windows, it just deadlocks.
@@ -76,8 +77,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             throw_unsup_format!("`WaitForSingleObject` with non-infinite timeout");
         }
 
-        this.join_thread(thread)?;
+        this.join_thread(
+            joined_thread_id,
+            /* success_retval */ this.eval_windows("c", "WAIT_OBJECT_0"),
+            return_dest,
+        )?;
 
-        interp_ok(this.eval_windows("c", "WAIT_OBJECT_0"))
+        interp_ok(())
     }
 }
diff --git a/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs b/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs
index c63e926376d45..9646cdb206591 100644
--- a/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs
+++ b/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs
@@ -6,7 +6,7 @@ fn direct_raw(x: *const (i32, i32)) -> *const i32 {
 
 // Ensure that if a raw pointer is created via an intermediate
 // reference, we catch that. (Just in case someone decides to
-// desugar this differenly or so.)
+// desugar this differently or so.)
 fn via_ref(x: *const (i32, i32)) -> *const i32 {
     unsafe { &(*x).0 as *const i32 } //~ERROR: dangling pointer
 }
diff --git a/src/tools/opt-dist/src/exec.rs b/src/tools/opt-dist/src/exec.rs
index 64ce5cc377522..0dc6e56b9d5f4 100644
--- a/src/tools/opt-dist/src/exec.rs
+++ b/src/tools/opt-dist/src/exec.rs
@@ -122,7 +122,12 @@ impl Bootstrap {
         let metrics_path = env.build_root().join("build").join("metrics.json");
         let args = dist_args.iter().map(|arg| arg.as_str()).collect::<Vec<_>>();
         let cmd = cmd(&args).env("RUST_BACKTRACE", "full");
-        let cmd = add_shared_x_flags(env, cmd);
+        let mut cmd = add_shared_x_flags(env, cmd);
+        if env.is_fast_try_build() {
+            // We set build.extended=false for fast try builds, but we still need Cargo
+            cmd = cmd.arg("cargo");
+        }
+
         Self { cmd, metrics_path }
     }
 
@@ -189,5 +194,18 @@ impl Bootstrap {
 }
 
 fn add_shared_x_flags(env: &Environment, cmd: CmdBuilder) -> CmdBuilder {
-    if env.is_fast_try_build() { cmd.arg("--set").arg("rust.deny-warnings=false") } else { cmd }
+    if env.is_fast_try_build() {
+        // Skip things that cannot be skipped through `x ... --skip`
+        cmd.arg("--set")
+            .arg("rust.llvm-bitcode-linker=false")
+            // Skip wasm-component-ld. This also skips cargo, which we need to re-enable for dist
+            .arg("--set")
+            .arg("build.extended=false")
+            .arg("--set")
+            .arg("rust.codegen-backends=['llvm']")
+            .arg("--set")
+            .arg("rust.deny-warnings=false")
+    } else {
+        cmd
+    }
 }
diff --git a/src/tools/opt-dist/src/main.rs b/src/tools/opt-dist/src/main.rs
index d2827ec01ca7d..9c8a6637a3b10 100644
--- a/src/tools/opt-dist/src/main.rs
+++ b/src/tools/opt-dist/src/main.rs
@@ -407,13 +407,18 @@ fn main() -> anyhow::Result<()> {
         for target in [
             "rust-docs",
             "rustc-docs",
+            "rustc-dev",
+            "rust-dev",
             "rust-docs-json",
             "rust-analyzer",
             "rustc-src",
+            "extended",
             "clippy",
             "miri",
             "rustfmt",
             "gcc",
+            "generate-copyright",
+            "bootstrap",
         ] {
             build_args.extend(["--skip".to_string(), target.to_string()]);
         }
diff --git a/src/tools/test-float-parse/src/ui.rs b/src/tools/test-float-parse/src/ui.rs
index 73473eef0bfdb..1a9ba0dc1d99c 100644
--- a/src/tools/test-float-parse/src/ui.rs
+++ b/src/tools/test-float-parse/src/ui.rs
@@ -118,7 +118,7 @@ pub fn finish_all(tests: &[TestInfo], total_elapsed: Duration, cfg: &Config) ->
         match result {
             Ok(FinishedAll) => (),
             Err(EarlyExit::Timeout) => {
-                println!("      exited early; exceded {:?} timeout", cfg.timeout)
+                println!("      exited early; exceeded {:?} timeout", cfg.timeout)
             }
             Err(EarlyExit::MaxFailures) => {
                 println!("      exited early; exceeded {:?} max failures", cfg.max_failures)
diff --git a/tests/rustdoc/enum/enum-variant-non_exhaustive.rs b/tests/rustdoc/enum/enum-variant-non_exhaustive.rs
new file mode 100644
index 0000000000000..ea0234a49f640
--- /dev/null
+++ b/tests/rustdoc/enum/enum-variant-non_exhaustive.rs
@@ -0,0 +1,17 @@
+// regression test for https://github.com/rust-lang/rust/issues/142599
+
+#![crate_name = "foo"]
+
+//@ snapshot type-code 'foo/enum.Type.html' '//pre[@class="rust item-decl"]/code'
+pub enum Type {
+    #[non_exhaustive]
+    // attribute that should not be shown
+    #[warn(unsafe_code)]
+    Variant,
+}
+
+// we would love to use the `following-sibling::` axis
+// (along with an `h2[@id="aliased-type"]` query),
+// but unfortunately python doesn't implement that.
+//@ snapshot type-alias-code 'foo/type.TypeAlias.html' '//pre[@class="rust item-decl"][2]/code'
+pub type TypeAlias = Type;
diff --git a/tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html
new file mode 100644
index 0000000000000..04eea709079d7
--- /dev/null
+++ b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html
@@ -0,0 +1,4 @@
+<code>pub enum TypeAlias {
+    #[non_exhaustive]
+    Variant,
+}</code>
\ No newline at end of file
diff --git a/tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html
new file mode 100644
index 0000000000000..6c8851ea5df47
--- /dev/null
+++ b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html
@@ -0,0 +1,4 @@
+<code>pub enum Type {
+    #[non_exhaustive]
+    Variant,
+}</code>
\ No newline at end of file