From f327ba00357ba4bd91929082c059b2929f285fed Mon Sep 17 00:00:00 2001
From: Nia Espera
Date: Wed, 2 Jul 2025 02:55:41 +0200
Subject: [PATCH] various native-lib-trace-related fixups

Co-Authored-By: Ralf Jung
---
 src/alloc/isolated_alloc.rs            |  21 +-
 src/bin/miri.rs                        |  30 +--
 src/lib.rs                             |   8 +-
 src/shims/mod.rs                       |   4 +-
 src/shims/native_lib/mod.rs            |  96 +++++---
 src/shims/native_lib/trace/child.rs    | 104 ++++-----
 src/shims/native_lib/trace/messages.rs |  47 ++--
 src/shims/native_lib/trace/mod.rs      |   2 +
 src/shims/native_lib/trace/parent.rs   | 304 +++++++++++--------------
 src/shims/native_lib/trace/stub.rs     |  34 +++
 10 files changed, 312 insertions(+), 338 deletions(-)
 create mode 100644 src/shims/native_lib/trace/stub.rs

diff --git a/src/alloc/isolated_alloc.rs b/src/alloc/isolated_alloc.rs
index a7bb9b4da7..7b2f1a3eeb 100644
--- a/src/alloc/isolated_alloc.rs
+++ b/src/alloc/isolated_alloc.rs
@@ -302,23 +302,20 @@ impl IsolatedAlloc {
         }
     }
 
-    /// Returns a vector of page addresses managed by the allocator.
-    pub fn pages(&self) -> Vec<usize> {
-        let mut pages: Vec<usize> =
-            self.page_ptrs.iter().map(|p| p.expose_provenance().get()).collect();
-        for (ptr, size) in self.huge_ptrs.iter() {
-            for i in 0..size / self.page_size {
-                pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size));
-            }
-        }
-        pages
+    /// Returns a list of page addresses managed by the allocator.
+    pub fn pages(&self) -> impl Iterator<Item = usize> {
+        let pages = self.page_ptrs.iter().map(|p| p.expose_provenance().get());
+        pages.chain(self.huge_ptrs.iter().flat_map(|(ptr, size)| {
+            (0..size / self.page_size)
+                .map(|i| ptr.expose_provenance().get().strict_add(i * self.page_size))
+        }))
     }
 
     /// Protects all owned memory as `PROT_NONE`, preventing accesses.
     ///
     /// SAFETY: Accessing memory after this point will result in a segfault
     /// unless it is first unprotected.
-    pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> {
+    pub unsafe fn start_ffi(&mut self) -> Result<(), nix::errno::Errno> {
         let prot = mman::ProtFlags::PROT_NONE;
         unsafe { self.mprotect(prot) }
     }
@@ -326,7 +323,7 @@ impl IsolatedAlloc {
     /// Deprotects all owned memory by setting it to RW. Erroring here is very
     /// likely unrecoverable, so it may panic if applying those permissions
     /// fails.
-    pub fn unprep_ffi(&mut self) {
+    pub fn end_ffi(&mut self) {
         let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
         unsafe {
             self.mprotect(prot).unwrap();
diff --git a/src/bin/miri.rs b/src/bin/miri.rs
index e3b579a4ac..61cebedf08 100644
--- a/src/bin/miri.rs
+++ b/src/bin/miri.rs
@@ -233,8 +233,6 @@ impl rustc_driver::Callbacks for MiriCompilerCalls {
         } else {
             let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None)
                 .unwrap_or_else(|| {
-                    //#[cfg(target_os = "linux")]
-                    //miri::native_lib::register_retcode_sv(rustc_driver::EXIT_FAILURE);
                     tcx.dcx().abort_if_errors();
                     rustc_driver::EXIT_FAILURE
                 });
@@ -337,6 +335,9 @@ impl rustc_driver::Callbacks for MiriBeRustCompilerCalls {
 fn exit(exit_code: i32) -> ! {
     // Drop the tracing guard before exiting, so tracing calls are flushed correctly.
     deinit_loggers();
+    // Make sure the supervisor knows about the exit code.
+    #[cfg(target_os = "linux")]
+    miri::native_lib::register_retcode_sv(exit_code);
     std::process::exit(exit_code);
 }
 
@@ -355,6 +356,11 @@ fn run_compiler_and_exit(
     args: &[String],
     callbacks: &mut (dyn rustc_driver::Callbacks + Send),
 ) -> ! {
+    // Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if
+    // MIRI_BE_RUSTC is set. We do this late so that when `native_lib::init_sv` is called,
+    // there are no other threads.
+    rustc_driver::install_ctrlc_handler();
+
     // Invoke compiler, catch any unwinding panics and handle return code.
     let exit_code =
         rustc_driver::catch_with_exit_code(move || rustc_driver::run_compiler(args, callbacks));
@@ -439,10 +445,6 @@ fn main() {
     let args = rustc_driver::catch_fatal_errors(|| rustc_driver::args::raw_args(&early_dcx))
         .unwrap_or_else(|_| std::process::exit(rustc_driver::EXIT_FAILURE));
 
-    // Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if
-    // MIRI_BE_RUSTC is set.
-    rustc_driver::install_ctrlc_handler();
-
     // If the environment asks us to actually be rustc, then do that.
     if let Some(crate_kind) = env::var_os("MIRI_BE_RUSTC") {
         // Earliest rustc setup.
@@ -750,15 +752,15 @@ fn main() {
     debug!("rustc arguments: {:?}", rustc_args);
     debug!("crate arguments: {:?}", miri_config.args);
 
-    #[cfg(target_os = "linux")]
     if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing {
-        // FIXME: This should display a diagnostic / warning on error
-        // SAFETY: If any other threads exist at this point (namely for the ctrlc
-        // handler), they will not interact with anything on the main rustc/Miri
-        // thread in an async-signal-unsafe way such as by accessing shared
-        // semaphores, etc.; the handler only calls `sleep()` and `exit()`, which
-        // are async-signal-safe, as is accessing atomics
-        //let _ = unsafe { miri::native_lib::init_sv() };
+        // SAFETY: No other threads are running
+        #[cfg(target_os = "linux")]
+        if unsafe { miri::native_lib::init_sv() }.is_err() {
+            eprintln!(
+                "warning: The native-lib tracer could not be started. Is this an x86 Linux system, and does Miri have permissions to ptrace?\n\
+                Falling back to non-tracing native-lib mode."
+            );
+        }
     }
     run_compiler_and_exit(
         &rustc_args,
diff --git a/src/lib.rs b/src/lib.rs
index 642b7ee984..374f63a166 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -96,10 +96,10 @@ pub use rustc_const_eval::interpret::{self, AllocMap, Provenance as _};
 use rustc_middle::{bug, span_bug};
 use tracing::{info, trace};
 
-//#[cfg(target_os = "linux")]
-//pub mod native_lib {
-//    pub use crate::shims::{init_sv, register_retcode_sv};
-//}
+#[cfg(target_os = "linux")]
+pub mod native_lib {
+    pub use crate::shims::{init_sv, register_retcode_sv};
+}
 
 // Type aliases that set the provenance parameter.
 pub type Pointer = interpret::Pointer<Option<machine::Provenance>>;
diff --git a/src/shims/mod.rs b/src/shims/mod.rs
index 60c0e28929..b08ab101e9 100644
--- a/src/shims/mod.rs
+++ b/src/shims/mod.rs
@@ -22,8 +22,8 @@ pub mod tls;
 pub mod unwind;
 
 pub use self::files::FdTable;
-//#[cfg(target_os = "linux")]
-//pub use self::native_lib::trace::{init_sv, register_retcode_sv};
+#[cfg(target_os = "linux")]
+pub use self::native_lib::trace::{init_sv, register_retcode_sv};
 pub use self::unix::{DirTable, EpollInterestTable};
 
 /// What needs to be done after emulating an item (a shim or an intrinsic) is done.
diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs
index 9b30d8ce78..d7432a7300 100644
--- a/src/shims/native_lib/mod.rs
+++ b/src/shims/native_lib/mod.rs
@@ -1,9 +1,5 @@
 //! Implements calling functions from a native library.
 
-// FIXME: disabled since it fails to build on many targets.
-//#[cfg(target_os = "linux")]
-//pub mod trace;
-
 use std::ops::Deref;
 
 use libffi::high::call as ffi;
@@ -13,14 +9,55 @@ use rustc_middle::mir::interpret::Pointer;
 use rustc_middle::ty::{self as ty, IntTy, UintTy};
 use rustc_span::Symbol;
 
-//#[cfg(target_os = "linux")]
-//use self::trace::Supervisor;
+#[cfg_attr(
+    not(all(
+        target_os = "linux",
+        target_env = "gnu",
+        any(target_arch = "x86", target_arch = "x86_64")
+    )),
+    path = "trace/stub.rs"
+)]
+pub mod trace;
+
 use crate::*;
 
-//#[cfg(target_os = "linux")]
-//type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<MemEvents>)>;
-//#[cfg(not(target_os = "linux"))]
-type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<!>)>;
+/// The final results of an FFI trace, containing every relevant event detected
+/// by the tracer.
+#[allow(dead_code)]
+#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Debug)]
+pub struct MemEvents {
+    /// A list of memory accesses that occurred, in the order in which they occurred.
+    pub acc_events: Vec<AccessEvent>,
+}
+
+/// A single memory access.
+#[allow(dead_code)]
+#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Debug)]
+pub enum AccessEvent {
+    /// A read may have occurred on this memory range.
+    /// Some instructions *may* read memory without *always* doing that,
+    /// so this can be an over-approximation.
+    /// The range info, however, is reliable if the access did happen.
+    Read(AccessRange),
+    /// A write may have occurred on this memory range.
+    /// Some instructions *may* write memory without *always* doing that,
+    /// so this can be an over-approximation.
+    /// The range info, however, is reliable if the access did happen.
+    Write(AccessRange),
+}
+
+/// The memory touched by a given access.
+#[allow(dead_code)]
+#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Clone, Debug)]
+pub struct AccessRange {
+    /// The base address in memory where an access occurred.
+    pub addr: usize,
+    /// The number of bytes affected from the base.
+    pub size: usize,
+}
 
 impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
 trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
@@ -31,18 +68,17 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
     fn call_native_with_args(
         &mut self,
         link_name: Symbol,
         dest: &MPlaceTy<'tcx>,
         ptr: CodePtr,
         libffi_args: Vec<libffi::high::Arg<'_>>,
-    ) -> CallResult<'tcx> {
+    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
         let this = self.eval_context_mut();
-        //#[cfg(target_os = "linux")]
-        //let alloc = this.machine.allocator.as_ref().unwrap();
-
-        // SAFETY: We don't touch the machine memory past this point.
-        //#[cfg(target_os = "linux")]
-        //let (guard, stack_ptr) = unsafe { Supervisor::start_ffi(alloc) };
+        #[cfg(target_os = "linux")]
+        let alloc = this.machine.allocator.as_ref().unwrap();
+        #[cfg(not(target_os = "linux"))]
+        // Placeholder value.
+        let alloc = ();
 
-        // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
-        // as the specified primitive integer type
-        let res = 'res: {
+        trace::Supervisor::do_ffi(alloc, || {
+            // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
+            // as the specified primitive integer type
             let scalar = match dest.layout.ty.kind() {
                 // ints
                 ty::Int(IntTy::I8) => {
@@ -93,7 +129,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // have the output_type `Tuple([])`.
                 ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
                     unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
-                    break 'res interp_ok(ImmTy::uninit(dest.layout));
+                    return interp_ok(ImmTy::uninit(dest.layout));
                 }
                 ty::RawPtr(..) => {
                     let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
                     let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
                     Scalar::from_pointer(ptr, this)
                 }
                 _ =>
-                    break 'res Err(err_unsup_format!(
+                    return Err(err_unsup_format!(
                         "unsupported return type for native call: {:?}",
                         link_name
                     ))
                     .into(),
             };
             interp_ok(ImmTy::from_scalar(scalar, dest.layout))
-        };
-
-        // SAFETY: We got the guard and stack pointer from start_ffi, and
-        // the allocator is the same
-        //#[cfg(target_os = "linux")]
-        //let events = unsafe { Supervisor::end_ffi(alloc, guard, stack_ptr) };
-        //#[cfg(not(target_os = "linux"))]
-        let events = None;
-
-        interp_ok((res?, events))
+        })
     }
 
     /// Get the pointer to the function of the specified name in the shared object file,
@@ -214,10 +241,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             if !this.machine.native_call_mem_warned.replace(true) {
                 // Newly set, so first time we get here.
                 this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem {
-                    //#[cfg(target_os = "linux")]
-                    //tracing: self::trace::Supervisor::is_enabled(),
-                    //#[cfg(not(target_os = "linux"))]
-                    tracing: false,
+                    tracing: self::trace::Supervisor::is_enabled(),
                 });
             }
 
diff --git a/src/shims/native_lib/trace/child.rs b/src/shims/native_lib/trace/child.rs
index 4961e875c7..de26cb0fe5 100644
--- a/src/shims/native_lib/trace/child.rs
+++ b/src/shims/native_lib/trace/child.rs
@@ -4,12 +4,22 @@ use std::rc::Rc;
 use ipc_channel::ipc;
 use nix::sys::{ptrace, signal};
 use nix::unistd;
+use rustc_const_eval::interpret::InterpResult;
 
 use super::CALLBACK_STACK_SIZE;
-use super::messages::{Confirmation, MemEvents, StartFfiInfo, TraceRequest};
+use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
 use super::parent::{ChildListener, sv_loop};
 use crate::alloc::isolated_alloc::IsolatedAlloc;
+use crate::shims::native_lib::MemEvents;
 
+/// A handle to the single, shared supervisor process across all `MiriMachine`s.
+/// Since it would be very difficult to trace multiple FFI calls in parallel, we
+/// need to ensure that either (a) only one `MiriMachine` is performing an FFI call
+/// at any given time, or (b) there are distinct supervisor and child processes for
+/// each machine. The former was chosen here.
+///
+/// This should only contain a `None` if the supervisor has not (yet) been initialised;
+/// otherwise, if `init_sv` was called and did not error, this will always be nonempty.
 static SUPERVISOR: std::sync::Mutex<Option<Supervisor>> = std::sync::Mutex::new(None);
 
 /// The main means of communication between the child and parent process,
@@ -34,32 +44,23 @@ impl Supervisor {
         SUPERVISOR.lock().unwrap().is_some()
     }
 
-    /// Begins preparations for doing an FFI call. This should be called at
-    /// the last possible moment before entering said call. `alloc` points to
-    /// the allocator which handed out the memory used for this machine.
-    ///
+    /// Performs an arbitrary FFI call, enabling tracing from the supervisor.
     /// As this locks the supervisor via a mutex, no other threads may enter FFI
-    /// until this one returns and its guard is dropped via `end_ffi`. The
-    /// pointer returned should be passed to `end_ffi` to avoid a memory leak.
-    ///
-    /// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately
-    /// after the desired call has concluded.
-    pub unsafe fn start_ffi(
+    /// until this function returns.
+    pub fn do_ffi<'tcx>(
         alloc: &Rc<RefCell<IsolatedAlloc>>,
-    ) -> (std::sync::MutexGuard<'static, Option<Supervisor>>, Option<*mut [u8; CALLBACK_STACK_SIZE]>)
-    {
+        f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>,
+    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
         let mut sv_guard = SUPERVISOR.lock().unwrap();
-        // If the supervisor is not initialised for whatever reason, fast-fail.
-        // This might be desired behaviour, as even on platforms where ptracing
-        // is not implemented it enables us to enforce that only one FFI call
+        // If the supervisor is not initialised for whatever reason, fast-return.
+        // As a side-effect, even on platforms where ptracing
+        // is not implemented, we enforce that only one FFI call
         // happens at a time.
-        let Some(sv) = sv_guard.take() else {
-            return (sv_guard, None);
-        };
+        let Some(sv) = sv_guard.as_mut() else { return f().map(|v| (v, None)) };
 
         // Get pointers to all the pages the supervisor must allow accesses in
         // and prepare the callback stack.
-        let page_ptrs = alloc.borrow().pages();
+        let page_ptrs = alloc.borrow().pages().collect();
         let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] =
             Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast();
         let stack_ptr = raw_stack_ptr.expose_provenance();
@@ -68,9 +69,9 @@ impl Supervisor {
         // SAFETY: We do not access machine memory past this point until the
         // supervisor is ready to allow it.
         unsafe {
-            if alloc.borrow_mut().prepare_ffi().is_err() {
+            if alloc.borrow_mut().start_ffi().is_err() {
                 // Don't mess up unwinding by maybe leaving the memory partly protected
-                alloc.borrow_mut().unprep_ffi();
+                alloc.borrow_mut().end_ffi();
                 panic!("Cannot protect memory for FFI call!");
             }
         }
@@ -82,27 +83,13 @@ impl Supervisor {
         // enforce an ordering for these events.
         sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
         sv.confirm_rx.recv().unwrap();
-        *sv_guard = Some(sv);
         // We need to be stopped for the supervisor to be able to make certain
         // modifications to our memory - simply waiting on the recv() doesn't
         // count.
         signal::raise(signal::SIGSTOP).unwrap();
-        (sv_guard, Some(raw_stack_ptr))
-    }
 
-    /// Undoes FFI-related preparations, allowing Miri to continue as normal, then
-    /// gets the memory accesses and changes performed during the FFI call. Note
-    /// that this may include some spurious accesses done by `libffi` itself in
-    /// the process of executing the function call.
-    ///
-    /// SAFETY: The `sv_guard` and `raw_stack_ptr` passed must be the same ones
-    /// received by a prior call to `start_ffi`, and the allocator must be the
-    /// one passed to it also.
-    pub unsafe fn end_ffi(
-        alloc: &Rc<RefCell<IsolatedAlloc>>,
-        mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
-        raw_stack_ptr: Option<*mut [u8; CALLBACK_STACK_SIZE]>,
-    ) -> Option<MemEvents> {
+        let res = f();
+
         // We can't use IPC channels here to signal that FFI mode has ended,
         // since they might allocate memory which could get us stuck in a SIGTRAP
         // with no easy way out! While this could be worked around, it is much
@@ -113,42 +100,40 @@ impl Supervisor {
         signal::raise(signal::SIGUSR1).unwrap();
 
         // This is safe! It just sets memory to normal expected permissions.
-        alloc.borrow_mut().unprep_ffi();
+        alloc.borrow_mut().end_ffi();
 
-        // If this is `None`, then `raw_stack_ptr` is None and does not need to
-        // be deallocated (and there's no need to worry about the guard, since
-        // it contains nothing).
-        let sv = sv_guard.take()?;
         // SAFETY: Caller upholds that this pointer was allocated as a box with
         // this type.
         unsafe {
-            drop(Box::from_raw(raw_stack_ptr.unwrap()));
+            drop(Box::from_raw(raw_stack_ptr));
         }
         // On the off-chance something really weird happens, don't block forever.
-        let ret = sv
+        let events = sv
            .event_rx
            .try_recv_timeout(std::time::Duration::from_secs(5))
            .map_err(|e| {
                match e {
                    ipc::TryRecvError::IpcError(_) => (),
                    ipc::TryRecvError::Empty =>
-                        eprintln!("Waiting for accesses from supervisor timed out!"),
+                        panic!("Waiting for accesses from supervisor timed out!"),
                }
            })
            .ok();
-        // Do *not* leave the supervisor empty, or else we might get another fork...
-        *sv_guard = Some(sv);
-        ret
+
+        res.map(|v| (v, events))
     }
 }
 
 /// Initialises the supervisor process. If this function errors, then the
 /// supervisor process could not be created successfully; else, the caller
-/// is now the child process and can communicate via `start_ffi`/`end_ffi`,
-/// receiving back events through `get_events`.
+/// is now the child process and can communicate via `do_ffi`, receiving back
+/// events at the end.
 ///
 /// # Safety
-/// The invariants for `fork()` must be upheld by the caller.
+/// The invariants for `fork()` must be upheld by the caller, namely either:
+/// - Other threads do not exist, or
+/// - If they do exist, either those threads or the resulting child process
+///   only ever act in [async-signal-safe](https://www.man7.org/linux/man-pages/man7/signal-safety.7.html) ways.
 pub unsafe fn init_sv() -> Result<(), SvInitError> {
     // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
     // relevant missing bits.
@@ -191,8 +176,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
         // The child process is free to unwind, so we won't, to avoid doubly freeing
         // system resources.
         let init = std::panic::catch_unwind(|| {
-            let listener =
-                ChildListener { message_rx, attached: false, override_retcode: None };
+            let listener = ChildListener::new(message_rx, confirm_tx.clone());
             // Trace as many things as possible, to be able to handle them as needed.
             let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
                 | ptrace::Options::PTRACE_O_TRACECLONE
@@ -218,7 +202,9 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
                 // The "Ok" case means that we couldn't ptrace.
                 Ok(e) => return Err(e),
                 Err(p) => {
-                    eprintln!("Supervisor process panicked!\n{p:?}");
+                    eprintln!(
+                        "Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer."
+                    );
                     std::process::exit(1);
                 }
             }
@@ -239,13 +225,11 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
 }
 
 /// Instruct the supervisor process to return a particular code. Useful if for
-/// whatever reason this code fails to be intercepted normally. In the case of
-/// `abort_if_errors()` used in `bin/miri.rs`, the return code is erroneously
-/// given as a 0 if this is not used.
+/// whatever reason this code fails to be intercepted normally.
 pub fn register_retcode_sv(code: i32) {
     let mut sv_guard = SUPERVISOR.lock().unwrap();
-    if let Some(sv) = sv_guard.take() {
+    if let Some(sv) = sv_guard.as_mut() {
         sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap();
-        *sv_guard = Some(sv);
+        sv.confirm_rx.recv().unwrap();
     }
 }
diff --git a/src/shims/native_lib/trace/messages.rs b/src/shims/native_lib/trace/messages.rs
index 8a83dab5c0..1f9df556b5 100644
--- a/src/shims/native_lib/trace/messages.rs
+++ b/src/shims/native_lib/trace/messages.rs
@@ -1,25 +1,28 @@
 //! Houses the types that are directly sent across the IPC channels.
 //!
-//! The overall structure of a traced FFI call, from the child process's POV, is
-//! as follows:
+//! When forking to initialise the supervisor during `init_sv`, the child raises
+//! a `SIGSTOP`; if the parent successfully ptraces the child, it will allow it
+//! to resume. Else, the child will be killed by the parent.
+//!
+//! After initialisation is done, the overall structure of a traced FFI call from
+//! the child process's POV is as follows:
 //! ```
 //! message_tx.send(TraceRequest::StartFfi);
-//! confirm_rx.recv();
+//! confirm_rx.recv(); // receives a `Confirmation`
 //! raise(SIGSTOP);
 //! /* do ffi call */
 //! raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi"
-//! let events = event_rx.recv();
+//! let events = event_rx.recv(); // receives a `MemEvents`
 //! ```
 //! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including
-//! before or after all of them.
+//! before or after all of them. `confirm_rx.recv()` is to be called after, to ensure
+//! that the child does not exit before the supervisor has registered the return code.
 //!
 //! NB: sending these events out of order, skipping steps, etc. will result in
 //! unspecified behaviour from the supervisor process, so use the abstractions
-//! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is
+//! in `super::child` (namely `do_ffi()`) to handle this. It is
 //! trivially easy to cause a deadlock or crash by messing this up!
 
-use std::ops::Range;
-
 /// An IPC request sent by the child process to the parent.
 ///
 /// The sender for this channel should live on the child process.
@@ -34,6 +37,8 @@ pub enum TraceRequest {
     StartFfi(StartFfiInfo),
     /// Manually overrides the code that the supervisor will return upon exiting.
     /// Once set, it is permanent. This can be called again to change the value.
+    ///
+    /// After sending this, the child must wait to receive a `Confirmation`.
     OverrideRetcode(i32),
 }
 
@@ -41,7 +46,7 @@ pub enum TraceRequest {
 #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
 pub struct StartFfiInfo {
     /// A vector of page addresses. These should have been automatically obtained
-    /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`.
+    /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::start_ffi`.
     pub page_ptrs: Vec<usize>,
     /// The address of an allocation that can serve as a temporary stack.
     /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int.
     pub stack_ptr: usize,
 }
 
 /// An IPC confirmation. Sent to the child process to indicate that the supervisor has
 /// finished the requested task.
 ///
 /// The sender for this channel should live on the parent process.
 #[derive(serde::Serialize, serde::Deserialize, Debug)]
 pub struct Confirmation;
-
-/// The final results of an FFI trace, containing every relevant event detected
-/// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal.
-///
-/// The sender for this channel should live on the parent process.
-#[derive(serde::Serialize, serde::Deserialize, Debug)]
-pub struct MemEvents {
-    /// An ordered list of memory accesses that occurred. These should be assumed
-    /// to be overcautious; that is, if the size of an access is uncertain it is
-    /// pessimistically rounded up, and if the type (read/write/both) is uncertain
-    /// it is reported as whatever would be safest to assume; i.e. a read + maybe-write
-    /// becomes a read + write, etc.
-    pub acc_events: Vec<AccessEvent>,
-}
-
-/// A single memory access, conservatively overestimated
-/// in case of ambiguity.
-#[derive(serde::Serialize, serde::Deserialize, Debug)]
-pub enum AccessEvent {
-    /// A read may have occurred on no more than the specified address range.
-    Read(Range<usize>),
-    /// A write may have occurred on no more than the specified address range.
-    Write(Range<usize>),
-}
diff --git a/src/shims/native_lib/trace/mod.rs b/src/shims/native_lib/trace/mod.rs
index 174b06b3ac..c8abacfb5e 100644
--- a/src/shims/native_lib/trace/mod.rs
+++ b/src/shims/native_lib/trace/mod.rs
@@ -5,4 +5,6 @@ mod parent;
 pub use self::child::{Supervisor, init_sv, register_retcode_sv};
 
 /// The size of the temporary stack we use for callbacks that the server executes in the client.
+/// This should be big enough that `mempr_on` and `mempr_off` can safely be jumped into with the
+/// stack pointer pointing to a "stack" of this size without overflowing it.
 const CALLBACK_STACK_SIZE: usize = 1024;
diff --git a/src/shims/native_lib/trace/parent.rs b/src/shims/native_lib/trace/parent.rs
index dfb0b35da6..fc4047eec5 100644
--- a/src/shims/native_lib/trace/parent.rs
+++ b/src/shims/native_lib/trace/parent.rs
@@ -5,26 +5,17 @@ use nix::sys::{ptrace, signal, wait};
 use nix::unistd;
 
 use super::CALLBACK_STACK_SIZE;
-use super::messages::{AccessEvent, Confirmation, MemEvents, StartFfiInfo, TraceRequest};
+use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
+use crate::shims::native_lib::{AccessEvent, AccessRange, MemEvents};
 
 /// The flags to use when calling `waitid()`.
-/// Since bitwise or on the nix version of these flags is implemented as a trait,
-/// this cannot be const directly so we do it this way.
 const WAIT_FLAGS: wait::WaitPidFlag =
-    wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED);
-
-/// Arch-specific maximum size a single access might perform. x86 value is set
-/// assuming nothing bigger than AVX-512 is available.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-const ARCH_MAX_ACCESS_SIZE: usize = 64;
-/// The largest arm64 simd instruction operates on 16 bytes.
-#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
-const ARCH_MAX_ACCESS_SIZE: usize = 16;
+    wait::WaitPidFlag::WUNTRACED.union(wait::WaitPidFlag::WEXITED);
 
 /// The default word size on a given platform, in bytes.
-#[cfg(any(target_arch = "x86", target_arch = "arm"))]
+#[cfg(target_arch = "x86")]
 const ARCH_WORD_SIZE: usize = 4;
-#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+#[cfg(target_arch = "x86_64")]
 const ARCH_WORD_SIZE: usize = 8;
 
 /// The address of the page set to be edited, initialised to a sentinel null
 /// pointer.
@@ -53,39 +44,25 @@ trait ArchIndependentRegs {
 // It's fine / desirable behaviour for values to wrap here, we care about just
 // preserving the bit pattern.
 #[cfg(target_arch = "x86_64")]
-#[expect(clippy::as_conversions)]
 #[rustfmt::skip]
 impl ArchIndependentRegs for libc::user_regs_struct {
     #[inline]
-    fn ip(&self) -> usize { self.rip as _ }
+    fn ip(&self) -> usize { self.rip.try_into().unwrap() }
     #[inline]
-    fn set_ip(&mut self, ip: usize) { self.rip = ip as _ }
+    fn set_ip(&mut self, ip: usize) { self.rip = ip.try_into().unwrap() }
     #[inline]
-    fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ }
+    fn set_sp(&mut self, sp: usize) { self.rsp = sp.try_into().unwrap() }
 }
 
 #[cfg(target_arch = "x86")]
-#[expect(clippy::as_conversions)]
 #[rustfmt::skip]
 impl ArchIndependentRegs for libc::user_regs_struct {
     #[inline]
-    fn ip(&self) -> usize { self.eip as _ }
+    fn ip(&self) -> usize { self.eip.try_into().unwrap() }
     #[inline]
-    fn set_ip(&mut self, ip: usize) { self.eip = ip as _ }
+    fn set_ip(&mut self, ip: usize) { self.eip = ip.try_into().unwrap() }
     #[inline]
-    fn set_sp(&mut self, sp: usize) { self.esp = sp as _ }
-}
-
-#[cfg(target_arch = "aarch64")]
-#[expect(clippy::as_conversions)]
-#[rustfmt::skip]
-impl ArchIndependentRegs for libc::user_regs_struct {
-    #[inline]
-    fn ip(&self) -> usize { self.pc as _ }
-    #[inline]
-    fn set_ip(&mut self, ip: usize) { self.pc = ip as _ }
-    #[inline]
-    fn set_sp(&mut self, sp: usize) { self.sp = sp as _ }
+    fn set_sp(&mut self, sp: usize) { self.esp = sp.try_into().unwrap() }
 }
 
 /// A unified event representing something happening on the child process. Wraps
@@ -109,11 +86,24 @@ pub enum ExecEvent {
 /// A listener for the FFI start info channel along with relevant state.
 pub struct ChildListener {
     /// The matching channel for the child's `Supervisor` struct.
-    pub message_rx: ipc::IpcReceiver<TraceRequest>,
+    message_rx: ipc::IpcReceiver<TraceRequest>,
+    /// The channel used to acknowledge requests such as `TraceRequest::OverrideRetcode`.
+    confirm_tx: ipc::IpcSender<Confirmation>,
     /// Whether an FFI call is currently ongoing.
-    pub attached: bool,
+    attached: bool,
     /// If `Some`, overrides the return code with the given value.
-    pub override_retcode: Option<i32>,
+    override_retcode: Option<i32>,
+    /// Last code obtained from a child exiting.
+    last_code: Option<i32>,
+}
+
+impl ChildListener {
+    pub fn new(
+        message_rx: ipc::IpcReceiver<TraceRequest>,
+        confirm_tx: ipc::IpcSender<Confirmation>,
+    ) -> Self {
+        Self { message_rx, confirm_tx, attached: false, override_retcode: None, last_code: None }
+    }
 }
 
 impl Iterator for ChildListener {
@@ -133,16 +123,10 @@ impl Iterator for ChildListener {
                 Ok(stat) => match stat {
                     // Child exited normally with a specific code set.
-                    wait::WaitStatus::Exited(_, code) => {
-                        let code = self.override_retcode.unwrap_or(code);
-                        return Some(ExecEvent::Died(Some(code)));
-                    }
+                    wait::WaitStatus::Exited(_, code) => self.last_code = Some(code),
                     // Child was killed by a signal, without giving a code.
-                    wait::WaitStatus::Signaled(_, _, _) =>
-                        return Some(ExecEvent::Died(self.override_retcode)),
-                    // Child entered a syscall. Since we're always technically
-                    // tracing, only pass this along if we're actively
-                    // monitoring the child.
+                    wait::WaitStatus::Signaled(_, _, _) => self.last_code = None,
+                    // Child entered or exited a syscall.
                     wait::WaitStatus::PtraceSyscall(pid) =>
                         if self.attached {
                             return Some(ExecEvent::Syscall(pid));
@@ -179,10 +163,8 @@ impl Iterator for ChildListener {
                 },
                 _ => (),
             },
-            // This case should only trigger if all children died and we
-            // somehow missed that, but it's best we not allow any room
-            // for deadlocks.
-            Err(_) => return Some(ExecEvent::Died(None)),
+            // This case should only trigger when all children died.
+            Err(_) => return Some(ExecEvent::Died(self.override_retcode.or(self.last_code))),
         }
 
         // Similarly, do a non-blocking poll of the IPC channel.
@@ -196,7 +178,10 @@ impl Iterator for ChildListener {
                     self.attached = true;
                     return Some(ExecEvent::Start(info));
                 },
-                TraceRequest::OverrideRetcode(code) => self.override_retcode = Some(code),
+                TraceRequest::OverrideRetcode(code) => {
+                    self.override_retcode = Some(code);
+                    self.confirm_tx.send(Confirmation).unwrap();
+                }
             }
         }
 
@@ -211,6 +196,12 @@ impl Iterator for ChildListener {
 #[derive(Debug)]
 pub struct ExecEnd(pub Option<i32>);
 
+/// Whether to call `ptrace::cont()` immediately. Used exclusively by `wait_for_signal`.
+enum InitialCont {
+    Yes,
+    No,
+}
+
 /// This is the main loop of the supervisor process. It runs in a separate
 /// process from the rest of Miri (but because we fork, addresses for anything
 /// created before the fork - like statics - are the same).
@@ -239,12 +230,12 @@ pub fn sv_loop(
     let mut curr_pid = init_pid;
 
     // There's an initial sigstop we need to deal with.
-    wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?;
+    wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No)?;
     ptrace::cont(curr_pid, None).unwrap();
 
     for evt in listener {
         match evt {
-            // start_ffi was called by the child, so prep memory.
+            // Child started ffi, so prep memory.
             ExecEvent::Start(ch_info) => {
                 // All the pages that the child process is "allowed to" access.
                 ch_pages = ch_info.page_ptrs;
@@ -252,17 +243,17 @@ pub fn sv_loop(
                 ch_stack = Some(ch_info.stack_ptr);
 
                 // We received the signal and are no longer in the main listener loop,
-                // so we can let the child move on to the end of start_ffi where it will
+                // so we can let the child move on to the end of the ffi prep where it will
                 // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in
                 // order to do most ptrace operations!
                 confirm_tx.send(Confirmation).unwrap();
                 // We can't trust simply calling `Pid::this()` in the child process to give the right
                 // PID for us, so we get it this way.
-                curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap();
+                curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap();
                 ptrace::syscall(curr_pid, None).unwrap();
             }
-            // end_ffi was called by the child.
+            // Child wants to end tracing.
             ExecEvent::End => {
                 // Hand over the access info we traced.
                 event_tx.send(MemEvents { acc_events }).unwrap();
@@ -322,10 +313,6 @@ fn get_disasm() -> capstone::Capstone {
     {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)}
     #[cfg(target_arch = "x86")]
     {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)}
-    #[cfg(target_arch = "aarch64")]
-    {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)}
-    #[cfg(target_arch = "arm")]
-    {cs_pre.arm().mode(arch::arm::ArchMode::Arm)}
     }
     .detail(true)
     .build()
@@ -339,9 +326,9 @@ fn get_disasm() -> capstone::Capstone {
 fn wait_for_signal(
     pid: Option<unistd::Pid>,
     wait_signal: signal::Signal,
-    init_cont: bool,
+    init_cont: InitialCont,
 ) -> Result<unistd::Pid, ExecEnd> {
-    if init_cont {
+    if matches!(init_cont, InitialCont::Yes) {
         ptrace::cont(pid.unwrap(), None).unwrap();
     }
     // Repeatedly call `waitid` until we get the signal we want, or the process dies.
@@ -374,6 +361,74 @@ fn wait_for_signal(
     }
 }
 
+/// Add the memory events from `op` being executed while there is a memory access at `addr` to
+/// `acc_events`. Return whether this was a memory operand.
+fn capstone_find_events(
+    addr: usize,
+    op: &capstone::arch::ArchOperand,
+    acc_events: &mut Vec<AccessEvent>,
+) -> bool {
+    use capstone::prelude::*;
+    match op {
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        arch::ArchOperand::X86Operand(x86_operand) => {
+            match x86_operand.op_type {
+                // We only care about memory accesses
+                arch::x86::X86OperandType::Mem(_) => {
+                    let push = AccessRange { addr, size: x86_operand.size.into() };
+                    // It's called a "RegAccessType" but it also applies to memory
+                    let acc_ty = x86_operand.access.unwrap();
+                    // The same instruction might do both reads and writes, so potentially add both.
+                    // We do not know the order in which they happened, but writing and then reading
+                    // makes little sense so we put the read first. That is also the more
+                    // conservative choice.
+                    if acc_ty.is_readable() {
+                        acc_events.push(AccessEvent::Read(push.clone()));
+                    }
+                    if acc_ty.is_writable() {
+                        acc_events.push(AccessEvent::Write(push));
+                    }
+
+                    return true;
+                }
+                _ => (),
+            }
+        }
+        // FIXME: arm64
+        _ => unimplemented!(),
+    }
+
+    false
+}
+
+/// Extract the events from the given instruction.
+fn capstone_disassemble(
+    instr: &[u8],
+    addr: usize,
+    cs: &capstone::Capstone,
+    acc_events: &mut Vec<AccessEvent>,
+) -> capstone::CsResult<()> {
+    // The arch_detail is what we care about, but it relies on these temporaries
+    // that we can't drop. 0x1000 is the default base address for Capstone, and
+    // we're expecting 1 instruction.
+    let insns = cs.disasm_count(instr, 0x1000, 1)?;
+    let ins_detail = cs.insn_detail(&insns[0])?;
+    let arch_detail = ins_detail.arch_detail();
+
+    let mut found_mem_op = false;
+
+    for op in arch_detail.operands() {
+        if capstone_find_events(addr, &op, acc_events) {
+            if found_mem_op {
+                panic!("more than one memory operand found; we don't know which one accessed what");
+            }
+            found_mem_op = true;
+        }
+    }
+
+    Ok(())
+}
+
 /// Grabs the access that caused a segfault and logs it down if it's to our memory,
 /// or kills the child and returns the appropriate error otherwise.
 fn handle_segfault(
@@ -384,111 +439,6 @@ fn handle_segfault(
-    /// This is just here to not pollute the main namespace with `capstone::prelude::*`.
-    #[inline]
-    fn capstone_disassemble(
-        instr: &[u8],
-        addr: usize,
-        cs: &capstone::Capstone,
-        acc_events: &mut Vec<AccessEvent>,
-    ) -> capstone::CsResult<()> {
-        use capstone::prelude::*;
-
-        // The arch_detail is what we care about, but it relies on these temporaries
-        // that we can't drop. 0x1000 is the default base address for Captsone, and
-        // we're expecting 1 instruction.
- let insns = cs.disasm_count(instr, 0x1000, 1)?; - let ins_detail = cs.insn_detail(&insns[0])?; - let arch_detail = ins_detail.arch_detail(); - - for op in arch_detail.operands() { - match op { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - arch::ArchOperand::X86Operand(x86_operand) => { - match x86_operand.op_type { - // We only care about memory accesses - arch::x86::X86OperandType::Mem(_) => { - let push = addr..addr.strict_add(usize::from(x86_operand.size)); - // It's called a "RegAccessType" but it also applies to memory - let acc_ty = x86_operand.access.unwrap(); - if acc_ty.is_readable() { - acc_events.push(AccessEvent::Read(push.clone())); - } - if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); - } - } - _ => (), - } - } - #[cfg(target_arch = "aarch64")] - arch::ArchOperand::Arm64Operand(arm64_operand) => { - // Annoyingly, we don't always get the size here, so just be pessimistic for now. - match arm64_operand.op_type { - arch::arm64::Arm64OperandType::Mem(_) => { - // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes. - let size = match arm64_operand.vas { - // Not an fp/simd instruction. - arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE, - // 1 byte. - arch::arm64::Arm64Vas::ARM64_VAS_1B => 1, - // 2 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_1H => 2, - // 4 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_4B - | arch::arm64::Arm64Vas::ARM64_VAS_2H - | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4, - // 8 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_8B - | arch::arm64::Arm64Vas::ARM64_VAS_4H - | arch::arm64::Arm64Vas::ARM64_VAS_2S - | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8, - // 16 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_16B - | arch::arm64::Arm64Vas::ARM64_VAS_8H - | arch::arm64::Arm64Vas::ARM64_VAS_4S - | arch::arm64::Arm64Vas::ARM64_VAS_2D - | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16, - }; - let push = addr..addr.strict_add(size); - // FIXME: This now has access type info in the latest - // git version of capstone because this pissed me off - // and I added it. Change this when it updates. - acc_events.push(AccessEvent::Read(push.clone())); - acc_events.push(AccessEvent::Write(push)); - } - _ => (), - } - } - #[cfg(target_arch = "arm")] - arch::ArchOperand::ArmOperand(arm_operand) => - match arm_operand.op_type { - arch::arm::ArmOperandType::Mem(_) => { - // We don't get info on the size of the access, but - // we're at least told if it's a vector instruction. - let size = if arm_operand.vector_index.is_some() { - ARCH_MAX_ACCESS_SIZE - } else { - ARCH_WORD_SIZE - }; - let push = addr..addr.strict_add(size); - let acc_ty = arm_operand.access.unwrap(); - if acc_ty.is_readable() { - acc_events.push(AccessEvent::Read(push.clone())); - } - if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); - } - } - _ => (), - }, - _ => unimplemented!(), - } - } - - Ok(()) - } - // Get information on what caused the segfault. This contains the address // that triggered it. let siginfo = ptrace::getsiginfo(pid).unwrap(); @@ -515,7 +465,7 @@ fn handle_segfault( // global atomic variables. This is what we use the temporary callback stack for. // - Step 1 instruction // - Parse executed code to estimate size & type of access - // - Reprotect the memory by executing `mempr_on` in the child. + // - Reprotect the memory by executing `mempr_on` in the child, using the callback stack again. // - Continue // Ensure the stack is properly zeroed out! 
@@ -552,7 +502,7 @@ fn handle_segfault(
     ptrace::setregs(pid, new_regs).unwrap();
 
     // Our mempr_* functions end with a raise(SIGSTOP).
-    wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
+    wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?;
 
     // Step 1 instruction.
     ptrace::setregs(pid, regs_bak).unwrap();
@@ -573,6 +523,12 @@ fn handle_segfault(
     let regs_bak = ptrace::getregs(pid).unwrap();
     new_regs = regs_bak;
     let ip_poststep = regs_bak.ip();
+
+    // Ensure that we've actually gone forwards.
+    assert!(ip_poststep > ip_prestep);
+    // But not by too much. 64 bytes should be "big enough" on ~any architecture.
+    assert!(ip_prestep.strict_add(64) > ip_poststep);
+
     // We need to do reads/writes in word-sized chunks.
     let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE);
     let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| {
@@ -587,20 +543,14 @@ fn handle_segfault(
     });
 
     // Now figure out the size + type of access and log it down.
-    // This will mark down e.g. the same area being read multiple times,
-    // since it's more efficient to compress the accesses at the end.
-    if capstone_disassemble(&instr, addr, cs, acc_events).is_err() {
-        // Read goes first because we need to be pessimistic.
-        acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
-        acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
-    }
+    capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction");
 
     // Reprotect everything and continue.
     #[expect(clippy::as_conversions)]
     new_regs.set_ip(mempr_on as usize);
     new_regs.set_sp(stack_ptr);
     ptrace::setregs(pid, new_regs).unwrap();
-    wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
+    wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?;
 
     ptrace::setregs(pid, regs_bak).unwrap();
     ptrace::syscall(pid, None).unwrap();
diff --git a/src/shims/native_lib/trace/stub.rs b/src/shims/native_lib/trace/stub.rs
new file mode 100644
index 0000000000..22787a6f6f
--- /dev/null
+++ b/src/shims/native_lib/trace/stub.rs
@@ -0,0 +1,34 @@
+use rustc_const_eval::interpret::InterpResult;
+
+static SUPERVISOR: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+pub struct Supervisor;
+
+#[derive(Debug)]
+pub struct SvInitError;
+
+impl Supervisor {
+    #[inline(always)]
+    pub fn is_enabled() -> bool {
+        false
+    }
+
+    pub fn do_ffi<'tcx, T>(
+        _: T,
+        f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>,
+    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<super::MemEvents>)> {
+        // We acquire the lock to ensure that no two FFI calls run concurrently.
+        let _g = SUPERVISOR.lock().unwrap();
+        f().map(|v| (v, None))
+    }
+}
+
+#[inline(always)]
+#[allow(dead_code, clippy::missing_safety_doc)]
+pub unsafe fn init_sv() -> Result<!, SvInitError> {
+    Err(SvInitError)
+}
+
+#[inline(always)]
+#[allow(dead_code)]
+pub fn register_retcode_sv<T>(_: T) {}
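
---

A note on the `#[cfg_attr(..., path = "trace/stub.rs")]` declaration introduced in `src/shims/native_lib/mod.rs` above: instead of scattering `#[cfg]`s over every use site, the single module name `trace` resolves to the real ptrace-based tracer on supported targets and to `trace/stub.rs` everywhere else. Below is a minimal sketch of that pattern; the `backend` module, file names, and `run` function are hypothetical and only illustrate the mechanism, not code from this patch:

```rust
// src/main.rs
//
// On non-Linux targets, `backend` resolves to src/backend/stub.rs instead of
// the default src/backend.rs. Both files must export the same public items,
// since the rest of the crate is compiled against whichever one is selected.
#[cfg_attr(not(target_os = "linux"), path = "backend/stub.rs")]
mod backend;

fn main() {
    // Call sites need no cfg: the path swap already happened at compile time.
    backend::run();
}

// src/backend.rs (the "real" implementation):
//
//     pub fn run() {
//         println!("running with the Linux-only backend");
//     }

// src/backend/stub.rs (the portable fallback):
//
//     pub fn run() {
//         println!("running with the portable fallback");
//     }
```

This mirrors how `trace/stub.rs` provides `Supervisor::do_ffi`, `init_sv`, and `register_retcode_sv` with compatible signatures but non-tracing behaviour, so `call_native_with_args` compiles unchanged on every target while only Linux x86/x86_64-gnu builds get the actual supervisor.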