From a6b031d1f2832ee5c416d1f5c46b938cfa10c35e Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Tue, 27 May 2025 22:31:23 +0200 Subject: [PATCH 1/6] ffi ptracing fix review comments ?? docs move init to arg parsing docs again --- Cargo.lock | 21 ++ Cargo.toml | 1 + src/alloc/isolated_alloc.rs | 46 +++- src/alloc_addresses/mod.rs | 37 ++- src/shims/native_lib.rs | 23 +- src/shims/trace/child.rs | 13 + src/shims/trace/parent.rs | 528 ++++++++++++++++++++++++++++++++---- 7 files changed, 613 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 95124ac6fe..d3123caaa4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -121,6 +121,26 @@ dependencies = [ "serde", ] +[[package]] +name = "capstone" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a" +dependencies = [ + "capstone-sys", + "libc", +] + +[[package]] +name = "capstone-sys" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "cargo-platform" version = "0.1.9" @@ -591,6 +611,7 @@ version = "0.1.0" dependencies = [ "aes", "bitflags", + "capstone", "chrono", "chrono-tz", "colored", diff --git a/Cargo.toml b/Cargo.toml index 76018efc4a..0b4b8e26bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ libloading = "0.8" nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] } ipc-channel = "0.19.0" serde = { version = "1.0.219", features = ["derive"] } +capstone = "0.13" [dev-dependencies] ui_test = "0.29.1" diff --git a/src/alloc/isolated_alloc.rs b/src/alloc/isolated_alloc.rs index 123597ed88..1f5f7dd5df 100644 --- a/src/alloc/isolated_alloc.rs +++ b/src/alloc/isolated_alloc.rs @@ -1,5 +1,6 @@ use std::alloc::{self, Layout}; +use nix::sys::mman; use rustc_index::bit_set::DenseBitSet; /// How many bytes of memory each bit in the bitset represents. @@ -271,13 +272,54 @@ impl IsolatedAlloc { pub fn pages(&self) -> Vec { let mut pages: Vec<_> = self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect(); - for (ptr, size) in &self.huge_ptrs { + self.huge_ptrs.iter().for_each(|(ptr, size)| { for i in 0..size / self.page_size { pages.push(ptr.expose_provenance().strict_add(i * self.page_size)); } - } + }); pages } + + /// Protects all owned memory as `PROT_NONE`, preventing accesses. + /// + /// SAFETY: Accessing memory after this point will result in a segfault + /// unless it is first unprotected. + pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> { + let prot = mman::ProtFlags::PROT_NONE; + unsafe { self.mprotect(prot) } + } + + /// Deprotects all owned memory by setting it to RW. Erroring here is very + /// likely unrecoverable, so it may panic if applying those permissions + /// fails. + pub fn unprep_ffi(&mut self) { + let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE; + unsafe { + self.mprotect(prot).unwrap(); + } + } + + /// Applies `prot` to every page managed by the allocator. + /// + /// SAFETY: Accessing memory in violation of the protection flags will + /// trigger a segfault. 
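The new `prepare_ffi`/`unprep_ffi` pair boils down to one `mprotect` sweep over every page the allocator owns, which the `mprotect` method below performs over `page_ptrs` and `huge_ptrs`. A standalone sketch of the same sweep, with an illustrative `protect_pages` helper standing in for Miri's allocator type:

```rust
use std::ptr::NonNull;

use nix::sys::mman::{self, ProtFlags};

/// Illustrative sketch: flip a set of owned pages to `prot`, e.g.
/// `PROT_NONE` before an FFI call and `PROT_READ | PROT_WRITE` after.
///
/// SAFETY: every pointer must be a live, page-aligned allocation of at
/// least `page_size` bytes, and nothing may touch the memory while it is
/// protected.
unsafe fn protect_pages(
    pages: &[NonNull<u8>],
    page_size: usize,
    prot: ProtFlags,
) -> Result<(), nix::errno::Errno> {
    for &page in pages {
        // SAFETY: upheld by the caller, as described above.
        unsafe { mman::mprotect(page.cast(), page_size, prot) }?;
    }
    Ok(())
}
```

Protecting with `PROT_NONE` instead of unmapping keeps the addresses stable, which is what later lets the supervisor unprotect exactly the page a faulting access touched.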
+ unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> { + for &pg in &self.page_ptrs { + unsafe { + // We already know only non-null ptrs are pushed to self.pages + let addr: std::ptr::NonNull = + std::ptr::NonNull::new_unchecked(pg.cast()); + mman::mprotect(addr, self.page_size, prot)?; + } + } + for &(hpg, size) in &self.huge_ptrs { + unsafe { + let addr = std::ptr::NonNull::new_unchecked(hpg.cast()); + mman::mprotect(addr, size.next_multiple_of(self.page_size), prot)?; + } + } + Ok(()) + } } #[cfg(test)] diff --git a/src/alloc_addresses/mod.rs b/src/alloc_addresses/mod.rs index 4a038fe648..2abb2d91ab 100644 --- a/src/alloc_addresses/mod.rs +++ b/src/alloc_addresses/mod.rs @@ -470,13 +470,46 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// This overapproximates the modifications which external code might make to memory: /// We set all reachable allocations as initialized, mark all reachable provenances as exposed /// and overwrite them with `Provenance::WILDCARD`. - fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> { + fn prepare_exposed_for_native_call(&mut self, _paranoid: bool) -> InterpResult<'tcx> { let this = self.eval_context_mut(); // We need to make a deep copy of this list, but it's fine; it also serves as scratch space // for the search within `prepare_for_native_call`. let exposed: Vec = this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect(); - this.prepare_for_native_call(exposed) + this.prepare_for_native_call(exposed /*, paranoid*/) + } + + /// Makes use of information obtained about memory accesses during FFI to determine which + /// provenances should be exposed. Note that if `prepare_exposed_for_native_call` was not + /// called before the FFI (with `paranoid` set to false) then some of the writes may be + /// lost! + #[cfg(target_os = "linux")] + fn apply_events( + &mut self, + events: crate::shims::trace::messages::MemEvents, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + let mut reads = vec![]; + let mut writes = vec![]; + for acc in events.acc_events { + match acc { + // Ideally, we'd skip reads that occur after certain bytes were + // already written to. However, these are always just conservative + // overestimates - Read(range) means "a read maybe happened + // spanning at most range" - so we can't make use of this for + // now. Maybe we could also skip over reads/writes that hit the + // same bytes, but that's best added together with the stuff above. + shims::trace::AccessEvent::Read(range) => reads.push(range), + shims::trace::AccessEvent::Write(range) => { + writes.push(range); + } + } + } + let _exposed: Vec = + this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect(); + interp_ok(()) + //this.apply_accesses(exposed, events.reads, events.writes) } } diff --git a/src/shims/native_lib.rs b/src/shims/native_lib.rs index 53d1de060f..127366d9d9 100644 --- a/src/shims/native_lib.rs +++ b/src/shims/native_lib.rs @@ -219,8 +219,17 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } } - // Prepare all exposed memory. - this.prepare_exposed_for_native_call()?; + // Prepare all exposed memory, depending on whether we have a supervisor process. 
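Looking back at `apply_events`: its comments note that the traced ranges are conservative overestimates and that reads/writes hitting the same bytes could eventually be deduplicated. A hypothetical range-compression helper of the kind those comments allude to (not part of this patch):

```rust
use std::ops::Range;

/// Hypothetical helper: collapse possibly-overlapping access ranges so that
/// repeated hits on the same bytes become a single span.
fn merge_ranges(mut ranges: Vec<Range<usize>>) -> Vec<Range<usize>> {
    ranges.sort_by_key(|r| r.start);
    let mut merged: Vec<Range<usize>> = Vec::new();
    for r in ranges {
        match merged.last_mut() {
            // Overlapping or adjacent: extend the previous span.
            Some(last) if r.start <= last.end => last.end = last.end.max(r.end),
            // Disjoint: start a new span.
            _ => merged.push(r),
        }
    }
    merged
}
```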
+ #[cfg(target_os = "linux")] + if super::trace::Supervisor::poll() { + this.prepare_exposed_for_native_call(false)?; + } else { + //this.prepare_exposed_for_native_call(true)?; + //eprintln!("Oh noes!"); + panic!("No ptrace!"); + } + #[cfg(not(target_os = "linux"))] + this.prepare_exposed_for_native_call(true)?; // Convert them to `libffi::high::Arg` type. let libffi_args = libffi_args @@ -229,7 +238,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { .collect::>>(); // Call the function and store output, depending on return type in the function signature. - let (ret, _) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + let (ret, maybe_memevents) = + this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + + #[cfg(target_os = "linux")] + if let Some(events) = maybe_memevents { + this.apply_events(events)?; + } + #[cfg(not(target_os = "linux"))] + let _ = maybe_memevents; // Suppress the unused warning this.write_immediate(*ret, dest)?; interp_ok(true) diff --git a/src/shims/trace/child.rs b/src/shims/trace/child.rs index dcfdaad748..3bb06490e5 100644 --- a/src/shims/trace/child.rs +++ b/src/shims/trace/child.rs @@ -65,6 +65,16 @@ impl Supervisor { let stack_ptr = raw_stack_ptr.expose_provenance(); let start_info = StartFfiInfo { page_ptrs, stack_ptr }; + // SAFETY: We do not access machine memory past this point until the + // supervisor is ready to allow it + unsafe { + if alloc.borrow_mut().prepare_ffi().is_err() { + // Don't mess up unwinding by maybe leaving the memory partly protected + alloc.borrow_mut().unprep_ffi(); + panic!("Cannot protect memory for FFI call!"); + } + } + // Send over the info. // NB: if we do not wait to receive a blank confirmation response, it is // possible that the supervisor is alerted of the SIGSTOP *before* it has @@ -102,6 +112,9 @@ impl Supervisor { // normal signal::raise(signal::SIGUSR1).unwrap(); + // This is safe! It just sets memory to normal expected permissions + alloc.borrow_mut().unprep_ffi(); + // If this is `None`, then `raw_stack_ptr` is None and does not need to // be deallocated (and there's no need to worry about the guard, since // it contains nothing) diff --git a/src/shims/trace/parent.rs b/src/shims/trace/parent.rs index 5eb8aa8465..14e11319d9 100644 --- a/src/shims/trace/parent.rs +++ b/src/shims/trace/parent.rs @@ -1,16 +1,108 @@ +use std::sync::atomic::{AtomicPtr, AtomicUsize}; + use ipc_channel::ipc; use nix::sys::{ptrace, signal, wait}; use nix::unistd; -use super::StartFfiInfo; -use super::messages::{Confirmation, MemEvents, TraceRequest}; +use crate::shims::trace::messages::{Confirmation, MemEvents, TraceRequest}; +use crate::shims::trace::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo}; /// The flags to use when calling `waitid()`. -/// Since bitwise OR on the nix version of these flags is implemented as a trait, -/// we can't use them directly so we do it this way +/// Since bitwise or on the nix version of these flags is implemented as a trait, +/// this cannot be const directly so we do it this way const WAIT_FLAGS: wait::WaitPidFlag = wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); +/// Arch-specific maximum size a single access might perform. x86 value is set +/// assuming nothing bigger than AVX-512 is available. +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +const ARCH_MAX_ACCESS_SIZE: usize = 64; +/// The largest arm64 simd instruction operates on 16 bytes. 
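The `start_ffi` handshake above hinges on one primitive: the child raises SIGSTOP, and the supervisor must actually observe that stop through a wait before most ptrace operations are legal. A minimal Linux-only sketch of just that primitive:

```rust
use nix::sys::{signal, wait};
use nix::unistd::{self, ForkResult};

fn main() {
    match unsafe { unistd::fork() }.unwrap() {
        ForkResult::Child => {
            // Child: stop ourselves and wait to be continued.
            signal::raise(signal::Signal::SIGSTOP).unwrap();
            std::process::exit(0);
        }
        ForkResult::Parent { child } => {
            // WUNTRACED makes waitpid report the stop instead of waiting
            // for an exit.
            let stat = wait::waitpid(child, Some(wait::WaitPidFlag::WUNTRACED)).unwrap();
            assert!(matches!(
                stat,
                wait::WaitStatus::Stopped(_, signal::Signal::SIGSTOP)
            ));
            // The child is now reliably stopped; let it run to completion.
            signal::kill(child, signal::Signal::SIGCONT).unwrap();
            wait::waitpid(child, None).unwrap();
        }
    }
}
```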
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +const ARCH_MAX_ACCESS_SIZE: usize = 16; +/// The max riscv vector instruction can access 8 consecutive 32-bit values. +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +const ARCH_MAX_ACCESS_SIZE: usize = 32; + +/// The default word size on a given platform, in bytes. +#[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "riscv32"))] +const ARCH_WORD_SIZE: usize = 4; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "riscv64"))] +const ARCH_WORD_SIZE: usize = 8; + +/// The address of the page set to be edited, initialised to a sentinel null +/// pointer. +static PAGE_ADDR: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); +/// The host pagesize, initialised to a sentinel zero value. +pub static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0); +/// How many consecutive pages to unprotect. 1 by default, unlikely to be set +/// higher than 2. +static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1); + +/// Allows us to get common arguments from the `user_regs_t` across architectures. +/// Normally this would land us ABI hell, but thankfully all of our usecases +/// consist of functions with a small number of register-sized integer arguments. +/// See for sources +trait ArchIndependentRegs { + /// Gets the address of the instruction pointer. + fn ip(&self) -> usize; + /// Set the instruction pointer; remember to also set the stack pointer, or + /// else the stack might get messed up! + fn set_ip(&mut self, ip: usize); + /// Set the stack pointer, ideally to a zeroed-out area. + fn set_sp(&mut self, sp: usize); +} + +// It's fine / desirable behaviour for values to wrap here, we care about just +// preserving the bit pattern +#[cfg(target_arch = "x86_64")] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.rip as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.rip = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ } +} + +#[cfg(target_arch = "x86")] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.eip as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.eip = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.esp = sp as _ } +} + +#[cfg(target_arch = "aarch64")] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.pc as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.pc = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.sp = sp as _ } +} + +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.pc as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.pc = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.sp = sp as _ } +} + /// A unified event representing something happening on the child process. Wraps /// `nix`'s `WaitStatus` and our custom signals so it can all be done with one /// `match` statement. @@ -22,7 +114,7 @@ pub enum ExecEvent { End, /// The child process with the specified pid was stopped by the given signal. Status(unistd::Pid, signal::Signal), - /// The child process with the specified pid entered or exited a syscall. 
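`ArchIndependentRegs` exists so the supervisor can divert a stopped child: save its registers, point the instruction pointer at a helper function, hand it a scratch stack, and restore everything afterwards. Roughly, for x86_64 (the `divert_to` name and signature are illustrative):

```rust
use nix::sys::ptrace;
use nix::unistd::Pid;

/// Illustrative sketch (x86_64 Linux): make a stopped, traced child resume
/// at `target` on a scratch stack. Returns the saved registers so the
/// caller can later restore them with `ptrace::setregs(pid, saved)`.
#[cfg(target_arch = "x86_64")]
fn divert_to(
    pid: Pid,
    target: usize,
    scratch_sp: usize,
) -> nix::Result<libc::user_regs_struct> {
    let saved = ptrace::getregs(pid)?;
    let mut regs = saved;
    regs.rip = target as u64;
    regs.rsp = scratch_sp as u64;
    ptrace::setregs(pid, regs)?;
    Ok(saved)
}
```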
+ /// The child process with the specified pid entered or existed a syscall. Syscall(unistd::Pid), /// A child process exited or was killed; if we have a return code, it is /// specified. @@ -33,6 +125,8 @@ pub enum ExecEvent { pub struct ChildListener { /// The matching channel for the child's `Supervisor` struct. pub message_rx: ipc::IpcReceiver, + /// The main child process' pid. + pub pid: unistd::Pid, /// Whether an FFI call is currently ongoing. pub attached: bool, /// If `Some`, overrides the return code with the given value. @@ -57,6 +151,7 @@ impl Iterator for ChildListener { match stat { // Child exited normally with a specific code set wait::WaitStatus::Exited(_, code) => { + //eprintln!("Exited main {code}"); let code = self.override_retcode.unwrap_or(code); return Some(ExecEvent::Died(Some(code))); } @@ -84,8 +179,11 @@ impl Iterator for ChildListener { return Some(ExecEvent::Status(pid, signal)); } } else { - // Just pass along the signal - ptrace::cont(pid, signal).unwrap(); + // Log that this happened and pass along the signal. + // If we don't do the kill, the child will instead + // act as if it never received this signal! + eprintln!("Ignoring PtraceEvent {signal:?}"); + signal::kill(pid, signal).unwrap(); }, // Child was stopped at the given signal. Same logic as for // WaitStatus::PtraceEvent @@ -98,7 +196,8 @@ impl Iterator for ChildListener { return Some(ExecEvent::Status(pid, signal)); } } else { - ptrace::cont(pid, signal).unwrap(); + eprintln!("Ignoring Stopped {signal:?}"); + signal::kill(pid, signal).unwrap(); }, _ => (), }, @@ -134,6 +233,8 @@ impl Iterator for ChildListener { enum ExecError { /// The child process died with this return code, if we have one. Died(Option), + /// Something errored, but we should ignore it and proceed. + Shrug, } /// This is the main loop of the supervisor process. It runs in a separate @@ -141,49 +242,50 @@ enum ExecError { /// created before the fork - like statics - are the same). pub fn sv_loop( listener: ChildListener, - init_pid: unistd::Pid, event_tx: ipc::IpcSender, confirm_tx: ipc::IpcSender, - _page_size: usize, + page_size: usize, ) -> Result> { // Things that we return to the child process let mut acc_events = Vec::new(); // Memory allocated on the MiriMachine - let mut _ch_pages = Vec::new(); - let mut _ch_stack = None; + let mut ch_pages = Vec::new(); + let mut ch_stack = None; + + // An instance of the Capstone disassembler, so we don't spawn one on every access + let cs = get_disasm(); - // The pid of the last process we interacted with, used by default if we don't have a - // reason to use a different one - let mut curr_pid = init_pid; + // The pid of the process that we forked from, used by default if we don't + // have a reason to use another one. 
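The `ChildListener` iterator above never blocks: `WNOHANG` makes `waitid` return immediately, so the same loop can also poll the IPC channel. The wait half of that pattern in isolation (a sketch; the real loop dispatches every status instead of discarding them):

```rust
use nix::sys::wait::{self, WaitPidFlag, WaitStatus};

fn poll_children() -> Option<WaitStatus> {
    let opts = WaitPidFlag::WUNTRACED | WaitPidFlag::WEXITED | WaitPidFlag::WNOHANG;
    match wait::waitid(wait::Id::All, opts) {
        // Nothing to report yet; the caller should yield and retry.
        Ok(WaitStatus::StillAlive) => None,
        Ok(stat) => Some(stat),
        // No children are left to wait on.
        Err(_) => None,
    }
}
```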
+ let main_pid = listener.pid; // There's an initial sigstop we need to deal with - wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| { + wait_for_signal(main_pid, signal::SIGSTOP, false).map_err(|e| { match e { ExecError::Died(code) => code, + ExecError::Shrug => None, } })?; - ptrace::cont(curr_pid, None).unwrap(); + ptrace::cont(main_pid, None).unwrap(); for evt in listener { match evt { // start_ffi was called by the child, so prep memory ExecEvent::Start(ch_info) => { // All the pages that the child process is "allowed to" access - _ch_pages = ch_info.page_ptrs; + ch_pages = ch_info.page_ptrs; // And the fake stack it allocated for us to use later - _ch_stack = Some(ch_info.stack_ptr); + ch_stack = Some(ch_info.stack_ptr); // We received the signal and are no longer in the main listener loop, // so we can let the child move on to the end of start_ffi where it will // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in // order to do most ptrace operations! confirm_tx.send(Confirmation).unwrap(); - // We can't trust simply calling `Pid::this()` in the child process to give the right - // PID for us, so we get it this way - curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); + wait_for_signal(main_pid, signal::SIGSTOP, false).unwrap(); - ptrace::syscall(curr_pid, None).unwrap(); + ptrace::syscall(main_pid, None).unwrap(); } // end_ffi was called by the child ExecEvent::End => { @@ -191,21 +293,43 @@ pub fn sv_loop( event_tx.send(MemEvents { acc_events }).unwrap(); // And reset our values acc_events = Vec::new(); - _ch_stack = None; + ch_stack = None; // No need to monitor syscalls anymore, they'd just be ignored - ptrace::cont(curr_pid, None).unwrap(); + ptrace::cont(main_pid, None).unwrap(); } // Child process was stopped by a signal - ExecEvent::Status(pid, signal) => { - eprintln!("Process unexpectedly got {signal}; continuing..."); - // In case we're not tracing - if ptrace::syscall(pid, signal).is_err() { - // If *this* fails too, something really weird happened - // and it's probably best to just panic - signal::kill(pid, signal::SIGCONT).unwrap(); - } - } + ExecEvent::Status(pid, signal) => + match signal { + // If it was a segfault, check if it was an artificial one + // caused by it trying to access the MiriMachine memory + signal::SIGSEGV => + match handle_segfault( + pid, + &ch_pages, + ch_stack.unwrap(), + page_size, + &cs, + &mut acc_events, + ) { + Err(e) => + match e { + ExecError::Died(code) => return Err(code), + ExecError::Shrug => continue, + }, + _ => (), + }, + // Something weird happened + _ => { + eprintln!("Process unexpectedly got {signal}; continuing..."); + // In case we're not tracing + if ptrace::syscall(pid, None).is_err() { + // If *this* fails too, something really weird happened + // and it's probably best to just panic + signal::kill(pid, signal::SIGCONT).unwrap(); + } + } + }, // Child entered a syscall; we wait for exits inside of this, so it // should never trigger on return from a syscall we care about ExecEvent::Syscall(pid) => { @@ -220,44 +344,350 @@ pub fn sv_loop( unreachable!() } +/// Spawns a Capstone disassembler for the host architecture. 
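`get_disasm` below builds the disassembler once per supervisor. For reference, a compact example of the capstone 0.13 calls that `handle_segfault` later makes (x86_64; the bytes encode `mov rax, [rdi]`, and 0x1000 is an arbitrary base address):

```rust
use capstone::prelude::*;

fn main() -> capstone::CsResult<()> {
    let cs = Capstone::new()
        .x86()
        .mode(arch::x86::ArchMode::Mode64)
        .detail(true) // required for operand-level information
        .build()?;
    let insns = cs.disasm_count(&[0x48, 0x8b, 0x07], 0x1000, 1)?;
    let detail = cs.insn_detail(&insns[0])?;
    for op in detail.arch_detail().operands() {
        // For this instruction: one register operand and one memory operand.
        println!("{op:?}");
    }
    Ok(())
}
```

Enabling `.detail(true)` is what makes the per-operand access information available at all; without it capstone only reports mnemonics.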
+#[rustfmt::skip] +fn get_disasm() -> capstone::Capstone { + use capstone::prelude::*; + let cs_pre = Capstone::new(); + { + #[cfg(target_arch = "x86_64")] + {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)} + #[cfg(target_arch = "x86")] + {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)} + #[cfg(target_arch = "aarch64")] + {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)} + #[cfg(target_arch = "arm")] + {cs_pre.arm().mode(arch::arm::ArchMode::Arm)} + #[cfg(target_arch = "riscv64")] + {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV64)} + #[cfg(target_arch = "riscv32")] + {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV32)} + } + .detail(true) + .build() + .unwrap() +} + /// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`. /// We want to avoid that in some cases, like at the beginning of FFI. -/// -/// If `pid` is `None`, only one wait will be done and `init_cont` should be false. fn wait_for_signal( - pid: Option, + pid: unistd::Pid, wait_signal: signal::Signal, init_cont: bool, -) -> Result { +) -> Result<(), ExecError> { if init_cont { - ptrace::cont(pid.unwrap(), None).unwrap(); + ptrace::cont(pid, None).unwrap(); } // Repeatedly call `waitid` until we get the signal we want, or the process dies loop { - let wait_id = match pid { - Some(pid) => wait::Id::Pid(pid), - None => wait::Id::All, - }; - let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; - let (signal, pid) = match stat { + let stat = + wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + let signal = match stat { // Report the cause of death, if we know it wait::WaitStatus::Exited(_, code) => { + //eprintln!("Exited sig1 {code}"); return Err(ExecError::Died(Some(code))); } wait::WaitStatus::Signaled(_, _, _) => return Err(ExecError::Died(None)), - wait::WaitStatus::Stopped(pid, signal) => (signal, pid), - wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), + wait::WaitStatus::Stopped(_, signal) => signal, + wait::WaitStatus::PtraceEvent(_, signal, _) => signal, // This covers PtraceSyscall and variants that are impossible with // the flags set (e.g. WaitStatus::StillAlive) _ => { - ptrace::cont(pid.unwrap(), None).unwrap(); + ptrace::cont(pid, None).unwrap(); continue; } }; if signal == wait_signal { - return Ok(pid); + break; } else { ptrace::cont(pid, None).map_err(|_| ExecError::Died(None))?; } } + Ok(()) +} + +/// Grabs the access that caused a segfault and logs it down if it's to our memory, +/// or kills the child and returns the appropriate error otherwise. +fn handle_segfault( + pid: unistd::Pid, + ch_pages: &[usize], + ch_stack: usize, + page_size: usize, + cs: &capstone::Capstone, + acc_events: &mut Vec, +) -> Result<(), ExecError> { + /// This is just here to not pollute the main namespace with `capstone::prelude::*`. + #[inline] + fn capstone_disassemble( + instr: &[u8], + addr: usize, + cs: &capstone::Capstone, + acc_events: &mut Vec, + ) -> capstone::CsResult<()> { + use capstone::prelude::*; + + // The arch_detail is what we care about, but it relies on these temporaries + // that we can't drop. 
0x1000 is the default base address for Captsone, and + // we're expecting 1 instruction + let insns = cs.disasm_count(instr, 0x1000, 1)?; + let ins_detail = cs.insn_detail(&insns[0])?; + let arch_detail = ins_detail.arch_detail(); + + for op in arch_detail.operands() { + match op { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + arch::ArchOperand::X86Operand(x86_operand) => { + match x86_operand.op_type { + // We only care about memory accesses + arch::x86::X86OperandType::Mem(_) => { + let push = addr..addr.strict_add(usize::from(x86_operand.size)); + // It's called a "RegAccessType" but it also applies to memory + let acc_ty = x86_operand.access.unwrap(); + if acc_ty.is_readable() { + acc_events.push(AccessEvent::Read(push.clone())); + } + if acc_ty.is_writable() { + acc_events.push(AccessEvent::Write(push)); + } + } + _ => (), + } + } + #[cfg(target_arch = "aarch64")] + arch::ArchOperand::Arm64Operand(arm64_operand) => { + // Annoyingly, we don't always get the size here, so just be pessimistic for now + match arm64_operand.op_type { + arch::arm64::Arm64OperandType::Mem(_) => { + // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes + let size = match arm64_operand.vas { + // Not an fp/simd instruction + arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE, + // 1 byte + arch::arm64::Arm64Vas::ARM64_VAS_1B => 1, + // 2 bytes + arch::arm64::Arm64Vas::ARM64_VAS_1H => 2, + // 4 bytes + arch::arm64::Arm64Vas::ARM64_VAS_4B + | arch::arm64::Arm64Vas::ARM64_VAS_2H + | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4, + // 8 bytes + arch::arm64::Arm64Vas::ARM64_VAS_8B + | arch::arm64::Arm64Vas::ARM64_VAS_4H + | arch::arm64::Arm64Vas::ARM64_VAS_2S + | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8, + // 16 bytes + arch::arm64::Arm64Vas::ARM64_VAS_16B + | arch::arm64::Arm64Vas::ARM64_VAS_8H + | arch::arm64::Arm64Vas::ARM64_VAS_4S + | arch::arm64::Arm64Vas::ARM64_VAS_2D + | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16, + }; + let push = addr..addr.strict_add(size); + // FIXME: This now has access type info in the latest + // git version of capstone because this pissed me off + // and I added it. Change this when it updates + acc_events.push(AccessEvent::Read(push.clone())); + acc_events.push(AccessEvent::Write(push)); + } + _ => (), + } + } + #[cfg(target_arch = "arm")] + arch::ArchOperand::ArmOperand(arm_operand) => + match arm_operand.op_type { + arch::arm::ArmOperandType::Mem(_) => { + // We don't get info on the size of the access, but + // we're at least told if it's a vector inssizetruction + let size = if arm_operand.vector_index.is_some() { + ARCH_MAX_ACCESS_SIZE + } else { + ARCH_WORD_SIZE + }; + let push = addr..addr.strict_add(size); + let acc_ty = arm_operand.access.unwrap(); + if acc_ty.is_readable() { + acc_events.push(AccessEvent::Read(push.clone())); + } + if acc_ty.is_writable() { + acc_events.push(AccessEvent::Write(push)); + } + } + _ => (), + }, + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] + arch::ArchOperand::RiscVOperand(risc_voperand) => { + match risc_voperand { + arch::riscv::RiscVOperand::Mem(_) => { + // We get basically no info here + let push = addr..addr.strict_add(size); + acc_events.push(AccessEvent::Read(push.clone())); + acc_events.push(AccessEvent::Write(push)); + } + _ => (), + } + } + _ => unimplemented!(), + } + } + + Ok(()) + } + + // Get information on what caused the segfault. This contains the address + // that triggered it + let siginfo = ptrace::getsiginfo(pid).map_err(|_| ExecError::Shrug)?; + // All x86, ARM, etc. 
instructions only have at most one memory operand + // (thankfully!) + // SAFETY: si_addr is safe to call + let addr = unsafe { siginfo.si_addr().addr() }; + let page_addr = addr.strict_sub(addr.strict_rem(page_size)); + + if ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) { + // Overall structure: + // - Get the address that caused the segfault + // - Unprotect the memory + // - Step 1 instruction + // - Parse executed code to estimate size & type of access + // - Reprotect the memory + // - Continue + let stack_ptr = ch_stack.strict_add(FAKE_STACK_SIZE / 2); + let regs_bak = ptrace::getregs(pid).unwrap(); + let mut new_regs = regs_bak; + let ip_prestep = regs_bak.ip(); + + // Move the instr ptr into the deprotection code + #[expect(clippy::as_conversions)] + new_regs.set_ip(mempr_off as usize); + // Don't mess up the stack by accident! + new_regs.set_sp(stack_ptr); + + // Modify the PAGE_ADDR global on the child process to point to the page + // that we want unprotected + ptrace::write( + pid, + (&raw const PAGE_ADDR).cast_mut().cast(), + libc::c_long::try_from(page_addr).unwrap(), + ) + .unwrap(); + + // Check if we also own the next page, and if so unprotect it in case + // the access spans the page boundary + if ch_pages.contains(&page_addr.strict_add(page_size)) { + ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 2).unwrap(); + } else { + ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 1).unwrap(); + } + + ptrace::setregs(pid, new_regs).unwrap(); + + // Our mempr_* functions end with a raise(SIGSTOP) + wait_for_signal(pid, signal::SIGSTOP, true)?; + + // Step 1 instruction + ptrace::setregs(pid, regs_bak).unwrap(); + ptrace::step(pid, None).unwrap(); + // Don't use wait_for_signal here since 1 instruction doesn't give room + // for any uncertainty + we don't want it `cont()`ing randomly by accident + // Also, don't let it continue with unprotected memory if something errors! + let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + + // Save registers and grab the bytes that were executed. This would + // be really nasty if it was a jump or similar but those thankfully + // won't do memory accesses and so can't trigger this! + let regs_bak = ptrace::getregs(pid).unwrap(); + new_regs = regs_bak; + let ip_poststep = regs_bak.ip(); + // We need to do reads/writes in word-sized chunks + let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); + let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { + // This only needs to be a valid pointer in the child process, not ours + ret.append( + &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip)) + .unwrap() + .to_ne_bytes() + .to_vec(), + ); + ret + }); + + // Now figure out the size + type of access and log it down + // This will mark down e.g. 
the same area being read multiple times, + // since it's more efficient to compress the accesses at the end + if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { + // Read goes first because we need to be pessimistic + acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); + acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); + } + + // Reprotect everything and continue + #[expect(clippy::as_conversions)] + new_regs.set_ip(mempr_on as usize); + new_regs.set_sp(stack_ptr); + ptrace::setregs(pid, new_regs).unwrap(); + wait_for_signal(pid, signal::SIGSTOP, true)?; + + ptrace::setregs(pid, regs_bak).unwrap(); + ptrace::syscall(pid, None).unwrap(); + Ok(()) + } else { + // This was a real segfault, so print some debug info and quit + let regs = ptrace::getregs(pid).unwrap(); + eprintln!("Segfault occurred during FFI at {addr:#018x}"); + eprintln!("Expected access on pages: {ch_pages:#018x?}"); + eprintln!("Register dump: {regs:#x?}"); + ptrace::kill(pid).unwrap(); + Err(ExecError::Died(None)) + } +} + +// We only get dropped into these functions via offsetting the instr pointer +// manually, so we *must not ever* unwind from them + +/// Disables protections on the page whose address is currently in `PAGE_ADDR`. +/// +/// SAFETY: `PAGE_ADDR` should be set to a page-aligned pointer to an owned page, +/// `PAGE_SIZE` should be the host pagesize, and the range from `PAGE_ADDR` to +/// `PAGE_SIZE` * `PAGE_COUNT` must be owned and allocated memory. No other threads +/// should be running. +pub unsafe extern "C" fn mempr_off() { + use std::sync::atomic::Ordering; + + let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed)); + // SAFETY: Upheld by caller + unsafe { + // It's up to the caller to make sure this doesn't actually overflow, but + // we mustn't unwind from here, so... + if libc::mprotect( + PAGE_ADDR.load(Ordering::Relaxed).cast(), + len, + libc::PROT_READ | libc::PROT_WRITE, + ) != 0 + { + // Can't return or unwind, but we can do this + std::process::exit(-1); + } + } + // If this fails somehow we're doomed + if signal::raise(signal::SIGSTOP).is_err() { + std::process::exit(-1); + } +} + +/// Reenables protection on the page set by `PAGE_ADDR`. +/// +/// SAFETY: See `mempr_off()`. +pub unsafe extern "C" fn mempr_on() { + use std::sync::atomic::Ordering; + + let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed)); + // SAFETY: Upheld by caller + unsafe { + if libc::mprotect(PAGE_ADDR.load(Ordering::Relaxed).cast(), len, libc::PROT_NONE) != 0 { + std::process::exit(-1); + } + } + if signal::raise(signal::SIGSTOP).is_err() { + std::process::exit(-1); + } } From c5c2967f5e197795b9ec8776b8edee11d34f9249 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 15 Jun 2025 23:22:23 +0200 Subject: [PATCH 2/6] fix the early init --- src/shims/trace/parent.rs | 61 ++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/shims/trace/parent.rs b/src/shims/trace/parent.rs index 14e11319d9..37a7e56ebb 100644 --- a/src/shims/trace/parent.rs +++ b/src/shims/trace/parent.rs @@ -125,8 +125,6 @@ pub enum ExecEvent { pub struct ChildListener { /// The matching channel for the child's `Supervisor` struct. pub message_rx: ipc::IpcReceiver, - /// The main child process' pid. - pub pid: unistd::Pid, /// Whether an FFI call is currently ongoing. pub attached: bool, /// If `Some`, overrides the return code with the given value. 
@@ -179,11 +177,8 @@ impl Iterator for ChildListener { return Some(ExecEvent::Status(pid, signal)); } } else { - // Log that this happened and pass along the signal. - // If we don't do the kill, the child will instead - // act as if it never received this signal! - eprintln!("Ignoring PtraceEvent {signal:?}"); - signal::kill(pid, signal).unwrap(); + // Just pass along the signal + ptrace::cont(pid, signal).unwrap(); }, // Child was stopped at the given signal. Same logic as for // WaitStatus::PtraceEvent @@ -196,8 +191,7 @@ impl Iterator for ChildListener { return Some(ExecEvent::Status(pid, signal)); } } else { - eprintln!("Ignoring Stopped {signal:?}"); - signal::kill(pid, signal).unwrap(); + ptrace::cont(pid, signal).unwrap(); }, _ => (), }, @@ -242,6 +236,7 @@ enum ExecError { /// created before the fork - like statics - are the same). pub fn sv_loop( listener: ChildListener, + init_pid: unistd::Pid, event_tx: ipc::IpcSender, confirm_tx: ipc::IpcSender, page_size: usize, @@ -256,18 +251,18 @@ pub fn sv_loop( // An instance of the Capstone disassembler, so we don't spawn one on every access let cs = get_disasm(); - // The pid of the process that we forked from, used by default if we don't - // have a reason to use another one. - let main_pid = listener.pid; + // The pid of the last process we interacted with, used by default if we don't have a + // reason to use a different one + let mut curr_pid = init_pid; // There's an initial sigstop we need to deal with - wait_for_signal(main_pid, signal::SIGSTOP, false).map_err(|e| { + wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| { match e { ExecError::Died(code) => code, ExecError::Shrug => None, } })?; - ptrace::cont(main_pid, None).unwrap(); + ptrace::cont(curr_pid, None).unwrap(); for evt in listener { match evt { @@ -283,9 +278,11 @@ pub fn sv_loop( // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in // order to do most ptrace operations! confirm_tx.send(Confirmation).unwrap(); - wait_for_signal(main_pid, signal::SIGSTOP, false).unwrap(); + // We can't trust simply calling `Pid::this()` in the child process to give the right + // PID for us, so we get it this way + curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); - ptrace::syscall(main_pid, None).unwrap(); + ptrace::syscall(curr_pid, None).unwrap(); } // end_ffi was called by the child ExecEvent::End => { @@ -296,7 +293,7 @@ pub fn sv_loop( ch_stack = None; // No need to monitor syscalls anymore, they'd just be ignored - ptrace::cont(main_pid, None).unwrap(); + ptrace::cont(curr_pid, None).unwrap(); } // Child process was stopped by a signal ExecEvent::Status(pid, signal) => @@ -370,41 +367,45 @@ fn get_disasm() -> capstone::Capstone { /// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`. /// We want to avoid that in some cases, like at the beginning of FFI. +/// +/// If `pid` is `None`, only one wait will be done and `init_cont` should be false. 
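Patch 2 restores the wait-for-any scheme because, at `StartFfi` time, the supervisor cannot trust a pid reported from inside the child; instead it waits on all children and captures whichever one actually stopped. A simplified sketch of that capture step (the real `wait_for_signal` below also forwards non-matching signals rather than discarding them):

```rust
use nix::sys::{signal, wait};
use nix::unistd::Pid;

fn wait_any_sigstop(flags: wait::WaitPidFlag) -> nix::Result<Pid> {
    loop {
        match wait::waitid(wait::Id::All, flags)? {
            // Found it: this pid is the one to ptrace from now on.
            wait::WaitStatus::Stopped(pid, signal::Signal::SIGSTOP) => return Ok(pid),
            // Anything else: keep waiting (a simplification).
            _ => continue,
        }
    }
}
```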
fn wait_for_signal( - pid: unistd::Pid, + pid: Option, wait_signal: signal::Signal, init_cont: bool, -) -> Result<(), ExecError> { +) -> Result { if init_cont { - ptrace::cont(pid, None).unwrap(); + ptrace::cont(pid.unwrap(), None).unwrap(); } // Repeatedly call `waitid` until we get the signal we want, or the process dies loop { - let stat = - wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; - let signal = match stat { + let wait_id = match pid { + Some(pid) => wait::Id::Pid(pid), + None => wait::Id::All, + }; + let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + let (signal, pid) = match stat { // Report the cause of death, if we know it wait::WaitStatus::Exited(_, code) => { //eprintln!("Exited sig1 {code}"); return Err(ExecError::Died(Some(code))); } wait::WaitStatus::Signaled(_, _, _) => return Err(ExecError::Died(None)), - wait::WaitStatus::Stopped(_, signal) => signal, - wait::WaitStatus::PtraceEvent(_, signal, _) => signal, + wait::WaitStatus::Stopped(pid, signal) => (signal, pid), + wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), // This covers PtraceSyscall and variants that are impossible with // the flags set (e.g. WaitStatus::StillAlive) _ => { - ptrace::cont(pid, None).unwrap(); + ptrace::cont(pid.unwrap(), None).unwrap(); continue; } }; if signal == wait_signal { - break; + return Ok(pid); } else { ptrace::cont(pid, None).map_err(|_| ExecError::Died(None))?; } } - Ok(()) } /// Grabs the access that caused a segfault and logs it down if it's to our memory, @@ -582,7 +583,7 @@ fn handle_segfault( ptrace::setregs(pid, new_regs).unwrap(); // Our mempr_* functions end with a raise(SIGSTOP) - wait_for_signal(pid, signal::SIGSTOP, true)?; + wait_for_signal(Some(pid), signal::SIGSTOP, true)?; // Step 1 instruction ptrace::setregs(pid, regs_bak).unwrap(); @@ -625,7 +626,7 @@ fn handle_segfault( new_regs.set_ip(mempr_on as usize); new_regs.set_sp(stack_ptr); ptrace::setregs(pid, new_regs).unwrap(); - wait_for_signal(pid, signal::SIGSTOP, true)?; + wait_for_signal(Some(pid), signal::SIGSTOP, true)?; ptrace::setregs(pid, regs_bak).unwrap(); ptrace::syscall(pid, None).unwrap(); From 18c061801c4c140aff77c6656cf0e69fbcb2cd34 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 15 Jun 2025 23:53:39 +0200 Subject: [PATCH 3/6] nit --- src/shims/trace/parent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shims/trace/parent.rs b/src/shims/trace/parent.rs index 37a7e56ebb..bbe3ec4132 100644 --- a/src/shims/trace/parent.rs +++ b/src/shims/trace/parent.rs @@ -403,7 +403,7 @@ fn wait_for_signal( if signal == wait_signal { return Ok(pid); } else { - ptrace::cont(pid, None).map_err(|_| ExecError::Died(None))?; + ptrace::cont(pid, signal).map_err(|_| ExecError::Died(None))?; } } } From 0c1e14be03b8582d614bd2723d178e85f5416224 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Wed, 18 Jun 2025 15:40:17 +0200 Subject: [PATCH 4/6] nits --- src/shims/native_lib.rs | 14 ++--- src/shims/trace/child.rs | 52 ++++++++-------- src/shims/trace/parent.rs | 126 +++++++++++++++++++------------------- 3 files changed, 94 insertions(+), 98 deletions(-) diff --git a/src/shims/native_lib.rs b/src/shims/native_lib.rs index 127366d9d9..9d238146d8 100644 --- a/src/shims/native_lib.rs +++ b/src/shims/native_lib.rs @@ -224,9 +224,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { if super::trace::Supervisor::poll() { this.prepare_exposed_for_native_call(false)?; } else { - 
//this.prepare_exposed_for_native_call(true)?; - //eprintln!("Oh noes!"); - panic!("No ptrace!"); + this.prepare_exposed_for_native_call(true)?; } #[cfg(not(target_os = "linux"))] this.prepare_exposed_for_native_call(true)?; @@ -246,7 +244,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this.apply_events(events)?; } #[cfg(not(target_os = "linux"))] - let _ = maybe_memevents; // Suppress the unused warning + let _ = maybe_memevents; // Suppress the unused warning. this.write_immediate(*ret, dest)?; interp_ok(true) @@ -267,15 +265,15 @@ unsafe fn do_native_call( unsafe { if let Some(alloc) = alloc { - // SAFETY: We don't touch the machine memory past this point + // SAFETY: We don't touch the machine memory past this point. let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone()); - // SAFETY: Upheld by caller + // SAFETY: Upheld by caller. let ret = ffi::call(ptr, args); // SAFETY: We got the guard and stack pointer from start_ffi, and - // the allocator is the same + // the allocator is the same. (ret, Supervisor::end_ffi(guard, alloc, stack_ptr)) } else { - // SAFETY: Upheld by caller + // SAFETY: Upheld by caller. (ffi::call(ptr, args), None) } } diff --git a/src/shims/trace/child.rs b/src/shims/trace/child.rs index 3bb06490e5..cb648197a9 100644 --- a/src/shims/trace/child.rs +++ b/src/shims/trace/child.rs @@ -52,13 +52,13 @@ impl Supervisor { // If the supervisor is not initialised for whatever reason, fast-fail. // This might be desired behaviour, as even on platforms where ptracing // is not implemented it enables us to enforce that only one FFI call - // happens at a time + // happens at a time. let Some(sv) = sv_guard.take() else { return (sv_guard, None); }; // Get pointers to all the pages the supervisor must allow accesses in - // and prepare the fake stack + // and prepare the fake stack. let page_ptrs = alloc.borrow().pages(); let raw_stack_ptr: *mut [u8; FAKE_STACK_SIZE] = Box::leak(Box::new([0u8; FAKE_STACK_SIZE])).as_mut_ptr().cast(); @@ -66,7 +66,7 @@ impl Supervisor { let start_info = StartFfiInfo { page_ptrs, stack_ptr }; // SAFETY: We do not access machine memory past this point until the - // supervisor is ready to allow it + // supervisor is ready to allow it. unsafe { if alloc.borrow_mut().prepare_ffi().is_err() { // Don't mess up unwinding by maybe leaving the memory partly protected @@ -79,13 +79,13 @@ impl Supervisor { // NB: if we do not wait to receive a blank confirmation response, it is // possible that the supervisor is alerted of the SIGSTOP *before* it has // actually received the start_info, thus deadlocking! This way, we can - // enforce an ordering for these events + // enforce an ordering for these events. sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); sv.confirm_rx.recv().unwrap(); *sv_guard = Some(sv); // We need to be stopped for the supervisor to be able to make certain // modifications to our memory - simply waiting on the recv() doesn't - // count + // count. signal::raise(signal::SIGSTOP).unwrap(); (sv_guard, Some(raw_stack_ptr)) } @@ -109,22 +109,22 @@ impl Supervisor { // simpler and more robust to simply use the signals which are left for // arbitrary usage. Since this will block until we are continued by the // supervisor, we can assume past this point that everything is back to - // normal + // normal. signal::raise(signal::SIGUSR1).unwrap(); - // This is safe! It just sets memory to normal expected permissions + // This is safe! It just sets memory to normal expected permissions. 
alloc.borrow_mut().unprep_ffi(); // If this is `None`, then `raw_stack_ptr` is None and does not need to // be deallocated (and there's no need to worry about the guard, since - // it contains nothing) + // it contains nothing). let sv = sv_guard.take()?; // SAFETY: Caller upholds that this pointer was allocated as a box with - // this type + // this type. unsafe { drop(Box::from_raw(raw_stack_ptr.unwrap())); } - // On the off-chance something really weird happens, don't block forever + // On the off-chance something really weird happens, don't block forever. let ret = sv .event_rx .try_recv_timeout(std::time::Duration::from_secs(5)) @@ -151,32 +151,32 @@ impl Supervisor { /// The invariants for `fork()` must be upheld by the caller. pub unsafe fn init_sv() -> Result<(), SvInitError> { // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the - // relevant missing bits + // relevant missing bits. // On Linux, this will check whether ptrace is fully disabled by the Yama module. // If Yama isn't running or we're not on Linux, we'll still error later, but - // this saves a very expensive fork call + // this saves a very expensive fork call. let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope"); if let Ok(stat) = ptrace_status { if let Some(stat) = stat.chars().next() { - // Fast-error if ptrace is fully disabled on the system + // Fast-error if ptrace is fully disabled on the system. if stat == '3' { return Err(SvInitError); } } } - // Initialise the supervisor if it isn't already, placing it into SUPERVISOR + // Initialise the supervisor if it isn't already, placing it into SUPERVISOR. let mut lock = SUPERVISOR.lock().unwrap(); if lock.is_some() { return Ok(()); } - // Prepare the IPC channels we need + // Prepare the IPC channels we need. let (message_tx, message_rx) = ipc::channel().unwrap(); let (confirm_tx, confirm_rx) = ipc::channel().unwrap(); let (event_tx, event_rx) = ipc::channel().unwrap(); - // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error + // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error. let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap(); unsafe { @@ -185,37 +185,37 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { match unistd::fork().unwrap() { unistd::ForkResult::Parent { child } => { // If somehow another thread does exist, prevent it from accessing the lock - // and thus breaking our safety invariants + // and thus breaking our safety invariants. std::mem::forget(lock); // The child process is free to unwind, so we won't to avoid doubly freeing - // system resources + // system resources. let init = std::panic::catch_unwind(|| { let listener = ChildListener { message_rx, attached: false, override_retcode: None }; - // Trace as many things as possible, to be able to handle them as needed + // Trace as many things as possible, to be able to handle them as needed. let options = ptrace::Options::PTRACE_O_TRACESYSGOOD | ptrace::Options::PTRACE_O_TRACECLONE | ptrace::Options::PTRACE_O_TRACEFORK; - // Attach to the child process without stopping it + // Attach to the child process without stopping it. match ptrace::seize(child, options) { // Ptrace works :D Ok(_) => { let code = sv_loop(listener, child, event_tx, confirm_tx, page_size) .unwrap_err(); // If a return code of 0 is not explicitly given, assume something went - // wrong and return 1 + // wrong and return 1. 
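For reference, the Yama fast-path check from `init_sv` above, extracted as a sketch (the helper name is illustrative): a first character of '3' in `ptrace_scope` means ptrace is disabled system-wide, so the expensive fork can be skipped.

```rust
fn ptrace_fully_disabled() -> bool {
    std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope")
        .ok()
        .and_then(|stat| stat.chars().next())
        .is_some_and(|c| c == '3')
}
```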
std::process::exit(code.unwrap_or(1)) } - // Ptrace does not work and we failed to catch that + // Ptrace does not work and we failed to catch that. Err(_) => { - // If we can't ptrace, Miri continues being the parent + // If we can't ptrace, Miri continues being the parent. signal::kill(child, signal::SIGKILL).unwrap(); SvInitError } } }); match init { - // The "Ok" case means that we couldn't ptrace + // The "Ok" case means that we couldn't ptrace. Ok(e) => return Err(e), Err(p) => { eprintln!("Supervisor process panicked!\n{p:?}"); @@ -225,12 +225,12 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { } unistd::ForkResult::Child => { // Make sure we never get orphaned and stuck in SIGSTOP or similar - // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call + // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call. let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM); assert_eq!(ret, 0); // First make sure the parent succeeded with ptracing us! signal::raise(signal::SIGSTOP).unwrap(); - // If we're the child process, save the supervisor info + // If we're the child process, save the supervisor info. *lock = Some(Supervisor { message_tx, confirm_rx, event_rx }); } } diff --git a/src/shims/trace/parent.rs b/src/shims/trace/parent.rs index bbe3ec4132..284d6ae28c 100644 --- a/src/shims/trace/parent.rs +++ b/src/shims/trace/parent.rs @@ -9,7 +9,7 @@ use crate::shims::trace::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo}; /// The flags to use when calling `waitid()`. /// Since bitwise or on the nix version of these flags is implemented as a trait, -/// this cannot be const directly so we do it this way +/// this cannot be const directly so we do it this way. const WAIT_FLAGS: wait::WaitPidFlag = wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); @@ -42,7 +42,7 @@ static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1); /// Allows us to get common arguments from the `user_regs_t` across architectures. /// Normally this would land us ABI hell, but thankfully all of our usecases /// consist of functions with a small number of register-sized integer arguments. -/// See for sources +/// See for sources. trait ArchIndependentRegs { /// Gets the address of the instruction pointer. fn ip(&self) -> usize; @@ -54,7 +54,7 @@ trait ArchIndependentRegs { } // It's fine / desirable behaviour for values to wrap here, we care about just -// preserving the bit pattern +// preserving the bit pattern. #[cfg(target_arch = "x86_64")] #[expect(clippy::as_conversions)] #[rustfmt::skip] @@ -137,7 +137,7 @@ impl Iterator for ChildListener { // Allows us to monitor the child process by just iterating over the listener // NB: This should never return None! fn next(&mut self) -> Option { - // Do not block if the child has nothing to report for `waitid` + // Do not block if the child has nothing to report for `waitid`. let opts = WAIT_FLAGS | wait::WaitPidFlag::WNOHANG; loop { // Listen to any child, not just the main one. Important if we want @@ -147,18 +147,17 @@ impl Iterator for ChildListener { match wait::waitid(wait::Id::All, opts) { Ok(stat) => match stat { - // Child exited normally with a specific code set + // Child exited normally with a specific code set. wait::WaitStatus::Exited(_, code) => { - //eprintln!("Exited main {code}"); let code = self.override_retcode.unwrap_or(code); return Some(ExecEvent::Died(Some(code))); } - // Child was killed by a signal, without giving a code + // Child was killed by a signal, without giving a code. 
wait::WaitStatus::Signaled(_, _, _) => return Some(ExecEvent::Died(self.override_retcode)), // Child entered a syscall. Since we're always technically // tracing, only pass this along if we're actively - // monitoring the child + // monitoring the child. wait::WaitStatus::PtraceSyscall(pid) => if self.attached { return Some(ExecEvent::Syscall(pid)); @@ -177,11 +176,11 @@ impl Iterator for ChildListener { return Some(ExecEvent::Status(pid, signal)); } } else { - // Just pass along the signal + // Just pass along the signal. ptrace::cont(pid, signal).unwrap(); }, // Child was stopped at the given signal. Same logic as for - // WaitStatus::PtraceEvent + // WaitStatus::PtraceEvent. wait::WaitStatus::Stopped(pid, signal) => if self.attached { if signal == signal::SIGUSR1 { @@ -197,11 +196,11 @@ impl Iterator for ChildListener { }, // This case should only trigger if all children died and we // somehow missed that, but it's best we not allow any room - // for deadlocks + // for deadlocks. Err(_) => return Some(ExecEvent::Died(None)), } - // Similarly, do a non-blocking poll of the IPC channel + // Similarly, do a non-blocking poll of the IPC channel. if let Ok(req) = self.message_rx.try_recv() { match req { TraceRequest::StartFfi(info) => @@ -216,7 +215,7 @@ impl Iterator for ChildListener { } } - // Not ideal, but doing anything else might sacrifice performance + // Not ideal, but doing anything else might sacrifice performance. std::thread::yield_now(); } } @@ -241,21 +240,21 @@ pub fn sv_loop( confirm_tx: ipc::IpcSender, page_size: usize, ) -> Result> { - // Things that we return to the child process + // Things that we return to the child process. let mut acc_events = Vec::new(); - // Memory allocated on the MiriMachine + // Memory allocated for the MiriMachine. let mut ch_pages = Vec::new(); let mut ch_stack = None; - // An instance of the Capstone disassembler, so we don't spawn one on every access + // An instance of the Capstone disassembler, so we don't spawn one on every access. let cs = get_disasm(); // The pid of the last process we interacted with, used by default if we don't have a - // reason to use a different one + // reason to use a different one. let mut curr_pid = init_pid; - // There's an initial sigstop we need to deal with + // There's an initial sigstop we need to deal with. wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| { match e { ExecError::Died(code) => code, @@ -266,11 +265,11 @@ pub fn sv_loop( for evt in listener { match evt { - // start_ffi was called by the child, so prep memory + // start_ffi was called by the child, so prep memory. ExecEvent::Start(ch_info) => { - // All the pages that the child process is "allowed to" access + // All the pages that the child process is "allowed to" access. ch_pages = ch_info.page_ptrs; - // And the fake stack it allocated for us to use later + // And the fake stack it allocated for us to use later. ch_stack = Some(ch_info.stack_ptr); // We received the signal and are no longer in the main listener loop, @@ -279,27 +278,27 @@ pub fn sv_loop( // order to do most ptrace operations! confirm_tx.send(Confirmation).unwrap(); // We can't trust simply calling `Pid::this()` in the child process to give the right - // PID for us, so we get it this way + // PID for us, so we get it this way. curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); ptrace::syscall(curr_pid, None).unwrap(); } - // end_ffi was called by the child + // end_ffi was called by the child. 
ExecEvent::End => { - // Hand over the access info we traced + // Hand over the access info we traced. event_tx.send(MemEvents { acc_events }).unwrap(); - // And reset our values + // And reset our values. acc_events = Vec::new(); ch_stack = None; - // No need to monitor syscalls anymore, they'd just be ignored + // No need to monitor syscalls anymore, they'd just be ignored. ptrace::cont(curr_pid, None).unwrap(); } // Child process was stopped by a signal ExecEvent::Status(pid, signal) => match signal { // If it was a segfault, check if it was an artificial one - // caused by it trying to access the MiriMachine memory + // caused by it trying to access the MiriMachine memory. signal::SIGSEGV => match handle_segfault( pid, @@ -316,19 +315,19 @@ pub fn sv_loop( }, _ => (), }, - // Something weird happened + // Something weird happened. _ => { eprintln!("Process unexpectedly got {signal}; continuing..."); // In case we're not tracing if ptrace::syscall(pid, None).is_err() { // If *this* fails too, something really weird happened - // and it's probably best to just panic + // and it's probably best to just panic. signal::kill(pid, signal::SIGCONT).unwrap(); } } }, // Child entered a syscall; we wait for exits inside of this, so it - // should never trigger on return from a syscall we care about + // should never trigger on return from a syscall we care about. ExecEvent::Syscall(pid) => { ptrace::syscall(pid, None).unwrap(); } @@ -377,7 +376,7 @@ fn wait_for_signal( if init_cont { ptrace::cont(pid.unwrap(), None).unwrap(); } - // Repeatedly call `waitid` until we get the signal we want, or the process dies + // Repeatedly call `waitid` until we get the signal we want, or the process dies. loop { let wait_id = match pid { Some(pid) => wait::Id::Pid(pid), @@ -385,16 +384,15 @@ fn wait_for_signal( }; let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; let (signal, pid) = match stat { - // Report the cause of death, if we know it + // Report the cause of death, if we know it. wait::WaitStatus::Exited(_, code) => { - //eprintln!("Exited sig1 {code}"); return Err(ExecError::Died(Some(code))); } wait::WaitStatus::Signaled(_, _, _) => return Err(ExecError::Died(None)), wait::WaitStatus::Stopped(pid, signal) => (signal, pid), wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), // This covers PtraceSyscall and variants that are impossible with - // the flags set (e.g. WaitStatus::StillAlive) + // the flags set (e.g. WaitStatus::StillAlive). _ => { ptrace::cont(pid.unwrap(), None).unwrap(); continue; @@ -430,7 +428,7 @@ fn handle_segfault( // The arch_detail is what we care about, but it relies on these temporaries // that we can't drop. 0x1000 is the default base address for Captsone, and - // we're expecting 1 instruction + // we're expecting 1 instruction. let insns = cs.disasm_count(instr, 0x1000, 1)?; let ins_detail = cs.insn_detail(&insns[0])?; let arch_detail = ins_detail.arch_detail(); @@ -457,27 +455,27 @@ fn handle_segfault( } #[cfg(target_arch = "aarch64")] arch::ArchOperand::Arm64Operand(arm64_operand) => { - // Annoyingly, we don't always get the size here, so just be pessimistic for now + // Annoyingly, we don't always get the size here, so just be pessimistic for now. match arm64_operand.op_type { arch::arm64::Arm64OperandType::Mem(_) => { - // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes + // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes. 
let size = match arm64_operand.vas { - // Not an fp/simd instruction + // Not an fp/simd instruction. arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE, - // 1 byte + // 1 byte. arch::arm64::Arm64Vas::ARM64_VAS_1B => 1, - // 2 bytes + // 2 bytes. arch::arm64::Arm64Vas::ARM64_VAS_1H => 2, - // 4 bytes + // 4 bytes. arch::arm64::Arm64Vas::ARM64_VAS_4B | arch::arm64::Arm64Vas::ARM64_VAS_2H | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4, - // 8 bytes + // 8 bytes. arch::arm64::Arm64Vas::ARM64_VAS_8B | arch::arm64::Arm64Vas::ARM64_VAS_4H | arch::arm64::Arm64Vas::ARM64_VAS_2S | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8, - // 16 bytes + // 16 bytes. arch::arm64::Arm64Vas::ARM64_VAS_16B | arch::arm64::Arm64Vas::ARM64_VAS_8H | arch::arm64::Arm64Vas::ARM64_VAS_4S @@ -487,7 +485,7 @@ fn handle_segfault( let push = addr..addr.strict_add(size); // FIXME: This now has access type info in the latest // git version of capstone because this pissed me off - // and I added it. Change this when it updates + // and I added it. Change this when it updates. acc_events.push(AccessEvent::Read(push.clone())); acc_events.push(AccessEvent::Write(push)); } @@ -499,7 +497,7 @@ fn handle_segfault( match arm_operand.op_type { arch::arm::ArmOperandType::Mem(_) => { // We don't get info on the size of the access, but - // we're at least told if it's a vector inssizetruction + // we're at least told if it's a vector instruction. let size = if arm_operand.vector_index.is_some() { ARCH_MAX_ACCESS_SIZE } else { @@ -520,7 +518,7 @@ fn handle_segfault( arch::ArchOperand::RiscVOperand(risc_voperand) => { match risc_voperand { arch::riscv::RiscVOperand::Mem(_) => { - // We get basically no info here + // We get basically no info here. let push = addr..addr.strict_add(size); acc_events.push(AccessEvent::Read(push.clone())); acc_events.push(AccessEvent::Write(push)); @@ -536,11 +534,11 @@ fn handle_segfault( } // Get information on what caused the segfault. This contains the address - // that triggered it + // that triggered it. let siginfo = ptrace::getsiginfo(pid).map_err(|_| ExecError::Shrug)?; // All x86, ARM, etc. instructions only have at most one memory operand // (thankfully!) - // SAFETY: si_addr is safe to call + // SAFETY: si_addr is safe to call. let addr = unsafe { siginfo.si_addr().addr() }; let page_addr = addr.strict_sub(addr.strict_rem(page_size)); @@ -557,14 +555,14 @@ fn handle_segfault( let mut new_regs = regs_bak; let ip_prestep = regs_bak.ip(); - // Move the instr ptr into the deprotection code + // Move the instr ptr into the deprotection code. #[expect(clippy::as_conversions)] new_regs.set_ip(mempr_off as usize); // Don't mess up the stack by accident! new_regs.set_sp(stack_ptr); // Modify the PAGE_ADDR global on the child process to point to the page - // that we want unprotected + // that we want unprotected. ptrace::write( pid, (&raw const PAGE_ADDR).cast_mut().cast(), @@ -573,7 +571,7 @@ fn handle_segfault( .unwrap(); // Check if we also own the next page, and if so unprotect it in case - // the access spans the page boundary + // the access spans the page boundary. if ch_pages.contains(&page_addr.strict_add(page_size)) { ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 2).unwrap(); } else { @@ -582,10 +580,10 @@ fn handle_segfault( ptrace::setregs(pid, new_regs).unwrap(); - // Our mempr_* functions end with a raise(SIGSTOP) + // Our mempr_* functions end with a raise(SIGSTOP). wait_for_signal(Some(pid), signal::SIGSTOP, true)?; - // Step 1 instruction + // Step 1 instruction. 
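The `PAGE_ADDR`/`PAGE_COUNT` stores above work because the supervisor was forked from the same image: a static's address in this process is also its address in the child, and ptrace then moves one machine word at a time. Both directions in isolation, mirroring the calls used in this file (helper names are illustrative):

```rust
use nix::sys::ptrace;
use nix::unistd::Pid;

/// Write one machine word into the stopped child at `addr`.
fn poke_word(pid: Pid, addr: *const usize, value: usize) -> nix::Result<()> {
    ptrace::write(
        pid,
        addr.cast_mut().cast(),
        libc::c_long::try_from(value).unwrap(),
    )
}

/// Read one machine word from the stopped child. The pointer only has to
/// be valid in the child's address space, not in ours.
fn peek_word(pid: Pid, addr: usize) -> nix::Result<libc::c_long> {
    ptrace::read(pid, std::ptr::without_provenance_mut(addr))
}
```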
@@ -536,11 +534,11 @@ fn handle_segfault(
     }
 
     // Get information on what caused the segfault. This contains the address
-    // that triggered it
+    // that triggered it.
     let siginfo = ptrace::getsiginfo(pid).map_err(|_| ExecError::Shrug)?;
 
     // All x86, ARM, etc. instructions only have at most one memory operand
     // (thankfully!)
-    // SAFETY: si_addr is safe to call
+    // SAFETY: si_addr is safe to call.
     let addr = unsafe { siginfo.si_addr().addr() };
     let page_addr = addr.strict_sub(addr.strict_rem(page_size));
@@ -557,14 +555,14 @@ fn handle_segfault(
     let mut new_regs = regs_bak;
     let ip_prestep = regs_bak.ip();
 
-    // Move the instr ptr into the deprotection code
+    // Move the instr ptr into the deprotection code.
     #[expect(clippy::as_conversions)]
     new_regs.set_ip(mempr_off as usize);
     // Don't mess up the stack by accident!
     new_regs.set_sp(stack_ptr);
 
     // Modify the PAGE_ADDR global on the child process to point to the page
-    // that we want unprotected
+    // that we want unprotected.
     ptrace::write(
         pid,
         (&raw const PAGE_ADDR).cast_mut().cast(),
@@ -573,7 +571,7 @@ fn handle_segfault(
     .unwrap();
 
     // Check if we also own the next page, and if so unprotect it in case
-    // the access spans the page boundary
+    // the access spans the page boundary.
     if ch_pages.contains(&page_addr.strict_add(page_size)) {
         ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 2).unwrap();
     } else {
@@ -582,10 +580,10 @@
 
     ptrace::setregs(pid, new_regs).unwrap();
 
-    // Our mempr_* functions end with a raise(SIGSTOP)
+    // Our mempr_* functions end with a raise(SIGSTOP).
     wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
 
-    // Step 1 instruction
+    // Step 1 instruction.
     ptrace::setregs(pid, regs_bak).unwrap();
     ptrace::step(pid, None).unwrap();
     // Don't use wait_for_signal here since 1 instruction doesn't give room
@@ -599,10 +597,10 @@ fn handle_segfault(
     let regs_bak = ptrace::getregs(pid).unwrap();
     new_regs = regs_bak;
     let ip_poststep = regs_bak.ip();
-    // We need to do reads/writes in word-sized chunks
+    // We need to do reads/writes in word-sized chunks.
     let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE);
     let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| {
-        // This only needs to be a valid pointer in the child process, not ours
+        // This only needs to be a valid pointer in the child process, not ours.
         ret.append(
             &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip))
                 .unwrap()
@@ -614,14 +612,14 @@ fn handle_segfault(
 
     // Now figure out the size + type of access and log it down
     // This will mark down e.g. the same area being read multiple times,
-    // since it's more efficient to compress the accesses at the end
+    // since it's more efficient to compress the accesses at the end.
     if capstone_disassemble(&instr, addr, cs, acc_events).is_err() {
-        // Read goes first because we need to be pessimistic
+        // Read goes first because we need to be pessimistic.
         acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
         acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
     }
 
-    // Reprotect everything and continue
+    // Reprotect everything and continue.
     #[expect(clippy::as_conversions)]
     new_regs.set_ip(mempr_on as usize);
     new_regs.set_sp(stack_ptr);
@@ -632,7 +630,7 @@ fn handle_segfault(
         ptrace::syscall(pid, None).unwrap();
         Ok(())
     } else {
-        // This was a real segfault, so print some debug info and quit
+        // This was a real segfault, so print some debug info and quit.
         let regs = ptrace::getregs(pid).unwrap();
         eprintln!("Segfault occurred during FFI at {addr:#018x}");
         eprintln!("Expected access on pages: {ch_pages:#018x?}");
@@ -643,7 +641,7 @@
 }
 
 // We only get dropped into these functions via offsetting the instr pointer
-// manually, so we *must not ever* unwind from them
+// manually, so we *must not ever* unwind from them.
 
 /// Disables protections on the page whose address is currently in `PAGE_ADDR`.
 ///
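
The `set_ip`/`set_sp` calls above hide an architecture-specific register struct. On x86-64 the underlying dance looks roughly like this; `divert_to` is a hypothetical helper, and the caller restores afterwards with `ptrace::setregs(pid, saved)`:

    use nix::sys::ptrace;
    use nix::unistd::Pid;

    // Save the tracee's registers, aim its instruction pointer at `target`
    // with a fresh stack top, and return the saved set for later restoration.
    #[cfg(target_arch = "x86_64")]
    fn divert_to(pid: Pid, target: u64, stack_top: u64) -> nix::Result<libc::user_regs_struct> {
        let saved = ptrace::getregs(pid)?;
        let mut regs = saved;
        regs.rip = target; // what set_ip()/set_sp() abstract over
        regs.rsp = stack_top;
        ptrace::setregs(pid, regs)?;
        Ok(saved)
    }
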
@@ -655,7 +653,7 @@ pub unsafe extern "C" fn mempr_off() {
     use std::sync::atomic::Ordering;
 
     let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed));
-    // SAFETY: Upheld by caller
+    // SAFETY: Upheld by "caller".
     unsafe {
         // It's up to the caller to make sure this doesn't actually overflow, but
         // we mustn't unwind from here, so...
         if libc::mprotect(
             PAGE_ADDR.load(Ordering::Relaxed).cast(),
             len,
             libc::PROT_READ | libc::PROT_WRITE,
         ) != 0
         {
-            // Can't return or unwind, but we can do this
+            // Can't return or unwind, but we can do this.
             std::process::exit(-1);
         }
     }
-    // If this fails somehow we're doomed
+    // If this fails somehow we're doomed.
     if signal::raise(signal::SIGSTOP).is_err() {
         std::process::exit(-1);
     }
@@ -682,7 +680,7 @@ pub unsafe extern "C" fn mempr_on() {
     use std::sync::atomic::Ordering;
 
     let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed));
-    // SAFETY: Upheld by caller
+    // SAFETY: Upheld by "caller".
     unsafe {
         if libc::mprotect(PAGE_ADDR.load(Ordering::Relaxed).cast(), len, libc::PROT_NONE) != 0 {
             std::process::exit(-1);

From 083d635773601531f49dc11c4676c2c54b9f5852 Mon Sep 17 00:00:00 2001
From: Nia Espera
Date: Wed, 18 Jun 2025 16:24:27 +0200
Subject: [PATCH 5/6] rebase

---
 src/shims/trace/child.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/shims/trace/child.rs b/src/shims/trace/child.rs
index cb648197a9..6e2ad1ac01 100644
--- a/src/shims/trace/child.rs
+++ b/src/shims/trace/child.rs
@@ -100,7 +100,7 @@ impl Supervisor {
     /// one passed to it also.
     pub unsafe fn end_ffi(
         mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
-        _alloc: Rc<RefCell<IsolatedAlloc>>,
+        alloc: Rc<RefCell<IsolatedAlloc>>,
         raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>,
     ) -> Option<MemEvents> {
         // We can't use IPC channels here to signal that FFI mode has ended,
@@ -228,6 +228,12 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
         // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call.
         let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
         assert_eq!(ret, 0);
+        // Set up the pagesize used in the memory protection functions.
+        // SAFETY: sysconf(_SC_PAGESIZE) is always safe and doesn't error
+        super::parent::PAGE_SIZE.store(
+            libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap(),
+            std::sync::atomic::Ordering::Relaxed,
+        );
         // First make sure the parent succeeded with ptracing us!
         signal::raise(signal::SIGSTOP).unwrap();
         // If we're the child process, save the supervisor info.

From e1d46331ee1adaf0ed1da31eb62b81befe8bdc75 Mon Sep 17 00:00:00 2001
From: Nia Espera
Date: Thu, 19 Jun 2025 15:46:10 +0200
Subject: [PATCH 6/6] zero out stack, etc

---
 src/shims/trace/child.rs  | 13 +++++--------
 src/shims/trace/parent.rs | 16 +++++++++++++++-
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/shims/trace/child.rs b/src/shims/trace/child.rs
index 6e2ad1ac01..d9227273a1 100644
--- a/src/shims/trace/child.rs
+++ b/src/shims/trace/child.rs
@@ -179,6 +179,10 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
     // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error.
     let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap();
+    // Set up the pagesize used in the memory protection functions.
+    // SAFETY: sysconf(_SC_PAGESIZE) is always safe and doesn't error
+    super::parent::PAGE_SIZE.store(page_size, std::sync::atomic::Ordering::Relaxed);
+
     unsafe {
         // TODO: Maybe use clone3() instead for better signalling of when the child exits?
         // SAFETY: Caller upholds that only one thread exists.
@@ -200,8 +204,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
             match ptrace::seize(child, options) {
                 // Ptrace works :D
                 Ok(_) => {
-                    let code = sv_loop(listener, child, event_tx, confirm_tx, page_size)
-                        .unwrap_err();
+                    let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err();
                     // If a return code of 0 is not explicitly given, assume something went
                     // wrong and return 1.
                     std::process::exit(code.unwrap_or(1))
@@ -228,12 +231,6 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
         // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call.
         let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
         assert_eq!(ret, 0);
-        // Set up the pagesize used in the memory protection functions.
-        // SAFETY: sysconf(_SC_PAGESIZE) is always safe and doesn't error
-        super::parent::PAGE_SIZE.store(
-            libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap(),
-            std::sync::atomic::Ordering::Relaxed,
-        );
         // First make sure the parent succeeded with ptracing us!
         signal::raise(signal::SIGSTOP).unwrap();
         // If we're the child process, save the supervisor info.
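
The net effect of patches 5 and 6 on initialisation: the page size is published once, before the fork, so both processes observe the same global rather than threading a parameter through `sv_loop`. Reduced to a sketch (this `PAGE_SIZE` and `init_page_size` are stand-ins; the real static lives in `src/shims/trace/parent.rs`):

    use std::sync::atomic::{AtomicUsize, Ordering};

    static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);

    fn init_page_size() {
        // SAFETY: sysconf(_SC_PAGESIZE) is always safe to call.
        let sz = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
        PAGE_SIZE.store(sz.try_into().unwrap(), Ordering::Relaxed);
    }
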
diff --git a/src/shims/trace/parent.rs b/src/shims/trace/parent.rs
index 284d6ae28c..fd2c7c037d 100644
--- a/src/shims/trace/parent.rs
+++ b/src/shims/trace/parent.rs
@@ -238,8 +238,10 @@ pub fn sv_loop(
     init_pid: unistd::Pid,
     event_tx: ipc::IpcSender<MemEvents>,
     confirm_tx: ipc::IpcSender<Confirmation>,
-    page_size: usize,
 ) -> Result<!, Option<i32>> {
+    let page_size = PAGE_SIZE.load(std::sync::atomic::Ordering::Relaxed);
+    assert_ne!(page_size, 0);
+
     // Things that we return to the child process.
     let mut acc_events = Vec::new();
 
@@ -289,6 +291,7 @@ pub fn sv_loop(
                     event_tx.send(MemEvents { acc_events }).unwrap();
                     // And reset our values.
                     acc_events = Vec::new();
+                    ch_pages = Vec::new();
                     ch_stack = None;
 
                     // No need to monitor syscalls anymore, they'd just be ignored.
@@ -550,6 +553,12 @@ fn handle_segfault(
     // - Parse executed code to estimate size & type of access
     // - Reprotect the memory
     // - Continue
+
+    // Zero out the stack
+    for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) {
+        ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap();
+    }
+
     let stack_ptr = ch_stack.strict_add(FAKE_STACK_SIZE / 2);
     let regs_bak = ptrace::getregs(pid).unwrap();
     let mut new_regs = regs_bak;
@@ -591,6 +600,11 @@ fn handle_segfault(
     // Also, don't let it continue with unprotected memory if something errors!
     let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?;
 
+    // Zero it out again to be safe
+    for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) {
+        ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap();
+    }
+
     // Save registers and grab the bytes that were executed. This would
     // be really nasty if it was a jump or similar but those thankfully
     // won't do memory accesses and so can't trigger this!
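
For reference, the two zeroing loops added above reduce to one pattern: ptrace transfers a single machine word per call, hence the word-sized stride. A standalone sketch (`zero_child_range` is a hypothetical helper, assuming nix 0.30's safe `ptrace::write`):

    use nix::sys::ptrace;
    use nix::unistd::Pid;

    const WORD: usize = std::mem::size_of::<usize>();

    // Clear `len` bytes of the child's memory starting at `start`.
    fn zero_child_range(pid: Pid, start: usize, len: usize) -> nix::Result<()> {
        for a in (start..start + len).step_by(WORD) {
            // `a` must be valid in the *child*; we never dereference it here.
            ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0)?;
        }
        Ok(())
    }
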