diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index d7432a7300..c31e358d92 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -34,18 +34,34 @@ pub struct MemEvents { /// A single memory access. #[allow(dead_code)] #[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AccessEvent { - /// A read may have occurred on this memory range. - /// Some instructions *may* read memory without *always* doing that, - /// so this can be an over-approximation. - /// The range info, however, is reliable if the access did happen. + /// A read occurred on this memory range. Read(AccessRange), - /// A read may have occurred on this memory range. + /// A write may have occurred on this memory range. /// Some instructions *may* write memory without *always* doing that, /// so this can be an over-approximation. /// The range info, however, is reliable if the access did happen. - Write(AccessRange), + /// If the second field is true, the access definitely happened. + Write(AccessRange, bool), +} + +impl AccessEvent { + fn get_range(&self) -> AccessRange { + match self { + AccessEvent::Read(access_range) => access_range.clone(), + AccessEvent::Write(access_range, _) => access_range.clone(), + } + } + + fn is_read(&self) -> bool { + matches!(self, AccessEvent::Read(_)) + } + + fn definitely_happened(&self) -> bool { + // Anything except a maybe-write is definitive. + !matches!(self, AccessEvent::Write(_, false)) + } } /// The memory touched by a given access. @@ -59,6 +75,12 @@ pub struct AccessRange { pub size: usize, } +impl AccessRange { + fn end(&self) -> usize { + self.addr.strict_add(self.size) + } +} + impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Call native host function and return the output as an immediate. @@ -196,6 +218,43 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } None } + + /// Applies the `events` to Miri's internal state. The event vector must be + /// ordered sequentially by when the accesses happened, and the sizes are + /// assumed to be exact. + fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + for evt in events.acc_events { + let evt_rg = evt.get_range(); + // We're assuming an access only touches 1 allocation. + let alloc_id = this + .alloc_id_from_addr(evt_rg.addr.to_u64(), evt_rg.size.try_into().unwrap(), true) + .expect("Foreign code did an out-of-bounds access!"); + + let alloc = this.get_alloc_raw(alloc_id)?; + let alloc_addr = alloc.get_bytes_unchecked_raw().addr(); + + // Shift the overlap range to be an offset from the allocation base addr. + let overlap = evt_rg.addr.strict_sub(alloc_addr)..evt_rg.end().strict_sub(alloc_addr); + + // Reads are infallible, writes might not be. + if evt.is_read() { + let p_map = alloc.provenance(); + for idx in overlap { + // If a provenance was read by the foreign code, expose it. + if let Some(prov) = p_map.get(Size::from_bytes(idx), this) { + this.expose_provenance(prov)?; + } + } + } else if evt.definitely_happened() || alloc.mutability.is_mut() { + //let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?; + //alloc.process_native_write(AllocRange { start: overlap.start, size: overlap.len() }) + } + } + + interp_ok(()) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -221,6 +280,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } }; + // Do we have ptrace? + let tracing = trace::Supervisor::is_enabled(); + // Get the function arguments, and convert them to `libffi`-compatible form. let mut libffi_args = Vec::::with_capacity(args.len()); for arg in args.iter() { @@ -240,9 +302,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // The first time this happens, print a warning. if !this.machine.native_call_mem_warned.replace(true) { // Newly set, so first time we get here. - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { - tracing: self::trace::Supervisor::is_enabled(), - }); + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); } this.expose_provenance(prov)?; @@ -268,15 +328,21 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // be read by FFI. The `black_box` is defensive programming as LLVM likes // to (incorrectly) optimize away ptr2int casts whose result is unused. std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance()); - // Expose all provenances in this allocation, since the native code can do $whatever. - for prov in alloc.provenance().provenances() { - this.expose_provenance(prov)?; + + if !tracing { + // Expose all provenances in this allocation, since the native code can do $whatever. + for prov in alloc.provenance().provenances() { + this.expose_provenance(prov)?; + } } // Prepare for possible write from native code if mutable. if info.mutbl.is_mut() { - let alloc = &mut this.get_alloc_raw_mut(alloc_id)?.0; + let (alloc, _cx) = this.get_alloc_raw_mut(alloc_id)?; alloc.prepare_for_native_access(); + if tracing { + //alloc.process_native_write(&cx.tcx, None); + } // Also expose *mutable* provenance for the interpreter-level allocation. std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance()); } @@ -288,10 +354,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let (ret, maybe_memevents) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; - if cfg!(target_os = "linux") - && let Some(events) = maybe_memevents - { - trace!("Registered FFI events:\n{events:#0x?}"); + if tracing { + this.tracing_apply_accesses(maybe_memevents.unwrap())?; } this.write_immediate(*ret, dest)?; diff --git a/src/shims/native_lib/trace/parent.rs b/src/shims/native_lib/trace/parent.rs index fc4047eec5..7484788fb9 100644 --- a/src/shims/native_lib/trace/parent.rs +++ b/src/shims/native_lib/trace/parent.rs @@ -386,7 +386,11 @@ fn capstone_find_events( acc_events.push(AccessEvent::Read(push.clone())); } if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); + // FIXME: This could be made certain; either determine all cases where + // only reads happen, or have an intermediate mempr_* function to first + // map the page(s) as readonly and check if a segfault occurred. + // If this did a read as well, it's possible the write didn't happen. + acc_events.push(AccessEvent::Write(push, !acc_ty.is_readable())); } return true; @@ -442,8 +446,7 @@ fn handle_segfault( // Get information on what caused the segfault. This contains the address // that triggered it. let siginfo = ptrace::getsiginfo(pid).unwrap(); - // All x86, ARM, etc. instructions only have at most one memory operand - // (thankfully!) + // All x86 instructions only have at most one memory operand (thankfully!) // SAFETY: si_addr is safe to call. let addr = unsafe { siginfo.si_addr().addr() }; let page_addr = addr.strict_sub(addr.strict_rem(page_size)); diff --git a/tests/native-lib/fail/trace_read.rs b/tests/native-lib/fail/trace_read.rs new file mode 100644 index 0000000000..4701d0cdc7 --- /dev/null +++ b/tests/native-lib/fail/trace_read.rs @@ -0,0 +1,26 @@ +//@only-target: linux +//@only-target: gnu +//@only-target: x86 +//@only-on-host +//@compile-flags: -Zmiri-native-lib-enable-tracing + +extern "C" { + fn do_nothing(); +} + +fn main() { + unexposed_reachable_alloc(); +} + +fn unexposed_reachable_alloc() { + let inner = 42; + let intermediate = &raw const inner; + let exposed = (&raw const intermediate).expose_provenance(); + unsafe { do_nothing() }; + let invalid: *const i32 = std::ptr::with_exposed_provenance(intermediate.addr()); + let valid: *const *const i32 = std::ptr::with_exposed_provenance(exposed); + unsafe { + assert_ne!((*valid).addr(), 0); + println!("{}", *invalid); //~ ERROR: Undefined Behavior: pointer not dereferenceable: pointer must be dereferenceable for 4 bytes, but got + } +} diff --git a/tests/native-lib/fail/trace_read.stderr b/tests/native-lib/fail/trace_read.stderr new file mode 100644 index 0000000000..6054194e57 --- /dev/null +++ b/tests/native-lib/fail/trace_read.stderr @@ -0,0 +1,21 @@ +error: Undefined Behavior: pointer not dereferenceable: pointer must be dereferenceable for 4 bytes, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> tests/native-lib/fail/trace_read.rs:LL:CC + | +LL | println!("{}", *invalid); + | ^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/trace_read.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/trace_read.rs:LL:CC + | +LL | unexposed_reachable_alloc(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/tests/native-lib/fail/trace_write.rs b/tests/native-lib/fail/trace_write.rs new file mode 100644 index 0000000000..d3c2be8563 --- /dev/null +++ b/tests/native-lib/fail/trace_write.rs @@ -0,0 +1,22 @@ +//@only-target: linux +//@only-target: gnu +//@only-target: x86 +//@only-on-host +//@compile-flags: -Zmiri-native-lib-enable-tracing + +extern "C" { + fn init_n(n: i32, ptr: *mut u8); +} + +fn main() { + partial_init(); +} + +fn partial_init() { + let mut slice: [u8; 10]; + unsafe { + init_n(5, (&raw mut slice).cast()); + assert!(slice[3] == 0); + println!("{}", slice[6]); + } +} diff --git a/tests/native-lib/pass/ptr_read_access.stderr b/tests/native-lib/pass/ptr_read_access.notrace.stderr similarity index 100% rename from tests/native-lib/pass/ptr_read_access.stderr rename to tests/native-lib/pass/ptr_read_access.notrace.stderr diff --git a/tests/native-lib/pass/ptr_read_access.rs b/tests/native-lib/pass/ptr_read_access.rs index 4c85213536..251c157225 100644 --- a/tests/native-lib/pass/ptr_read_access.rs +++ b/tests/native-lib/pass/ptr_read_access.rs @@ -1,3 +1,10 @@ +//revisions: trace notrace +//@[trace] only-target: linux +//@[trace] only-target: gnu +//@[trace] only-target: x86 +//@[trace] only-on-host +//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing + fn main() { test_access_pointer(); test_access_simple(); diff --git a/tests/native-lib/pass/ptr_write_access.stderr b/tests/native-lib/pass/ptr_write_access.notrace.stderr similarity index 100% rename from tests/native-lib/pass/ptr_write_access.stderr rename to tests/native-lib/pass/ptr_write_access.notrace.stderr diff --git a/tests/native-lib/pass/ptr_write_access.rs b/tests/native-lib/pass/ptr_write_access.rs index 86a9c97f4c..ae2e7b9de2 100644 --- a/tests/native-lib/pass/ptr_write_access.rs +++ b/tests/native-lib/pass/ptr_write_access.rs @@ -1,3 +1,9 @@ +//revisions: trace notrace +//@[trace] only-target: linux +//@[trace] only-target: gnu +//@[trace] only-target: x86 +//@[trace] only-on-host +//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing //@compile-flags: -Zmiri-permissive-provenance #![feature(box_as_ptr)] diff --git a/tests/native-lib/ptr_write_access.c b/tests/native-lib/ptr_write_access.c index fd8b005499..8654994d1e 100644 --- a/tests/native-lib/ptr_write_access.c +++ b/tests/native-lib/ptr_write_access.c @@ -107,3 +107,11 @@ EXPORT void set_shared_mem(int32_t** ptr) { EXPORT void init_ptr_stored_in_shared_mem(int32_t val) { **shared_place = val; } + +/* Test: partial_init */ + +EXPORT void init_n(int32_t n, char* ptr) { + for (int i=0; i