Skip to content

Commit ed212ff

Browse files
authored
Merge pull request #4456 from nia-e/trace-incorporate-events
trace: incorporate events
2 parents 62aea72 + 06f0dca commit ed212ff

17 files changed

+314
-25
lines changed

src/tools/miri/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#![feature(unqualified_local_imports)]
1616
#![feature(derive_coerce_pointee)]
1717
#![feature(arbitrary_self_types)]
18+
#![feature(iter_advance_by)]
1819
// Configure clippy and other lints
1920
#![allow(
2021
clippy::collapsible_else_if,

src/tools/miri/src/shims/native_lib/mod.rs

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,25 @@ pub struct MemEvents {
3434
/// A single memory access.
3535
#[allow(dead_code)]
3636
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
37-
#[derive(Debug)]
37+
#[derive(Clone, Debug)]
3838
pub enum AccessEvent {
39-
/// A read may have occurred on this memory range.
40-
/// Some instructions *may* read memory without *always* doing that,
41-
/// so this can be an over-approximation.
42-
/// The range info, however, is reliable if the access did happen.
39+
/// A read occurred on this memory range.
4340
Read(AccessRange),
44-
/// A read may have occurred on this memory range.
41+
/// A write may have occurred on this memory range.
4542
/// Some instructions *may* write memory without *always* doing that,
4643
/// so this can be an over-approximation.
4744
/// The range info, however, is reliable if the access did happen.
48-
Write(AccessRange),
45+
/// If the second field is true, the access definitely happened.
46+
Write(AccessRange, bool),
47+
}
48+
49+
impl AccessEvent {
50+
fn get_range(&self) -> AccessRange {
51+
match self {
52+
AccessEvent::Read(access_range) => access_range.clone(),
53+
AccessEvent::Write(access_range, _) => access_range.clone(),
54+
}
55+
}
4956
}
5057

5158
/// The memory touched by a given access.
@@ -59,6 +66,12 @@ pub struct AccessRange {
5966
pub size: usize,
6067
}
6168

69+
impl AccessRange {
70+
fn end(&self) -> usize {
71+
self.addr.strict_add(self.size)
72+
}
73+
}
74+
6275
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
6376
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
6477
/// Call native host function and return the output as an immediate.
@@ -196,6 +209,73 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
196209
}
197210
None
198211
}
212+
213+
/// Applies the `events` to Miri's internal state. The event vector must be
214+
/// ordered sequentially by when the accesses happened, and the sizes are
215+
/// assumed to be exact.
216+
fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
217+
let this = self.eval_context_mut();
218+
219+
for evt in events.acc_events {
220+
let evt_rg = evt.get_range();
221+
// LLVM at least permits vectorising accesses to adjacent allocations,
222+
// so we cannot assume 1 access = 1 allocation. :(
223+
let mut rg = evt_rg.addr..evt_rg.end();
224+
while let Some(curr) = rg.next() {
225+
let Some(alloc_id) = this.alloc_id_from_addr(
226+
curr.to_u64(),
227+
rg.len().try_into().unwrap(),
228+
/* only_exposed_allocations */ true,
229+
) else {
230+
throw_ub_format!("Foreign code did an out-of-bounds access!")
231+
};
232+
let alloc = this.get_alloc_raw(alloc_id)?;
233+
// The logical and physical address of the allocation coincide, so we can use
234+
// this instead of `addr_from_alloc_id`.
235+
let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
236+
237+
// Determine the range inside the allocation that this access covers. This range is
238+
// in terms of offsets from the start of `alloc`. The start of the overlap range
239+
// will be `curr`; the end will be the minimum of the end of the allocation and the
240+
// end of the access' range.
241+
let overlap = curr.strict_sub(alloc_addr)
242+
..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
243+
// Skip forward however many bytes of the access are contained in the current
244+
// allocation, subtracting 1 since the overlap range includes the current addr
245+
// that was already popped off of the range.
246+
rg.advance_by(overlap.len().strict_sub(1)).unwrap();
247+
248+
match evt {
249+
AccessEvent::Read(_) => {
250+
// FIXME: ProvenanceMap should have something like get_range().
251+
let p_map = alloc.provenance();
252+
for idx in overlap {
253+
// If a provenance was read by the foreign code, expose it.
254+
if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
255+
this.expose_provenance(prov)?;
256+
}
257+
}
258+
}
259+
AccessEvent::Write(_, certain) => {
260+
// Sometimes we aren't certain if a write happened, in which case we
261+
// only initialise that data if the allocation is mutable.
262+
if certain || alloc.mutability.is_mut() {
263+
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
264+
alloc.process_native_write(
265+
&cx.tcx,
266+
Some(AllocRange {
267+
start: Size::from_bytes(overlap.start),
268+
size: Size::from_bytes(overlap.len()),
269+
}),
270+
)
271+
}
272+
}
273+
}
274+
}
275+
}
276+
277+
interp_ok(())
278+
}
199279
}
200280

201281
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
@@ -221,6 +301,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
221301
}
222302
};
223303

304+
// Do we have ptrace?
305+
let tracing = trace::Supervisor::is_enabled();
306+
224307
// Get the function arguments, and convert them to `libffi`-compatible form.
225308
let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
226309
for arg in args.iter() {
@@ -240,9 +323,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
240323
// The first time this happens, print a warning.
241324
if !this.machine.native_call_mem_warned.replace(true) {
242325
// Newly set, so first time we get here.
243-
this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem {
244-
tracing: self::trace::Supervisor::is_enabled(),
245-
});
326+
this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
246327
}
247328

248329
this.expose_provenance(prov)?;
@@ -269,15 +350,23 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
269350
// be read by FFI. The `black_box` is defensive programming as LLVM likes
270351
// to (incorrectly) optimize away ptr2int casts whose result is unused.
271352
std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
272-
// Expose all provenances in this allocation, since the native code can do $whatever.
273-
for prov in alloc.provenance().provenances() {
274-
this.expose_provenance(prov)?;
353+
354+
if !tracing {
355+
// Expose all provenances in this allocation, since the native code can do $whatever.
356+
// Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
357+
for prov in alloc.provenance().provenances() {
358+
this.expose_provenance(prov)?;
359+
}
275360
}
276361

277362
// Prepare for possible write from native code if mutable.
278363
if info.mutbl.is_mut() {
279364
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
280-
alloc.process_native_write(&cx.tcx, None);
365+
// These writes could initialize everything and wreck havoc with the pointers.
366+
// We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
367+
if !tracing {
368+
alloc.process_native_write(&cx.tcx, None);
369+
}
281370
// Also expose *mutable* provenance for the interpreter-level allocation.
282371
std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
283372
}
@@ -289,10 +378,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
289378
let (ret, maybe_memevents) =
290379
this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
291380

292-
if cfg!(target_os = "linux")
293-
&& let Some(events) = maybe_memevents
294-
{
295-
trace!("Registered FFI events:\n{events:#0x?}");
381+
if tracing {
382+
this.tracing_apply_accesses(maybe_memevents.unwrap())?;
296383
}
297384

298385
this.write_immediate(*ret, dest)?;

src/tools/miri/src/shims/native_lib/trace/parent.rs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,11 @@ impl ArchIndependentRegs for libc::user_regs_struct {
5858
#[rustfmt::skip]
5959
impl ArchIndependentRegs for libc::user_regs_struct {
6060
#[inline]
61-
fn ip(&self) -> usize { self.eip.try_into().unwrap() }
61+
fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() }
6262
#[inline]
63-
fn set_ip(&mut self, ip: usize) { self.eip = ip.try_into().unwrap() }
63+
fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() }
6464
#[inline]
65-
fn set_sp(&mut self, sp: usize) { self.esp = sp.try_into().unwrap() }
65+
fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() }
6666
}
6767

6868
/// A unified event representing something happening on the child process. Wraps
@@ -386,7 +386,17 @@ fn capstone_find_events(
386386
acc_events.push(AccessEvent::Read(push.clone()));
387387
}
388388
if acc_ty.is_writable() {
389-
acc_events.push(AccessEvent::Write(push));
389+
// FIXME: This could be made certain; either determine all cases where
390+
// only reads happen, or have an intermediate mempr_* function to first
391+
// map the page(s) as readonly and check if a segfault occurred.
392+
393+
// Per https://docs.rs/iced-x86/latest/iced_x86/enum.OpAccess.html,
394+
// we know that the possible access types are Read, CondRead, Write,
395+
// CondWrite, ReadWrite, and ReadCondWrite. Since we got a segfault
396+
// we know some kind of access happened so Cond{Read, Write}s are
397+
// certain reads and writes; the only uncertainty is with an RW op
398+
// as it might be a ReadCondWrite with the write condition unmet.
399+
acc_events.push(AccessEvent::Write(push, !acc_ty.is_readable()));
390400
}
391401

392402
return true;
@@ -442,8 +452,7 @@ fn handle_segfault(
442452
// Get information on what caused the segfault. This contains the address
443453
// that triggered it.
444454
let siginfo = ptrace::getsiginfo(pid).unwrap();
445-
// All x86, ARM, etc. instructions only have at most one memory operand
446-
// (thankfully!)
455+
// All x86 instructions only have at most one memory operand (thankfully!)
447456
// SAFETY: si_addr is safe to call.
448457
let addr = unsafe { siginfo.si_addr().addr() };
449458
let page_addr = addr.strict_sub(addr.strict_rem(page_size));
@@ -490,7 +499,7 @@ fn handle_segfault(
490499
ptrace::write(
491500
pid,
492501
(&raw const PAGE_ADDR).cast_mut().cast(),
493-
libc::c_long::try_from(page_addr).unwrap(),
502+
libc::c_long::try_from(page_addr.cast_signed()).unwrap(),
494503
)
495504
.unwrap();
496505

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
2+
//@compile-flags: -Zmiri-native-lib-enable-tracing
3+
4+
extern "C" {
5+
fn init_n(n: i32, ptr: *mut u8);
6+
}
7+
8+
fn main() {
9+
partial_init();
10+
}
11+
12+
// Initialise the first 2 elements of the slice from native code, and check
13+
// that the 3rd is correctly deemed uninit.
14+
fn partial_init() {
15+
let mut slice = std::mem::MaybeUninit::<[u8; 3]>::uninit();
16+
let slice_ptr = slice.as_mut_ptr().cast::<u8>();
17+
unsafe {
18+
// Initialize the first two elements.
19+
init_n(2, slice_ptr);
20+
assert!(*slice_ptr == 0);
21+
assert!(*slice_ptr.offset(1) == 0);
22+
// Reading the third is UB!
23+
let _val = *slice_ptr.offset(2); //~ ERROR: Undefined Behavior: using uninitialized data
24+
}
25+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
warning: sharing memory with a native function called via FFI
2+
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
3+
|
4+
LL | init_n(2, slice_ptr);
5+
| ^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
6+
|
7+
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
8+
= help: in particular, Miri assumes that the native call initializes all memory it has written to
9+
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
10+
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
11+
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
12+
= note: BACKTRACE:
13+
= note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC
14+
note: inside `main`
15+
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
16+
|
17+
LL | partial_init();
18+
| ^^^^^^^^^^^^^^
19+
20+
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
21+
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
22+
|
23+
LL | let _val = *slice_ptr.offset(2);
24+
| ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here
25+
|
26+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
27+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
28+
= note: BACKTRACE:
29+
= note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC
30+
note: inside `main`
31+
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
32+
|
33+
LL | partial_init();
34+
| ^^^^^^^^^^^^^^
35+
36+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
37+
38+
error: aborting due to 1 previous error; 1 warning emitted
39+
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
2+
//@compile-flags: -Zmiri-permissive-provenance -Zmiri-native-lib-enable-tracing
3+
4+
extern "C" {
5+
fn do_one_deref(ptr: *const *const *const i32) -> usize;
6+
}
7+
8+
fn main() {
9+
unexposed_reachable_alloc();
10+
}
11+
12+
// Expose 2 pointers by virtue of doing a native read and assert that the 3rd in
13+
// the chain remains properly unexposed.
14+
fn unexposed_reachable_alloc() {
15+
let inner = 42;
16+
let intermediate_a = &raw const inner;
17+
let intermediate_b = &raw const intermediate_a;
18+
let exposed = &raw const intermediate_b;
19+
// Discard the return value; it's just there so the access in C doesn't get optimised away.
20+
unsafe { do_one_deref(exposed) };
21+
// Native read should have exposed the address of intermediate_b...
22+
let valid: *const i32 = std::ptr::with_exposed_provenance(intermediate_b.addr());
23+
// but not of intermediate_a.
24+
let invalid: *const i32 = std::ptr::with_exposed_provenance(intermediate_a.addr());
25+
unsafe {
26+
let _ok = *valid;
27+
let _not_ok = *invalid; //~ ERROR: Undefined Behavior: memory access failed: attempting to access
28+
}
29+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
warning: sharing memory with a native function called via FFI
2+
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
3+
|
4+
LL | unsafe { do_one_deref(exposed) };
5+
| ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
6+
|
7+
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
8+
= help: in particular, Miri assumes that the native call initializes all memory it has written to
9+
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
10+
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
11+
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
12+
= note: BACKTRACE:
13+
= note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
14+
note: inside `main`
15+
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
16+
|
17+
LL | unexposed_reachable_alloc();
18+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
19+
20+
error: Undefined Behavior: memory access failed: attempting to access 4 bytes, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
21+
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
22+
|
23+
LL | let _not_ok = *invalid;
24+
| ^^^^^^^^ Undefined Behavior occurred here
25+
|
26+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
27+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
28+
= note: BACKTRACE:
29+
= note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
30+
note: inside `main`
31+
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
32+
|
33+
LL | unexposed_reachable_alloc();
34+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
35+
36+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
37+
38+
error: aborting due to 1 previous error; 1 warning emitted
39+

src/tools/miri/tests/native-lib/pass/ptr_read_access.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
//@revisions: trace notrace
2+
//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
3+
//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing
4+
15
fn main() {
26
test_access_pointer();
37
test_access_simple();

0 commit comments

Comments
 (0)