wip: hook up bits

nia-e · nia-e · commit acbf7f35956e · 2025-07-08T10:49:39.000+02:00
diff --git a/src/eval.rs b/src/eval.rs
@@ -201,7 +201,7 @@ impl Default for MiriConfig {
             report_progress: None,
             retag_fields: RetagFields::Yes,
             native_lib: vec![],
-            native_lib_enable_tracing: false,
+            native_lib_enable_tracing: true,
             gc_interval: 10_000,
             num_cpus: 1,
             page_size: None,
diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs
@@ -40,14 +40,27 @@ pub struct MemEvents {
 /// A single memory access.
 #[allow(dead_code)]
 #[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub enum AccessEvent {
     /// A read occurred on this memory range.
     Read(AccessRange),
-/// A read occurred on this memory range.
+    /// A write occurred on this memory range.
     Write(AccessRange),
 }
 
+impl AccessEvent {
+    /// Returns a copy of the memory range covered by this access.
+    fn get_range(&self) -> AccessRange {
+        let (Self::Read(range) | Self::Write(range)) = self;
+        range.clone()
+    }
+
+    /// Returns `true` for a read access and `false` for a write access.
+    fn is_read(&self) -> bool {
+        matches!(self, Self::Read(_))
+    }
+}
+
 /// The memory touched by a given access.
 #[allow(dead_code)]
 #[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
@@ -198,6 +211,78 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
         }
         None
     }
+
+    /// Applies the `events` to Miri's internal state. The event vector must be
+    /// ordered sequentially by when the accesses happened, and the sizes are
+    /// assumed to be exact. Reads expose every provenance found in the bytes
+    /// that were read; applying writes is not implemented yet (see TODO below).
+    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+
+        // TODO: This could be optimised! If we first "compress" down the reads
+        // and writes to discard redundant reads and writes and sort them by
+        // address, we can take this from worst-case O(accesses * allocations)
+        // to O(log(accesses) * allocations).
+        for evt in events.acc_events {
+            let mut todo = this.exposed_allocs();
+            let mut done = rustc_data_structures::fx::FxHashSet::default();
+            while let Some(alloc_id) = todo.pop() {
+                // `insert` returns `false` for ids that were already processed.
+                if !done.insert(alloc_id) {
+                    continue;
+                }
+
+                let info = this.get_alloc_info(alloc_id);
+                // If there is no data behind this pointer, skip this.
+                if !matches!(info.kind, AllocKind::LiveData) {
+                    continue;
+                }
+
+                // Get the address range covered by the current allocation...
+                let alloc = this.get_alloc_raw(alloc_id)?;
+                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
+                let alloc_end = alloc_addr.strict_add(alloc.len());
+
+                // ...and for the current access, checking if they overlap. Both ranges are
+                // half-open, so ranges that merely touch do not count as overlapping.
+                let rg = evt.get_range();
+                let rg_end = rg.addr.strict_add(rg.size);
+                if rg.addr >= alloc_end || alloc_addr >= rg_end {
+                    continue;
+                }
+
+                // Shift the overlap range to be an offset from the allocation base addr.
+                let overlap_start = std::cmp::max(rg.addr, alloc_addr).strict_sub(alloc_addr);
+                let overlap_end = std::cmp::min(rg_end, alloc_end).strict_sub(alloc_addr);
+                let overlap = overlap_start..overlap_end;
+
+                if evt.is_read() {
+                    let alloc = this.get_alloc_raw(alloc_id)?;
+                    let p_map = alloc.provenance();
+                    for idx in overlap {
+                        // If a provenance was read by the foreign code, expose it and add it to the todo list.
+                        if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
+                            // Do this extra check since we get bytewise provenance,
+                            // so otherwise we risk inserting 4/8 copies of it per pointer.
+                            // TODO: Can freestanding bytes even have provenance?
+                            if let Some(prov_id) = prov.get_alloc_id()
+                                && !todo.contains(&prov_id)
+                                && !done.contains(&prov_id)
+                            {
+                                todo.push(prov_id);
+                            }
+                            this.expose_provenance(prov)?;
+                        }
+                    }
+                } else {
+                    let (_alloc_mut, _m) = this.get_alloc_raw_mut(alloc_id)?;
+                    // TODO: expose a way to write wildcards on a given range and mark it as init
+                }
+            }
+        }
+
+        interp_ok(())
+    }
 }
 
 impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
@@ -223,6 +308,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             }
         };
 
+        // Do we have ptrace?
+        let tracing = trace::Supervisor::is_enabled();
+
         // Get the function arguments, and convert them to `libffi`-compatible form.
         let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
         for arg in args.iter() {
@@ -242,9 +330,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // The first time this happens, print a warning.
                 if !this.machine.native_call_mem_warned.replace(true) {
                     // Newly set, so first time we get here.
-                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem {
-                        tracing: self::trace::Supervisor::is_enabled(),
-                    });
+                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
                 }
 
                 this.expose_provenance(prov)?;
@@ -270,15 +356,37 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // be read by FFI. The `black_box` is defensive programming as LLVM likes
             // to (incorrectly) optimize away ptr2int casts whose result is unused.
             std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-            // Expose all provenances in this allocation, since the native code can do $whatever.
-            for prov in alloc.provenance().provenances() {
-                this.expose_provenance(prov)?;
+
+            if !tracing {
+                // Expose all provenances in this allocation, since the native code can do $whatever.
+                for prov in alloc.provenance().provenances() {
+                    this.expose_provenance(prov)?;
+                }
             }
 
             // Prepare for possible write from native code if mutable.
             if info.mutbl.is_mut() {
-                let alloc = &mut this.get_alloc_raw_mut(alloc_id)?.0;
-                alloc.prepare_for_native_access();
+                let alloc = this.get_alloc_raw_mut(alloc_id)?.0;
+                if tracing {
+                    let full_range =
+                        AllocRange { start: Size::ZERO, size: Size::from_bytes(alloc.len()) };
+                    // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
+                    for chunk in alloc.init_mask().clone().range_as_init_chunks(full_range) {
+                        if !chunk.is_init() {
+                            let uninit_bytes = unsafe {
+                                let start = chunk.range().start.bytes_usize();
+                                let len = chunk.range().end.bytes_usize().strict_sub(start);
+                                let ptr = alloc.get_bytes_unchecked_raw_mut().add(start);
+                                std::slice::from_raw_parts_mut(ptr, len)
+                            };
+                            uninit_bytes.fill(0);
+                        }
+                    }
+                } else {
+                    // FIXME: Make this take an arg to determine whether it actually
+                    // writes wildcard prov & marks init, so we don't duplicate code above.
+                    alloc.prepare_for_native_access();
+                }
                 // Also expose *mutable* provenance for the interpreter-level allocation.
                 std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
             }
@@ -290,10 +398,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let (ret, maybe_memevents) =
             this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
 
-        if cfg!(target_os = "linux")
-            && let Some(events) = maybe_memevents
-        {
-            trace!("Registered FFI events:\n{events:#0x?}");
+        if tracing {
+            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
         }
 
         this.write_immediate(*ret, dest)?;
diff --git a/src/shims/native_lib/trace/child.rs b/src/shims/native_lib/trace/child.rs
@@ -202,7 +202,9 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
                     // The "Ok" case means that we couldn't ptrace.
                     Ok(e) => return Err(e),
                     Err(p) => {
-                        eprintln!("Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer.");
+                        eprintln!(
+                            "Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer."
+                        );
                         std::process::exit(1);
                     }
                 }
diff --git a/src/shims/native_lib/trace/parent.rs b/src/shims/native_lib/trace/parent.rs
@@ -408,10 +408,7 @@ fn handle_segfault(
                     match x86_operand.op_type {
                         // We only care about memory accesses
                         arch::x86::X86OperandType::Mem(_) => {
-                            let push = AccessRange {
-                                addr,
-                                size: x86_operand.size.into(),
-                            };
+                            let push = AccessRange { addr, size: x86_operand.size.into() };
                             // It's called a "RegAccessType" but it also applies to memory
                             let acc_ty = x86_operand.access.unwrap();
                             if acc_ty.is_readable() {

Original file line number	Diff line number	Diff line change
`@@ -202,7 +202,9 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {`
`202`	`202`	`// The "Ok" case means that we couldn't ptrace.`
`203`	`203`	`Ok(e) => return Err(e),`
`204`	`204`	`Err(p) => {`
`205`		`- eprintln!("Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer.");`
	`205`	`+ eprintln!(`
	`206`	`+ "Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer."`
	`207`	`+ );`
`206`	`208`	`std::process::exit(1);`
`207`	`209`	`}`
`208`	`210`	`}`