
trace: implement supervisor components for tracing #4405

Merged: 7 commits, Jun 21, 2025
21 changes: 21 additions & 0 deletions Cargo.lock

(Generated file; diff not rendered.)

1 change: 1 addition & 0 deletions Cargo.toml
@@ -44,6 +44,7 @@ libloading = "0.8"
nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] }
ipc-channel = "0.19.0"
serde = { version = "1.0.219", features = ["derive"] }
+capstone = "0.13"

[dev-dependencies]
ui_test = "0.29.1"
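For context, capstone is a disassembly engine; a tracing supervisor that interprets segfaults typically needs one to decode the faulting instruction. A minimal sketch of the capstone crate's API (illustrative only, not code from this PR; the instruction bytes are an arbitrary example):

```rust
use capstone::prelude::*;

fn main() {
    // Build an x86-64 disassembler; `detail(true)` exposes operand info,
    // which is what a fault handler needs to recover the access kind and size.
    let cs = Capstone::new()
        .x86()
        .mode(arch::x86::ArchMode::Mode64)
        .detail(true)
        .build()
        .expect("failed to create Capstone object");
    // `mov rax, qword ptr [rip + 0x13b8]`: a memory-reading instruction.
    let code = b"\x48\x8b\x05\xb8\x13\x00\x00";
    let insns = cs.disasm_all(code, 0x1000).expect("disassembly failed");
    for insn in insns.iter() {
        println!("{insn}");
    }
}
```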
73 changes: 56 additions & 17 deletions src/alloc/isolated_alloc.rs
@@ -1,5 +1,7 @@
use std::alloc::Layout;
+use std::ptr::NonNull;

+use nix::sys::mman;
use rustc_index::bit_set::DenseBitSet;

/// How many bytes of memory each bit in the bitset represents.
@@ -12,7 +14,7 @@ pub struct IsolatedAlloc {
/// Pointers to page-aligned memory that has been claimed by the allocator.
/// Every pointer here must point to a page-sized allocation claimed via
/// mmap. These pointers are used for "small" allocations.
-page_ptrs: Vec<*mut u8>,
+page_ptrs: Vec<NonNull<u8>>,
/// Metadata about which bytes have been allocated on each page. The length
/// of this vector must be the same as that of `page_ptrs`, and the domain
/// size of the bitset must be exactly `page_size / COMPRESSION_FACTOR`.
@@ -24,7 +26,7 @@ pub struct IsolatedAlloc {
page_infos: Vec<DenseBitSet<usize>>,
/// Pointers to multiple-page-sized allocations. These must also be page-aligned,
/// with their size stored as the second element of the vector.
-huge_ptrs: Vec<(*mut u8, usize)>,
+huge_ptrs: Vec<(NonNull<u8>, usize)>,
/// The host (not emulated) page size.
page_size: usize,
}
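To make the bitset sizing concrete, here is a small worked example; the page size and the COMPRESSION_FACTOR value (the real constant is defined above this hunk) are assumed for illustration:

```rust
fn main() {
    // Assumed values, for illustration only.
    const COMPRESSION_FACTOR: usize = 4;
    let page_size = 4096usize;
    // Invariant asserted in add_page below: the page divides evenly into chunks.
    assert_eq!(page_size % COMPRESSION_FACTOR, 0);
    // Each page's bitset then has one bit per 4-byte chunk...
    assert_eq!(page_size / COMPRESSION_FACTOR, 1024);
    // ...and a 10-byte allocation occupies ceil(10 / 4) = 3 bits.
    assert_eq!(10usize.div_ceil(COMPRESSION_FACTOR), 3);
}
```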
@@ -137,7 +139,7 @@ impl IsolatedAlloc {
unsafe fn alloc_small(
page_size: usize,
layout: Layout,
-page: *mut u8,
+page: NonNull<u8>,
pinfo: &mut DenseBitSet<usize>,
zeroed: bool,
) -> Option<*mut u8> {
@@ -164,15 +166,15 @@ impl IsolatedAlloc {
// zero out, even if we allocated more
ptr.write_bytes(0, layout.size());
}
-return Some(ptr);
+return Some(ptr.as_ptr());
}
}
}
None
}

/// Expands the available memory pool by adding one page.
-fn add_page(&mut self) -> (*mut u8, &mut DenseBitSet<usize>) {
+fn add_page(&mut self) -> (NonNull<u8>, &mut DenseBitSet<usize>) {
// SAFETY: mmap is always safe to call when requesting anonymous memory
let page_ptr = unsafe {
libc::mmap(
@@ -189,8 +191,8 @@
// `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page.
assert!(self.page_size % COMPRESSION_FACTOR == 0);
self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR));
-self.page_ptrs.push(page_ptr);
-(page_ptr, self.page_infos.last_mut().unwrap())
+self.page_ptrs.push(NonNull::new(page_ptr).unwrap());
+(NonNull::new(page_ptr).unwrap(), self.page_infos.last_mut().unwrap())
}

/// Allocates in multiples of one page on the host system.
@@ -212,7 +214,7 @@
.cast::<u8>()
};
assert_ne!(ret.addr(), usize::MAX, "mmap failed");
-self.huge_ptrs.push((ret, size));
+self.huge_ptrs.push((NonNull::new(ret).unwrap(), size));
// huge_normalized_layout ensures that we've overallocated enough space
// for this to be valid.
ret.map_addr(|a| a.next_multiple_of(layout.align()))
@@ -246,7 +248,7 @@ impl IsolatedAlloc {
// from us pointing to this page, and we know it was allocated
// in add_page as exactly a single page.
unsafe {
-assert_eq!(libc::munmap(page_ptr.cast(), self.page_size), 0);
+assert_eq!(libc::munmap(page_ptr.as_ptr().cast(), self.page_size), 0);
}
}
}
@@ -265,7 +267,7 @@ impl IsolatedAlloc {
// This could be made faster if the list was sorted -- the allocator isn't fully optimized at the moment.
let pinfo = std::iter::zip(&mut self.page_ptrs, &mut self.page_infos)
.enumerate()
-.find(|(_, (page, _))| page.addr() == page_addr);
+.find(|(_, (page, _))| page.addr().get() == page_addr);
let Some((idx_of_pinfo, (_, pinfo))) = pinfo else {
panic!("Freeing in an unallocated page: {ptr:?}\nHolding pages {:?}", self.page_ptrs)
};
@@ -287,30 +289,67 @@ impl IsolatedAlloc {
.huge_ptrs
.iter()
.position(|&(pg, size)| {
-pg.addr() <= ptr.addr() && ptr.addr() < pg.addr().strict_add(size)
+pg.addr().get() <= ptr.addr() && ptr.addr() < pg.addr().get().strict_add(size)
})
.expect("Freeing unallocated pages");
// And kick it from the list
let (un_offset_ptr, size2) = self.huge_ptrs.remove(idx);
assert_eq!(size, size2, "got wrong layout in dealloc");
// SAFETY: huge_ptrs contains allocations made with mmap with the size recorded there.
unsafe {
-let ret = libc::munmap(un_offset_ptr.cast(), size);
+let ret = libc::munmap(un_offset_ptr.as_ptr().cast(), size);
assert_eq!(ret, 0);
}
}

/// Returns a vector of page addresses managed by the allocator.
pub fn pages(&self) -> Vec<usize> {
-let mut pages: Vec<_> =
-self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect();
-for (ptr, size) in &self.huge_ptrs {
+let mut pages: Vec<usize> =
+self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance().get()).collect();
+self.huge_ptrs.iter().for_each(|(ptr, size)| {
for i in 0..size / self.page_size {
-pages.push(ptr.expose_provenance().strict_add(i * self.page_size));
+pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size));
}
-}
+});
pages
}

+/// Protects all owned memory as `PROT_NONE`, preventing accesses.
+///
+/// SAFETY: Accessing memory after this point will result in a segfault
+/// unless it is first unprotected.
+pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> {
+let prot = mman::ProtFlags::PROT_NONE;
+unsafe { self.mprotect(prot) }
+}
+
+/// Deprotects all owned memory by setting it to RW. Erroring here is very
+/// likely unrecoverable, so it may panic if applying those permissions
+/// fails.
+pub fn unprep_ffi(&mut self) {
+let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
+unsafe {
+self.mprotect(prot).unwrap();
+}
+}
Comment on lines +321 to +334
Member:
Elsewhere this is called start_ffi and end_ffi, right?

Why do we need to give a list of pages to the trace mechanism if we then still do this work here in the allocator...?

Contributor Author:
I can change the naming if that's preferable, sure.

The point is that here in the allocator we're doing the initial and final bulk protection/deprotection so that accesses to the machine's memory result in a segfault; the list of pages passed to the supervisor is used to tell whether a segfault is "real" (e.g. the C code wrote through a null ptr) or not (that is, its address matches one of these pages), in which case we log it instead. I guess we could delete this and make the supervisor process do the protection to the child process through those mempr_on()/mempr_off() functions, but that would be quite messy.
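(For illustration, the classification just described could look like the following sketch; the function name and signature are hypothetical, not code from this PR.)

```rust
// A fault is "ours" (and gets logged as an FFI memory event) iff its
// address falls inside a page that the allocator reported via pages().
fn fault_is_tracked(fault_addr: usize, pages: &[usize], page_size: usize) -> bool {
    pages.iter().any(|&page| page <= fault_addr && fault_addr < page + page_size)
}
```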

Member:
> I guess we could delete this and make the supervisor process do the protection to the child process through those mempr_on()/mempr_off() functions, but that would be quite messy

What I was thinking was more along the lines of having all mprotect calls reasonably close to each other -- right now, some are in native_lib and some in the allocator. That does not mean they have to be done by the supervisor. They can be done by the child in start_ffi / end_ffi, but with the logic in the tracing code, not here.

If you also change the pages getter to return an impl Iterator instead of a Vec, you can avoid some unnecessary allocation in end_ffi.
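(A sketch of that suggestion, reusing the field types from this diff; untested, and the eventual follow-up may differ.)

```rust
/// Returns an iterator over the page addresses managed by the allocator,
/// avoiding the intermediate Vec.
pub fn pages(&self) -> impl Iterator<Item = usize> + '_ {
    let page_size = self.page_size;
    let small_pages = self.page_ptrs.iter().map(|p| p.expose_provenance().get());
    // Every huge allocation contributes one entry per page it spans.
    let huge_pages = self.huge_ptrs.iter().flat_map(move |&(ptr, size)| {
        (0..size / page_size)
            .map(move |i| ptr.expose_provenance().get().strict_add(i * page_size))
    });
    small_pages.chain(huge_pages)
}
```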


+/// Applies `prot` to every page managed by the allocator.
+///
+/// SAFETY: Accessing memory in violation of the protection flags will
+/// trigger a segfault.
+unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> {
+for &pg in &self.page_ptrs {
+unsafe {
+mman::mprotect(pg.cast(), self.page_size, prot)?;
+}
+}
+for &(hpg, size) in &self.huge_ptrs {
+unsafe {
+mman::mprotect(hpg.cast(), size.next_multiple_of(self.page_size), prot)?;
+}
+}
+Ok(())
+}
}

#[cfg(test)]
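To show how these two entry points are meant to pair up around a native call, here is a hypothetical wrapper (a sketch only; in this PR the actual orchestration happens via Supervisor::start_ffi / end_ffi in native_lib.rs, shown below):

```rust
// Sketch, not code from this PR: run `f` with the isolated allocator's
// memory protected, restoring access afterwards.
fn with_protected_memory<R>(alloc: &mut IsolatedAlloc, f: impl FnOnce() -> R) -> R {
    // SAFETY: every interpreter-owned page becomes PROT_NONE, so any stray
    // access from `f` faults and can be observed by the supervisor.
    unsafe { alloc.prepare_ffi().expect("mprotect(PROT_NONE) failed") };
    let ret = f();
    // Restore read/write access so the interpreter can touch its memory again.
    alloc.unprep_ffi();
    ret
}
```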
17 changes: 12 additions & 5 deletions src/shims/native_lib.rs
@@ -229,7 +229,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
.collect::<Vec<libffi::high::Arg<'_>>>();

// Call the function and store output, depending on return type in the function signature.
-let (ret, _) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
+let (ret, maybe_memevents) =
+this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
+
+if cfg!(target_os = "linux")
+&& let Some(events) = maybe_memevents
+{
+trace!("Registered FFI events:\n{events:#0x?}");
+}

this.write_immediate(*ret, dest)?;
interp_ok(true)
@@ -250,15 +257,15 @@ unsafe fn do_native_call<T: libffi::high::CType>(

unsafe {
if let Some(alloc) = alloc {
-// SAFETY: We don't touch the machine memory past this point
+// SAFETY: We don't touch the machine memory past this point.
let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone());
-// SAFETY: Upheld by caller
+// SAFETY: Upheld by caller.
let ret = ffi::call(ptr, args);
// SAFETY: We got the guard and stack pointer from start_ffi, and
-// the allocator is the same
+// the allocator is the same.
(ret, Supervisor::end_ffi(guard, alloc, stack_ptr))
} else {
-// SAFETY: Upheld by caller
+// SAFETY: Upheld by caller.
(ffi::call(ptr, args), None)
}
}