Skip to content

Commit f7aa8a6

Browse files
committed
WIP: start hooking in obtained info
1 parent 9b970cf commit f7aa8a6

File tree

3 files changed

+114
-111
lines changed

3 files changed

+114
-111
lines changed

src/alloc_addresses/mod.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,13 +468,25 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
468468
/// This overapproximates the modifications which external code might make to memory:
469469
/// We set all reachable allocations as initialized, mark all reachable provenances as exposed
470470
/// and overwrite them with `Provenance::WILDCARD`.
471-
fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> {
471+
fn prepare_exposed_for_native_call(&mut self, _paranoid: bool) -> InterpResult<'tcx> {
472472
let this = self.eval_context_mut();
473473
// We need to make a deep copy of this list, but it's fine; it also serves as scratch space
474474
// for the search within `prepare_for_native_call`.
475475
let exposed: Vec<AllocId> =
476476
this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
477-
this.prepare_for_native_call(exposed)
477+
this.prepare_for_native_call(exposed /*, paranoid*/)
478+
}
479+
480+
/// Makes use of information obtained about memory accesses during FFI to determine which
481+
/// provenances should be exposed. Note that if `prepare_exposed_for_native_call` was not
482+
/// called before the FFI (with `paranoid` set to false) then some of the writes may be
483+
/// lost!
484+
fn apply_events(&mut self, _events: crate::shims::trace::MemEvents) -> InterpResult<'tcx> {
485+
let this = self.eval_context_mut();
486+
let _exposed: Vec<AllocId> =
487+
this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
488+
interp_ok(())
489+
//this.apply_accesses(exposed, events.reads, events.writes)
478490
}
479491
}
480492

src/shims/native_lib.rs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
9292
fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
9393
let this = self.eval_context_mut();
9494
// Try getting the function from the shared library.
95-
let (lib, lib_path) = this.machine.native_lib.as_ref().unwrap();
95+
// On Windows `_lib_path` will be unused, hence the name starting with `_`.
96+
let (lib, _lib_path) = this.machine.native_lib.as_ref().unwrap();
9697
let func: libloading::Symbol<'_, unsafe extern "C" fn()> =
9798
unsafe { lib.get(link_name.as_str().as_bytes()).ok()? };
9899
#[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
@@ -109,17 +110,16 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
109110
// This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
110111
// from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
111112
// using the `libc` crate where this interface is public.
112-
let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
113+
let mut info = std::mem::MaybeUninit::<libc::Dl_info>::uninit();
113114
unsafe {
114115
if libc::dladdr(fn_ptr, info.as_mut_ptr()) != 0 {
115116
let info = info.assume_init();
116117
#[cfg(target_os = "cygwin")]
117118
let fname_ptr = info.dli_fname.as_ptr();
118119
#[cfg(not(target_os = "cygwin"))]
119120
let fname_ptr = info.dli_fname;
120-
assert!(!fname_ptr.is_null());
121121
if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
122-
!= lib_path.to_str().unwrap()
122+
!= _lib_path.to_str().unwrap()
123123
{
124124
return None;
125125
}
@@ -180,10 +180,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
180180
}
181181
}
182182

183-
if super::trace::Supervisor::init().is_err() {
184-
// Worst-case prepare all exposed memory.
185-
this.prepare_exposed_for_native_call()?;
186-
}
183+
// Always prepare all exposed memory; request the worst-case (`paranoid`) mode when the supervisor process could not be initialised.
184+
this.prepare_exposed_for_native_call(super::trace::Supervisor::init().is_err())?;
187185

188186
// Convert them to `libffi::high::Arg` type.
189187
let libffi_args = libffi_args
@@ -193,6 +191,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
193191

194192
// Call the function and store output, depending on return type in the function signature.
195193
let ret = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
194+
if let Some(events) = super::trace::Supervisor::get_events() {
195+
this.apply_events(events)?;
196+
}
196197
this.write_immediate(*ret, dest)?;
197198
interp_ok(true)
198199
}
@@ -202,15 +203,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
202203
unsafe fn do_native_call<T: libffi::high::CType>(ptr: CodePtr, args: &[ffi::Arg<'_>]) -> T {
203204
use shims::trace::Supervisor;
204205

205-
let ret = unsafe {
206+
unsafe {
206207
Supervisor::start_ffi();
207208
let ret = ffi::call(ptr, args);
208209
Supervisor::end_ffi();
209210
ret
210-
};
211-
let accesses = Supervisor::get_events().unwrap();
212-
eprintln!("accesses: {accesses:#018x?}");
213-
ret
211+
}
214212
}
215213

216214
#[cfg(not(all(unix, any(target_arch = "x86", target_arch = "x86_64"))))]

src/shims/trace.rs

Lines changed: 89 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::ops::Range;
2+
13
use ipc_channel::ipc;
24
use nix::sys::{ptrace, signal, wait};
35
use nix::unistd;
@@ -161,7 +163,21 @@ impl Supervisor {
161163
pub fn get_events() -> Option<MemEvents> {
162164
let mut sv_guard = SUPERVISOR.lock().unwrap();
163165
let sv = sv_guard.take()?;
164-
let ret = sv.r_event.recv().ok();
166+
// On the off-chance something really weird happens, don't block forever
167+
let ret = sv
168+
.r_event
169+
.try_recv_timeout(std::time::Duration::from_secs(1))
170+
.map_err(|e| {
171+
match e {
172+
ipc::TryRecvError::IpcError(e) => ipc::TryRecvError::IpcError(e),
173+
ipc::TryRecvError::Empty => {
174+
// timed out!
175+
eprintln!("Waiting for accesses from supervisor timed out!");
176+
ipc::TryRecvError::Empty
177+
}
178+
}
179+
})
180+
.ok();
165181
*sv_guard = Some(sv);
166182
ret
167183
}
@@ -176,47 +192,9 @@ pub enum TraceRequest {
176192

177193
#[derive(serde::Serialize, serde::Deserialize, Debug)]
178194
pub struct MemEvents {
179-
pub accesses: Vec<(u64, u64, MemAccessType)>,
180-
pub mappings: Vec<(u64, u64)>,
181-
}
182-
183-
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
184-
pub enum MemAccessType {
185-
Read,
186-
Write,
187-
ReadWrite,
188-
}
189-
190-
impl MemAccessType {
191-
fn update(&mut self, other: MemAccessType) {
192-
match self {
193-
MemAccessType::Read =>
194-
match other {
195-
MemAccessType::Read => (),
196-
_ => *self = MemAccessType::ReadWrite,
197-
},
198-
MemAccessType::Write =>
199-
match other {
200-
MemAccessType::Write => (),
201-
_ => *self = MemAccessType::ReadWrite,
202-
},
203-
MemAccessType::ReadWrite => (),
204-
}
205-
}
206-
207-
pub fn did_read(&self) -> bool {
208-
match self {
209-
MemAccessType::Write => false,
210-
_ => true,
211-
}
212-
}
213-
214-
pub fn did_write(&self) -> bool {
215-
match self {
216-
MemAccessType::Read => false,
217-
_ => true,
218-
}
219-
}
195+
pub reads: Vec<Range<u64>>,
196+
pub writes: Vec<Range<u64>>,
197+
pub mappings: Vec<Range<u64>>,
220198
}
221199

222200
struct ChildListener {
@@ -274,8 +252,9 @@ impl Iterator for ChildListener {
274252
/// created before the fork are the same).
275253
fn sv_loop(listener: ChildListener, t_event: ipc::IpcSender<MemEvents>) -> ! {
276254
// Things that we return to the child process
277-
let mut accesses: Vec<(u64, u64, MemAccessType)> = vec![];
278-
let mut mappings: Vec<(u64, u64)> = vec![];
255+
let mut reads: Vec<Range<u64>> = vec![];
256+
let mut writes: Vec<Range<u64>> = vec![];
257+
let mut mappings: Vec<Range<u64>> = vec![];
279258

280259
// Memory allocated on the MiriMachine
281260
let mut ch_pages = vec![];
@@ -310,7 +289,9 @@ fn sv_loop(listener: ChildListener, t_event: ipc::IpcSender<MemEvents>) -> ! {
310289
wait::WaitStatus::Stopped(pid, signal) => {
311290
match signal {
312291
signal::SIGSEGV => {
313-
if let Err(ret) = handle_segfault(pid, &ch_pages, &mut accesses) {
292+
if let Err(ret) =
293+
handle_segfault(pid, &ch_pages, &mut reads, &mut writes)
294+
{
314295
retcode = ret;
315296
break 'listen;
316297
}
@@ -354,7 +335,10 @@ fn sv_loop(listener: ChildListener, t_event: ipc::IpcSender<MemEvents>) -> ! {
354335
#[expect(clippy::as_conversions)]
355336
if regs.retval() as isize > 0 {
356337
let addr = regs.retval();
357-
mappings.push((addr.to_u64(), len.to_u64()));
338+
mappings.push(
339+
addr.to_u64()
340+
..addr.to_u64().strict_add(len.to_u64()),
341+
);
358342
}
359343
}
360344
Err(ret) => {
@@ -412,8 +396,9 @@ fn sv_loop(listener: ChildListener, t_event: ipc::IpcSender<MemEvents>) -> ! {
412396

413397
TraceRequest::EndFfi => {
414398
signal::kill(main_pid, signal::SIGSTOP).unwrap();
415-
t_event.send(MemEvents { accesses, mappings }).unwrap();
416-
accesses = vec![];
399+
t_event.send(MemEvents { reads, writes, mappings }).unwrap();
400+
reads = vec![];
401+
writes = vec![];
417402
mappings = vec![];
418403
if let Err(ret) = wait_for_signal(main_pid, signal::SIGSTOP, false) {
419404
retcode = ret;
@@ -509,19 +494,18 @@ fn wait_for_syscall(pid: unistd::Pid, syscall: i64) -> Result<libc::user_regs_st
509494
fn handle_munmap(
510495
pid: unistd::Pid,
511496
regs: libc::user_regs_struct,
512-
mappings: &mut Vec<(u64, u64)>,
497+
mappings: &mut Vec<Range<u64>>,
513498
) -> Result<(), i32> {
514499
// The unmap call might hit multiple mappings we've saved,
515500
// or overlap with them partially (or both)
516501
let um_start = regs.arg1().to_u64();
517502
let um_len = regs.arg2().to_u64();
518503
let um_end = um_start.strict_add(um_len);
519504
let mut idxes = vec![];
520-
for (idx, &(mp_start, len)) in mappings.iter().enumerate() {
521-
let mp_end = mp_start.strict_add(len);
522-
let cond = (mp_start..mp_end).contains(&um_start)
523-
|| (mp_start..mp_end).contains(&um_end)
524-
|| (um_start..um_end).contains(&mp_start);
505+
for (idx, mp) in mappings.iter().enumerate() {
506+
let cond = mp.contains(&um_start)
507+
|| mp.contains(&um_end)
508+
|| (um_start..um_end).contains(&mp.start);
525509

526510
if cond {
527511
idxes.push(idx);
@@ -542,16 +526,15 @@ fn handle_munmap(
542526
// but it may be only partial, so we may re-add some sections
543527
for idx in idxes {
544528
let um_end = um_start.strict_add(um_len);
545-
let (mp_start, mp_len) = mappings.remove(idx);
546-
let mp_end = mp_len.strict_add(mp_len);
529+
let mp = mappings.remove(idx);
547530

548-
if mp_start < um_start {
549-
let preserved_len_head = um_start.strict_sub(mp_start);
550-
mappings.push((mp_start, preserved_len_head));
531+
if mp.start < um_start {
532+
let preserved_len_head = um_start.strict_sub(mp.start);
533+
mappings.push(mp.start..mp.start.strict_add(preserved_len_head));
551534
}
552-
if mp_end > um_end {
553-
let preserved_len_tail = mp_end.strict_sub(um_end);
554-
mappings.push((um_end, preserved_len_tail));
535+
if mp.end > um_end {
536+
let preserved_len_tail = mp.end.strict_sub(um_end);
537+
mappings.push(um_end..um_end.strict_add(preserved_len_tail));
555538
}
556539
}
557540
}
@@ -562,7 +545,8 @@ fn handle_munmap(
562545
fn handle_segfault(
563546
pid: unistd::Pid,
564547
ch_pages: &[u64],
565-
accesses: &mut Vec<(u64, u64, MemAccessType)>,
548+
reads: &mut Vec<Range<u64>>,
549+
writes: &mut Vec<Range<u64>>,
566550
) -> Result<(), i32> {
567551
let siginfo = ptrace::getsiginfo(pid).unwrap();
568552
let addr = unsafe { siginfo.si_addr().addr().to_u64() };
@@ -617,28 +601,44 @@ fn handle_segfault(
617601
let instr = decoder.decode();
618602
let memsize = instr.op_code().memory_size().size().to_u64();
619603
let mem = fac.info(&instr).used_memory();
620-
let acc = mem.iter().fold(None, |mut curr: Option<MemAccessType>, m| {
621-
if let Some(m) = match m.access() {
622-
iced_x86::OpAccess::Read => Some(MemAccessType::Read),
623-
iced_x86::OpAccess::CondRead => Some(MemAccessType::Read),
624-
iced_x86::OpAccess::Write => Some(MemAccessType::Write),
625-
iced_x86::OpAccess::CondWrite => Some(MemAccessType::Write),
626-
iced_x86::OpAccess::ReadWrite => Some(MemAccessType::ReadWrite),
627-
iced_x86::OpAccess::ReadCondWrite => Some(MemAccessType::ReadWrite),
628-
_ => None,
629-
} {
630-
if let Some(curr) = curr.as_mut() {
631-
curr.update(m);
604+
605+
for acc in mem {
606+
let mut r = false;
607+
let mut w = false;
608+
match acc.access() {
609+
iced_x86::OpAccess::Read | iced_x86::OpAccess::CondRead => {
610+
r = true;
611+
}
612+
iced_x86::OpAccess::Write | iced_x86::OpAccess::CondWrite => {
613+
w = true;
614+
}
615+
iced_x86::OpAccess::ReadWrite | iced_x86::OpAccess::ReadCondWrite => {
616+
r = true;
617+
w = true;
618+
}
619+
_ => (),
620+
}
621+
let addr_end = addr.strict_add(memsize);
622+
if r {
623+
if let Some(idx) = reads.iter().position(|r| r.start <= addr_end && addr <= r.end) {
624+
let mut rg = reads[idx].clone();
625+
rg.start = std::cmp::min(rg.start, addr);
626+
rg.end = std::cmp::max(rg.end, addr_end);
627+
reads[idx] = rg;
632628
} else {
633-
curr = Some(m);
629+
reads.push(addr..addr_end);
634630
}
635631
}
636-
curr
637-
});
638-
if let Some(acc) = acc {
639-
match accesses.iter().position(|&(a, len, _)| a == addr && len == memsize) {
640-
Some(pos) => accesses[pos].2.update(acc),
641-
None => accesses.push((addr, memsize, acc)),
632+
if w {
633+
if let Some(idx) = writes.iter().position(|r| r.start <= addr_end && addr <= r.end)
634+
{
635+
let mut rg = writes[idx].clone();
636+
rg.start = std::cmp::min(rg.start, addr);
637+
rg.end = std::cmp::max(rg.end, addr_end);
638+
writes[idx] = rg;
639+
} else {
640+
writes.push(addr..addr_end);
641+
}
642642
}
643643
}
644644
#[expect(clippy::as_conversions)]
@@ -660,7 +660,7 @@ fn handle_segfault(
660660

661661
fn handle_sigtrap(
662662
pid: unistd::Pid,
663-
mappings: &mut Vec<(u64, u64)>,
663+
mappings: &mut Vec<Range<u64>>,
664664
malloc_bytes: i64,
665665
realloc_bytes: i64,
666666
free_bytes: i64,
@@ -677,31 +677,24 @@ fn handle_sigtrap(
677677
match regs.ip().strict_sub(1) {
678678
a if a == malloc_addr => {
679679
let size = regs.arg1().to_u64(); // !
680-
let ptr = intercept_retptr(pid, regs, malloc_addr, malloc_bytes)?;
681-
if ptr > 0 {
682-
mappings.push((ptr as u64, size));
680+
if let Ok(ptr) = intercept_retptr(pid, regs, malloc_addr, malloc_bytes)?.try_into() {
681+
mappings.push(ptr..ptr.strict_add(size));
683682
}
684683
}
685684
a if a == realloc_addr => {
686685
let old_ptr = regs.arg1().to_u64();
687686
let size = regs.arg2().to_u64();
688-
let pos = mappings
689-
.iter()
690-
.position(|&(ptr, size)| ptr <= old_ptr && old_ptr < ptr.strict_add(size));
687+
let pos = mappings.iter().position(|rg| rg.start <= old_ptr && old_ptr < rg.end);
691688
if let Some(pos) = pos {
692689
let _ = mappings.remove(pos);
693690
}
694-
let ptr = intercept_retptr(pid, regs, realloc_addr, realloc_bytes)?;
695-
if ptr > 0 {
696-
mappings.push((ptr as u64, size));
691+
if let Ok(ptr) = intercept_retptr(pid, regs, realloc_addr, realloc_bytes)?.try_into() {
692+
mappings.push(ptr..ptr.strict_add(size));
697693
}
698694
}
699695
a if a == free_addr => {
700696
let old_ptr = regs.arg1().to_u64();
701-
//let size = regs.rdi;
702-
let pos = mappings
703-
.iter()
704-
.position(|&(ptr, size)| ptr <= old_ptr && old_ptr < ptr.strict_add(size));
697+
let pos = mappings.iter().position(|rg| rg.start <= old_ptr && old_ptr < rg.end);
705698
if let Some(pos) = pos {
706699
let _ = mappings.remove(pos);
707700
}

0 commit comments

Comments
 (0)