
Commit f97ebcf

fixup capstone mode
1 parent 8c0ee7e commit f97ebcf

3 files changed: +26 −164 lines

src/alloc_addresses/mod.rs

Lines changed: 10 additions & 92 deletions
```diff
@@ -485,108 +485,26 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     /// lost!
     #[cfg(target_os = "linux")]
     fn apply_events(&mut self, events: crate::shims::trace::MemEvents) -> InterpResult<'tcx> {
-        use rustc_index::bit_set::DenseBitSet;
-
         let this = self.eval_context_mut();
 
-        // In order to not overexpose provenances, we only want to mark as read
-        // bytes that were read before being written. That's inefficient to track
-        // live as it's happening, but cheaper (relatively) to do here. The alternative
-        // is that we could feed our AccessEvent enum into rustc itself, but that
-        // would come at a larger perf cost in most cases and make the API it
-        // exposes less generic (as right now we can feed it arbitrary reads and
-        // writes and just trust that it logs them down). Therefore, we have this.
-
-        // Each bitset holds alloc_cutoff bits
-        let alloc_cutoff = events.alloc_cutoff;
-        // FIXME: these could be u16s
-        //eprintln!("bare accesses: {:#0x?}", events.acc_events);
-        let mut reads_bitset: Vec<(DenseBitSet<u32>, usize)> = vec![];
-        let mut writes_bitset: Vec<(DenseBitSet<u32>, usize)> = vec![];
+        let mut reads = vec![];
+        let mut writes = vec![];
         for acc in events.acc_events {
             match acc {
-                // Reads have more logic to them since we don't want to count
-                // them at all if a write already occurred but obviously we do
-                // if the write has yet to happen
+                // Ideally, we'd skip reads that occur after certain bytes were
+                // already written to. However, these are always just conservative
+                // overestimates - Read(range) means "a read maybe happened
+                // spanning at most range" - so we can't make use of this for
+                // now. Maybe we could also skip over reads/writes that hit the
+                // same bytes, but that's best added together with the stuff above.
                 shims::trace::AccessEvent::Read(range) => {
-                    // The tracer ensures access ranges don't go over this alignment
-                    let pg = range.start - range.start % alloc_cutoff;
-                    for byte in range {
-                        #[expect(clippy::as_conversions)]
-                        let ofs = (byte - pg) as u32;
-                        // Checks if any of the write-tracking bitsets match the page address
-                        if !writes_bitset.iter().fold(false, |found, (set, p)| {
-                            if found {
-                                true
-                            } else {
-                                // And if yes, whether they have this particular byte
-                                if *p == pg { set.contains(ofs) } else { false }
-                            }
-                        }) {
-                            // If this byte hasn't been written to yet, mark a read
-                            let pos = reads_bitset.iter().position(|(_, p)| *p == pg).unwrap_or({
-                                reads_bitset.push((DenseBitSet::new_empty(alloc_cutoff), pg));
-                                reads_bitset.len() - 1
-                            });
-                            reads_bitset[pos].0.insert(ofs);
-                        }
-                    }
+                    reads.push(range)
                 }
-                // Writes don't have much going on, but we need to track them
-                // at the same time as reads for this to be useful. We just
-                // insert ranges into the appropriate bitset
                 shims::trace::AccessEvent::Write(range) => {
-                    let pg = range.start - range.start % alloc_cutoff;
-                    #[expect(clippy::as_conversions)]
-                    let rg_norm = ((range.start - pg) as u32)..((range.end - pg) as u32);
-                    let pos = writes_bitset.iter().position(|(_, p)| *p == pg).unwrap_or({
-                        writes_bitset.push((DenseBitSet::new_empty(alloc_cutoff), pg));
-                        writes_bitset.len() - 1
-                    });
-                    writes_bitset[pos].0.insert_range(rg_norm);
+                    writes.push(range);
                 }
             }
         }
-        // The rustc side expects a `Vec<Range<usize>>`, not our monstrosity, so
-        // this turns our bitset vector into one of ranges
-        let decompress: fn(Vec<(DenseBitSet<u32>, usize)>, &mut Vec<std::ops::Range<usize>>) =
-            |sets, into| {
-                sets.into_iter().for_each(|(set, p)| {
-                    // Iterates over the indices of 1s and so long as they
-                    // are contiguous, opt_so_far keeps growing
-                    let mut opt_so_far: Option<std::ops::Range<u32>> = None;
-                    #[expect(clippy::as_conversions)]
-                    for bit in set.iter() {
-                        match opt_so_far {
-                            Some(mut so_far) =>
-                                if so_far.end == bit {
-                                    so_far.end = bit + 1;
-                                    opt_so_far = Some(so_far);
-                                } else {
-                                    // When there's a jump, push what we have so far and start anew
-                                    into.push(
-                                        (so_far.start as usize + p)..(so_far.end as usize + p),
-                                    );
-                                    opt_so_far = Some(bit..bit + 1);
-                                },
-                            // 1st time we obviously need to insert it
-                            None => opt_so_far = Some(bit..bit + 1),
-                        }
-                    }
-                    // When this set is out of bits, push what's been counted
-                    // (or this set was empty)
-                    #[expect(clippy::as_conversions)]
-                    if let Some(so_far) = opt_so_far {
-                        into.push((so_far.start as usize + p)..(so_far.end as usize + p));
-                    }
-                });
-            };
-        let mut reads = vec![];
-        let mut writes = vec![];
-        decompress(reads_bitset, &mut reads);
-        decompress(writes_bitset, &mut writes);
-        //eprintln!("reads: {reads:#0x?}");
-        //eprintln!("writes: {writes:#0x?}");
         let _exposed: Vec<AllocId> =
             this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
         interp_ok(())
```
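The change above reduces `apply_events` to partitioning the traced events into read and write ranges, since the ranges are conservative overestimates anyway and the bitset-based de-duplication no longer buys anything. A minimal standalone sketch of that partitioning, with a local `AccessEvent` stand-in for the enum defined in `src/shims/trace/mod.rs`:

```rust
use std::ops::Range;

// Local stand-in for shims::trace::AccessEvent; each range is a conservative
// overestimate of the bytes a native access may have touched.
enum AccessEvent {
    Read(Range<usize>),
    Write(Range<usize>),
}

// Split an ordered event list into read ranges and write ranges, mirroring
// the simplified loop in apply_events above.
fn partition(events: Vec<AccessEvent>) -> (Vec<Range<usize>>, Vec<Range<usize>>) {
    let (mut reads, mut writes) = (Vec::new(), Vec::new());
    for acc in events {
        match acc {
            AccessEvent::Read(range) => reads.push(range),
            AccessEvent::Write(range) => writes.push(range),
        }
    }
    (reads, writes)
}
```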

src/shims/trace/mod.rs

Lines changed: 0 additions & 3 deletions
```diff
@@ -41,7 +41,4 @@ pub enum AccessEvent {
 pub struct MemEvents {
     /// An ordered list of memory accesses that occurred.
     pub acc_events: Vec<AccessEvent>,
-    /// A value modulo which `AccessEvent` ranges stay the same length. Makes
-    /// parsing the events a lot easier. Should likely just be the page size.
-    pub alloc_cutoff: usize,
 }
```

src/shims/trace/parent.rs

Lines changed: 16 additions & 69 deletions
```diff
@@ -289,7 +289,7 @@ pub fn sv_loop(
             // end_ffi was called by the child
             ExecEvent::End => {
                 // Hand over the access info we traced
-                event_tx.send(MemEvents { acc_events, alloc_cutoff: page_size }).unwrap();
+                event_tx.send(MemEvents { acc_events }).unwrap();
                 // And reset our values
                 acc_events = Vec::new();
                 ch_stack = None;
```
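The handoff above is a send-then-reset pattern: the buffered events are moved into a `MemEvents`, sent over the channel to the interpreter side, and the tracer starts over with an empty buffer. A hedged sketch of the same effect using `std::mem::take` (type definitions simplified; `hand_over` is an illustrative name, not from the source):

```rust
use std::ops::Range;
use std::sync::mpsc;

enum AccessEvent {
    Read(Range<usize>),
    Write(Range<usize>),
}

struct MemEvents {
    acc_events: Vec<AccessEvent>,
}

// Send the buffered accesses and leave an empty buffer behind; equivalent in
// effect to the send-then-`Vec::new()` sequence in the diff.
fn hand_over(event_tx: &mpsc::Sender<MemEvents>, acc_events: &mut Vec<AccessEvent>) {
    let acc_events = std::mem::take(acc_events);
    event_tx.send(MemEvents { acc_events }).unwrap();
}
```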
```diff
@@ -354,9 +354,9 @@ fn get_disasm() -> capstone::Capstone {
     #[cfg(target_arch = "x86")]
     {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)}
     #[cfg(target_arch = "aarch64")]
-    {cs_pre.arm64()}
+    {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)}
     #[cfg(target_arch = "arm")]
-    {cs_pre.arm()}
+    {cs_pre.arm().mode(arch::arm::ArchMode::Arm)}
     #[cfg(target_arch = "riscv64")]
     {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV64)}
     #[cfg(target_arch = "riscv32")]
```
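This hunk is the "fixup capstone mode" of the commit title: with the `capstone` crate's builder API, the disassembler mode must be set explicitly, and the aarch64/arm branches previously never called `.mode(...)`. A minimal sketch of the builder usage (the function name is illustrative):

```rust
use capstone::prelude::*;

// Build an AArch64 disassembler. Without the explicit `.mode(...)` call the
// builder has no mode to hand to the underlying library and `build()` fails,
// which appears to be the bug this commit fixes.
fn aarch64_disasm() -> capstone::CsResult<Capstone> {
    Capstone::new()
        .arm64()
        .mode(arch::arm64::ArchMode::Arm)
        .build()
}
```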
```diff
@@ -421,7 +421,6 @@ fn handle_segfault(
     fn capstone_disassemble(
         instr: &[u8],
         addr: usize,
-        page_size: usize,
         cs: &capstone::Capstone,
         acc_events: &mut Vec<AccessEvent>,
     ) -> capstone::CsResult<()> {
@@ -434,53 +433,21 @@ fn handle_segfault(
         let ins_detail = cs.insn_detail(&insns[0])?;
         let arch_detail = ins_detail.arch_detail();
 
-        // Take an (addr, size, cutoff_size) and split an access into multiple if needed
-        let get_ranges: fn(usize, usize, usize) -> Vec<std::ops::Range<usize>> =
-            |addr, size, cutoff_size: usize| {
-                let addr_added = addr.strict_add(size);
-                let mut counter = 0usize;
-                let mut ret = vec![];
-                loop {
-                    let curr = addr.strict_add(counter.strict_mul(cutoff_size));
-                    let next = curr.strict_add(cutoff_size);
-                    if next >= addr_added {
-                        ret.push(curr..addr_added);
-                        break;
-                    } else {
-                        ret.push(curr..curr.strict_add(cutoff_size));
-                        counter = counter.strict_add(1);
-                    }
-                }
-                ret
-            };
-
         for op in arch_detail.operands() {
             match op {
                 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                 arch::ArchOperand::X86Operand(x86_operand) => {
                     match x86_operand.op_type {
                         // We only care about memory accesses
                         arch::x86::X86OperandType::Mem(_) => {
-                            let append = get_ranges(addr, x86_operand.size.into(), page_size);
+                            let push = addr..addr.strict_add(usize::from(x86_operand.size));
                             // It's called a "RegAccessType" but it also applies to memory
                             let acc_ty = x86_operand.access.unwrap();
                             if acc_ty.is_readable() {
-                                acc_events.append(
-                                    &mut append
-                                        .clone()
-                                        .into_iter()
-                                        .map(AccessEvent::Read)
-                                        .collect(),
-                                );
+                                acc_events.push(AccessEvent::Read(push.clone()));
                             }
                             if acc_ty.is_writable() {
-                                acc_events.append(
-                                    &mut append
-                                        .clone()
-                                        .into_iter()
-                                        .map(AccessEvent::Write)
-                                        .collect(),
-                                );
+                                acc_events.push(AccessEvent::Write(push));
                             }
                         }
                         _ => (),
```
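With the page-splitting `get_ranges` helper removed, each decoded memory operand now yields a single contiguous range. A sketch of the new construction on stable Rust, using `checked_add` in place of the nightly `strict_add` from the diff (`access_range` is an illustrative name):

```rust
use std::ops::Range;

// One contiguous range covering a `size`-byte access at `addr`; panics on
// address overflow, matching the diff's `strict_add` semantics.
fn access_range(addr: usize, size: usize) -> Range<usize> {
    addr..addr.checked_add(size).expect("access range overflows usize")
}
```

For example, `access_range(0x1000, 8)` yields `0x1000..0x1008` as one range, even where the old helper would have split an access that crossed a page boundary.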
```diff
@@ -515,16 +482,12 @@ fn handle_segfault(
                             | arch::arm64::Arm64Vas::ARM64_VAS_2D
                             | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16,
                         };
-                        let append = get_ranges(addr, size, page_size);
+                        let push = addr..addr.strict_add(size);
                         // FIXME: This now has access type info in the latest
                         // git version of capstone because this pissed me off
                         // and I added it. Change this when it updates
-                        acc_events.append(
-                            &mut append.clone().into_iter().map(AccessEvent::Read).collect(),
-                        );
-                        acc_events.append(
-                            &mut append.clone().into_iter().map(AccessEvent::Write).collect(),
-                        );
+                        acc_events.push(AccessEvent::Read(push.clone()));
+                        acc_events.push(AccessEvent::Write(push));
                     }
                     _ => (),
                 }
@@ -540,25 +503,13 @@ fn handle_segfault(
                     } else {
                         ARCH_WORD_SIZE
                     };
-                    let append = get_ranges(addr, size, page_size);
+                    let push = addr..addr.strict_add(size);
                     let acc_ty = arm_operand.access.unwrap();
                     if acc_ty.is_readable() {
-                        acc_events.append(
-                            &mut append
-                                .clone()
-                                .into_iter()
-                                .map(AccessEvent::Read)
-                                .collect(),
-                        );
+                        acc_events.push(AccessEvent::Read(push.clone()));
                     }
                     if acc_ty.is_writable() {
-                        acc_events.append(
-                            &mut append
-                                .clone()
-                                .into_iter()
-                                .map(AccessEvent::Write)
-                                .collect(),
-                        );
+                        acc_events.push(AccessEvent::Write(push));
                    }
                 }
                 _ => (),
@@ -568,13 +519,9 @@ fn handle_segfault(
                 match risc_voperand {
                     arch::riscv::RiscVOperand::Mem(_) => {
                         // We get basically no info here
-                        let append = get_ranges(addr, ARCH_MAX_ACCESS_SIZE, page_size);
-                        acc_events.append(
-                            &mut append.clone().into_iter().map(AccessEvent::Read).collect(),
-                        );
-                        acc_events.append(
-                            &mut append.clone().into_iter().map(AccessEvent::Write).collect(),
-                        );
+                        let push = addr..addr.strict_add(size);
+                        acc_events.push(AccessEvent::Read(push.clone()));
+                        acc_events.push(AccessEvent::Write(push));
                     }
                     _ => (),
                 }
```
```diff
@@ -666,7 +613,7 @@ fn handle_segfault(
             // Now figure out the size + type of access and log it down
             // This will mark down e.g. the same area being read multiple times,
             // since it's more efficient to compress the accesses at the end
-            if capstone_disassemble(&instr, addr, page_size, cs, acc_events).is_err() {
+            if capstone_disassemble(&instr, addr, cs, acc_events).is_err() {
                 // Read goes first because we need to be pessimistic
                 acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
                 acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
```
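When disassembly fails, the fallback above records a maximal read and a maximal write at the faulting address. A sketch under the assumption that `ARCH_MAX_ACCESS_SIZE` bounds the largest single access the architecture can make (the constant's value below is illustrative, not the real per-architecture one, and `log_unknown_access` is a hypothetical helper):

```rust
use std::ops::Range;

enum AccessEvent {
    Read(Range<usize>),
    Write(Range<usize>),
}

// Illustrative stand-in; the real constant is defined per-architecture.
const ARCH_MAX_ACCESS_SIZE: usize = 64;

// Pessimistic fallback for an undecodable instruction: log both a read and a
// write of the maximal size, read first, so nothing is under-reported.
fn log_unknown_access(addr: usize, acc_events: &mut Vec<AccessEvent>) {
    acc_events.push(AccessEvent::Read(addr..addr + ARCH_MAX_ACCESS_SIZE));
    acc_events.push(AccessEvent::Write(addr..addr + ARCH_MAX_ACCESS_SIZE));
}
```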
