
Commit 0c9e916

Rollup merge of #129828 - RalfJung:miri-data-race, r=saethlin
miri: treat non-memory local variables properly for data race detection

Fixes #3242

Miri has an optimization where some local variables are not represented in memory until something forces them to be stored in memory (most notably, creating a pointer or reference to the local will do that). However, for a subsystem that triggers on memory accesses -- such as the data race detector -- this means the memory access appears to happen only when the local is moved to memory, instead of at the time it actually happens. This can lead to UB reports for programs that do not actually have UB.

This PR fixes that by adding machine hooks for reads and writes to such efficiently represented local variables. The data race system tracks those very similarly to how it would track reads and writes to addressable memory, and when a local is moved to memory, the clocks get overwritten with the information stored for the local.
2 parents 9cebc1d + 65ee714 commit 0c9e916

11 files changed (+483, -112 lines)
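To make the failure mode concrete, below is a hypothetical program in the spirit of the description above (written for this summary, not taken from the PR's test suite). The local `x` is written before the thread is spawned, so that write happens-before everything the reader thread does; but the reference that forces `x` into memory is only created after the spawn, and that later point is where the old data race detector used to timestamp the write.

use std::sync::atomic::{AtomicPtr, Ordering};
use std::thread;

// Published pointer; `Relaxed` on purpose so the publication itself adds no
// happens-before edge -- the only edge ordering the write of `x` is the spawn.
static PTR: AtomicPtr<u8> = AtomicPtr::new(std::ptr::null_mut());

fn main() {
    let mut x: u8 = 0;
    x += 42; // actual write, while `x` is still a "non-memory" local
    let reader = thread::spawn(|| {
        // Spin until the pointer is published, then read the pointee non-atomically.
        loop {
            let p = PTR.load(Ordering::Relaxed);
            if !p.is_null() {
                // No data race: `x += 42` happens-before `thread::spawn`.
                return unsafe { *p };
            }
            std::hint::spin_loop();
        }
    });
    // Taking `&mut x` is what forces `x` into memory. The old detector recorded the
    // write to `x` at this point, after the spawn, so it could report a bogus race
    // with `reader` even though the program has none.
    PTR.store(&mut x, Ordering::Relaxed);
    assert_eq!(reader.join().unwrap(), 42);
}

With this change, the write is recorded by the new local-write hook at its actual program point, and `local_moved_to_memory` later copies that earlier clock into the allocation, so no race is reported.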

src/concurrency/data_race.rs

Lines changed: 199 additions & 105 deletions
@@ -47,6 +47,7 @@ use std::{
 };
 
 use rustc_ast::Mutability;
+use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::fx::FxHashSet;
 use rustc_index::{Idx, IndexVec};
 use rustc_middle::{mir, ty::Ty};
@@ -1047,32 +1048,31 @@ impl VClockAlloc {
     ) -> InterpResult<'tcx> {
         let current_span = machine.current_span();
         let global = machine.data_race.as_ref().unwrap();
-        if global.race_detecting() {
-            let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
-            let mut alloc_ranges = self.alloc_ranges.borrow_mut();
-            for (mem_clocks_range, mem_clocks) in
-                alloc_ranges.iter_mut(access_range.start, access_range.size)
+        if !global.race_detecting() {
+            return Ok(());
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        let mut alloc_ranges = self.alloc_ranges.borrow_mut();
+        for (mem_clocks_range, mem_clocks) in
+            alloc_ranges.iter_mut(access_range.start, access_range.size)
+        {
+            if let Err(DataRace) =
+                mem_clocks.read_race_detect(&mut thread_clocks, index, read_type, current_span)
             {
-                if let Err(DataRace) =
-                    mem_clocks.read_race_detect(&mut thread_clocks, index, read_type, current_span)
-                {
-                    drop(thread_clocks);
-                    // Report data-race.
-                    return Self::report_data_race(
-                        global,
-                        &machine.threads,
-                        mem_clocks,
-                        AccessType::NaRead(read_type),
-                        access_range.size,
-                        interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
-                        ty,
-                    );
-                }
+                drop(thread_clocks);
+                // Report data-race.
+                return Self::report_data_race(
+                    global,
+                    &machine.threads,
+                    mem_clocks,
+                    AccessType::NaRead(read_type),
+                    access_range.size,
+                    interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
+                    ty,
+                );
             }
-            Ok(())
-        } else {
-            Ok(())
         }
+        Ok(())
     }
 
     /// Detect data-races for an unsynchronized write operation. It will not perform
@@ -1090,33 +1090,129 @@ impl VClockAlloc {
     ) -> InterpResult<'tcx> {
         let current_span = machine.current_span();
         let global = machine.data_race.as_mut().unwrap();
-        if global.race_detecting() {
-            let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
-            for (mem_clocks_range, mem_clocks) in
-                self.alloc_ranges.get_mut().iter_mut(access_range.start, access_range.size)
+        if !global.race_detecting() {
+            return Ok(());
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        for (mem_clocks_range, mem_clocks) in
+            self.alloc_ranges.get_mut().iter_mut(access_range.start, access_range.size)
+        {
+            if let Err(DataRace) =
+                mem_clocks.write_race_detect(&mut thread_clocks, index, write_type, current_span)
             {
-                if let Err(DataRace) = mem_clocks.write_race_detect(
-                    &mut thread_clocks,
-                    index,
-                    write_type,
-                    current_span,
-                ) {
-                    drop(thread_clocks);
-                    // Report data-race
-                    return Self::report_data_race(
-                        global,
-                        &machine.threads,
-                        mem_clocks,
-                        AccessType::NaWrite(write_type),
-                        access_range.size,
-                        interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
-                        ty,
-                    );
-                }
+                drop(thread_clocks);
+                // Report data-race
+                return Self::report_data_race(
+                    global,
+                    &machine.threads,
+                    mem_clocks,
+                    AccessType::NaWrite(write_type),
+                    access_range.size,
+                    interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
+                    ty,
+                );
             }
-            Ok(())
+        }
+        Ok(())
+    }
+}
+
+/// Vector clock state for a stack frame (tracking the local variables
+/// that do not have an allocation yet).
+#[derive(Debug, Default)]
+pub struct FrameState {
+    local_clocks: RefCell<FxHashMap<mir::Local, LocalClocks>>,
+}
+
+/// Stripped-down version of [`MemoryCellClocks`] for the clocks we need to keep track
+/// of in a local that does not yet have addressable memory -- and hence can only
+/// be accessed from the thread its stack frame belongs to, and cannot be accessed atomically.
+#[derive(Debug)]
+struct LocalClocks {
+    write: VTimestamp,
+    write_type: NaWriteType,
+    read: VTimestamp,
+}
+
+impl Default for LocalClocks {
+    fn default() -> Self {
+        Self { write: VTimestamp::ZERO, write_type: NaWriteType::Allocate, read: VTimestamp::ZERO }
+    }
+}
+
+impl FrameState {
+    pub fn local_write(&self, local: mir::Local, storage_live: bool, machine: &MiriMachine<'_>) {
+        let current_span = machine.current_span();
+        let global = machine.data_race.as_ref().unwrap();
+        if !global.race_detecting() {
+            return;
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        // This should do the same things as `MemoryCellClocks::write_race_detect`.
+        if !current_span.is_dummy() {
+            thread_clocks.clock.index_mut(index).span = current_span;
+        }
+        let mut clocks = self.local_clocks.borrow_mut();
+        if storage_live {
+            let new_clocks = LocalClocks {
+                write: thread_clocks.clock[index],
+                write_type: NaWriteType::Allocate,
+                read: VTimestamp::ZERO,
+            };
+            // There might already be an entry in the map for this, if the local was previously
+            // live already.
+            clocks.insert(local, new_clocks);
         } else {
-            Ok(())
+            // This can fail to exist if `race_detecting` was false when the allocation
+            // occurred, in which case we can backdate this to the beginning of time.
+            let clocks = clocks.entry(local).or_insert_with(Default::default);
+            clocks.write = thread_clocks.clock[index];
+            clocks.write_type = NaWriteType::Write;
+        }
+    }
+
+    pub fn local_read(&self, local: mir::Local, machine: &MiriMachine<'_>) {
+        let current_span = machine.current_span();
+        let global = machine.data_race.as_ref().unwrap();
+        if !global.race_detecting() {
+            return;
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        // This should do the same things as `MemoryCellClocks::read_race_detect`.
+        if !current_span.is_dummy() {
+            thread_clocks.clock.index_mut(index).span = current_span;
+        }
+        thread_clocks.clock.index_mut(index).set_read_type(NaReadType::Read);
+        // This can fail to exist if `race_detecting` was false when the allocation
+        // occurred, in which case we can backdate this to the beginning of time.
+        let mut clocks = self.local_clocks.borrow_mut();
+        let clocks = clocks.entry(local).or_insert_with(Default::default);
+        clocks.read = thread_clocks.clock[index];
+    }
+
+    pub fn local_moved_to_memory(
+        &self,
+        local: mir::Local,
+        alloc: &mut VClockAlloc,
+        machine: &MiriMachine<'_>,
+    ) {
+        let global = machine.data_race.as_ref().unwrap();
+        if !global.race_detecting() {
+            return;
+        }
+        let (index, _thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        // Get the time the last write actually happened. This can fail to exist if
+        // `race_detecting` was false when the write occurred, in that case we can backdate this
+        // to the beginning of time.
+        let local_clocks = self.local_clocks.borrow_mut().remove(&local).unwrap_or_default();
+        for (_mem_clocks_range, mem_clocks) in alloc.alloc_ranges.get_mut().iter_mut_all() {
+            // The initialization write for this already happened, just at the wrong timestamp.
+            // Check that the thread index matches what we expect.
+            assert_eq!(mem_clocks.write.0, index);
+            // Convert the local's clocks into memory clocks.
+            mem_clocks.write = (index, local_clocks.write);
+            mem_clocks.write_type = local_clocks.write_type;
+            mem_clocks.read = VClock::new_with_index(index, local_clocks.read);
         }
     }
 }
@@ -1305,69 +1401,67 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
         assert!(access.is_atomic());
-        if let Some(data_race) = &this.machine.data_race {
-            if data_race.race_detecting() {
-                let size = place.layout.size;
-                let (alloc_id, base_offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0)?;
-                // Load and log the atomic operation.
-                // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
-                let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
-                trace!(
-                    "Atomic op({}) with ordering {:?} on {:?} (size={})",
-                    access.description(None, None),
-                    &atomic,
-                    place.ptr(),
-                    size.bytes()
-                );
+        let Some(data_race) = &this.machine.data_race else { return Ok(()) };
+        if !data_race.race_detecting() {
+            return Ok(());
+        }
+        let size = place.layout.size;
+        let (alloc_id, base_offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0)?;
+        // Load and log the atomic operation.
+        // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
+        let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
+        trace!(
+            "Atomic op({}) with ordering {:?} on {:?} (size={})",
+            access.description(None, None),
+            &atomic,
+            place.ptr(),
+            size.bytes()
+        );
 
-                let current_span = this.machine.current_span();
-                // Perform the atomic operation.
-                data_race.maybe_perform_sync_operation(
-                    &this.machine.threads,
-                    current_span,
-                    |index, mut thread_clocks| {
-                        for (mem_clocks_range, mem_clocks) in
-                            alloc_meta.alloc_ranges.borrow_mut().iter_mut(base_offset, size)
-                        {
-                            if let Err(DataRace) = op(mem_clocks, &mut thread_clocks, index, atomic)
-                            {
-                                mem::drop(thread_clocks);
-                                return VClockAlloc::report_data_race(
-                                    data_race,
-                                    &this.machine.threads,
-                                    mem_clocks,
-                                    access,
-                                    place.layout.size,
-                                    interpret::Pointer::new(
-                                        alloc_id,
-                                        Size::from_bytes(mem_clocks_range.start),
-                                    ),
-                                    None,
-                                )
-                                .map(|_| true);
-                            }
-                        }
-
-                        // This conservatively assumes all operations have release semantics
-                        Ok(true)
-                    },
-                )?;
-
-                // Log changes to atomic memory.
-                if tracing::enabled!(tracing::Level::TRACE) {
-                    for (_offset, mem_clocks) in
-                        alloc_meta.alloc_ranges.borrow().iter(base_offset, size)
-                    {
-                        trace!(
-                            "Updated atomic memory({:?}, size={}) to {:#?}",
-                            place.ptr(),
-                            size.bytes(),
-                            mem_clocks.atomic_ops
-                        );
+        let current_span = this.machine.current_span();
+        // Perform the atomic operation.
+        data_race.maybe_perform_sync_operation(
+            &this.machine.threads,
+            current_span,
+            |index, mut thread_clocks| {
+                for (mem_clocks_range, mem_clocks) in
+                    alloc_meta.alloc_ranges.borrow_mut().iter_mut(base_offset, size)
+                {
+                    if let Err(DataRace) = op(mem_clocks, &mut thread_clocks, index, atomic) {
+                        mem::drop(thread_clocks);
+                        return VClockAlloc::report_data_race(
+                            data_race,
+                            &this.machine.threads,
+                            mem_clocks,
+                            access,
+                            place.layout.size,
+                            interpret::Pointer::new(
+                                alloc_id,
+                                Size::from_bytes(mem_clocks_range.start),
+                            ),
+                            None,
+                        )
+                        .map(|_| true);
                     }
                 }
+
+                // This conservatively assumes all operations have release semantics
+                Ok(true)
+            },
+        )?;
+
+        // Log changes to atomic memory.
+        if tracing::enabled!(tracing::Level::TRACE) {
+            for (_offset, mem_clocks) in alloc_meta.alloc_ranges.borrow().iter(base_offset, size) {
+                trace!(
+                    "Updated atomic memory({:?}, size={}) to {:#?}",
+                    place.ptr(),
+                    size.bytes(),
+                    mem_clocks.atomic_ops
+                );
             }
         }
+
         Ok(())
     }
 }

src/concurrency/thread.rs

Lines changed: 3 additions & 1 deletion
@@ -530,7 +530,9 @@ impl<'tcx> ThreadManager<'tcx> {
     }
 
     /// Mutably borrow the stack of the active thread.
-    fn active_thread_stack_mut(&mut self) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
+    pub fn active_thread_stack_mut(
+        &mut self,
+    ) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
         &mut self.threads[self.active_thread].stack
     }
     pub fn all_stacks(

src/concurrency/vector_clock.rs

Lines changed: 6 additions & 0 deletions
@@ -130,13 +130,19 @@ impl Ord for VTimestamp {
 /// also this means that there is only one unique valid length
 /// for each set of vector clock values and hence the PartialEq
 /// and Eq derivations are correct.
+///
+/// This means we cannot represent a clock where the last entry is a timestamp-0 read that occurs
+/// because of a retag. That's fine, all it does is risk wrong diagnostics in an extreme corner case.
 #[derive(PartialEq, Eq, Default, Debug)]
 pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>);
 
 impl VClock {
     /// Create a new vector-clock containing all zeros except
     /// for a value at the given index
     pub(super) fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock {
+        if timestamp.time() == 0 {
+            return VClock::default();
+        }
         let len = index.index() + 1;
         let mut vec = smallvec::smallvec![VTimestamp::ZERO; len];
         vec[index.index()] = timestamp;
