Skip to content

Commit a7827a2

Browse files
authored
firmware_uefi: EfiDiagnostics switch to polling instead of Arc<Mutex<>> (#1693)
This PR aims to take a different approach to flushing EfiDiagnostics on Watchdog timeout. - Removes Arc<Mutex<>>, which was originally used to share the diagnostics service between UefiDevice and its WatchdogPlatform - Creates a mesh::channel before creating the UefiDevice. The watchdog_callback that gets added to the UefiDevice has its `on_timeout` method modified to invoke the sender to send a notification. - The mesh::receiver end gets sent down the UefiDevice, and it is used in `poll_device()` to freely respond to watchdog events
1 parent 7cfbfba commit a7827a2

File tree

7 files changed

+101
-93
lines changed

7 files changed

+101
-93
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1739,7 +1739,6 @@ dependencies = [
17391739
"open_enum",
17401740
"openssl",
17411741
"pal_async",
1742-
"parking_lot",
17431742
"test_with_tracing",
17441743
"thiserror 2.0.12",
17451744
"time",

openhcl/underhill_core/src/worker.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,7 @@ async fn new_underhill_vm(
20822082
},
20832083
};
20842084

2085+
let (watchdog_send, watchdog_recv) = mesh::channel();
20852086
deps_hyperv_firmware_uefi = Some(dev::HyperVFirmwareUefi {
20862087
config,
20872088
logger: Box::new(UnderhillLogger {
@@ -2121,19 +2122,22 @@ async fn new_underhill_vm(
21212122
#[cfg(guest_arch = "x86_64")]
21222123
let watchdog_callback = WatchdogTimeoutNmi {
21232124
partition: partition.clone(),
2125+
watchdog_send: Some(watchdog_send),
21242126
};
21252127

21262128
// ARM64 does not have NMI support yet, so halt instead
21272129
#[cfg(guest_arch = "aarch64")]
21282130
let watchdog_callback = WatchdogTimeoutReset {
21292131
halt_vps: halt_vps.clone(),
2132+
watchdog_send: Some(watchdog_send),
21302133
};
21312134

21322135
// Add the callback
21332136
underhill_watchdog_platform.add_callback(Box::new(watchdog_callback));
21342137

21352138
Box::new(underhill_watchdog_platform)
21362139
},
2140+
watchdog_recv,
21372141
vsm_config: Some(Box::new(UnderhillVsmConfig {
21382142
partition: Arc::downgrade(&partition),
21392143
})),
@@ -2458,13 +2462,15 @@ async fn new_underhill_vm(
24582462
EphemeralNonVolatileStore::new_boxed()
24592463
};
24602464

2461-
let trigger_reset = WatchdogTimeoutReset {
2465+
let watchdog_callback = WatchdogTimeoutReset {
24622466
halt_vps: halt_vps.clone(),
2467+
watchdog_send: None, // This is not the UEFI watchdog, so no need to send
2468+
// watchdog notifications.
24632469
};
24642470

24652471
let mut underhill_watchdog_platform =
24662472
UnderhillWatchdogPlatform::new(store, get_client.clone()).await?;
2467-
underhill_watchdog_platform.add_callback(Box::new(trigger_reset));
2473+
underhill_watchdog_platform.add_callback(Box::new(watchdog_callback));
24682474

24692475
Box::new(underhill_watchdog_platform)
24702476
},
@@ -3461,6 +3467,7 @@ impl ChipsetDevice for FallbackMmioDevice {
34613467
#[cfg(guest_arch = "x86_64")]
34623468
struct WatchdogTimeoutNmi {
34633469
partition: Arc<UhPartition>,
3470+
watchdog_send: Option<mesh::Sender<()>>,
34643471
}
34653472

34663473
#[cfg(guest_arch = "x86_64")]
@@ -3474,18 +3481,27 @@ impl WatchdogCallback for WatchdogTimeoutNmi {
34743481
Vtl::Vtl0,
34753482
MsiRequest::new_x86(virt::irqcon::DeliveryMode::NMI, 0, false, 0, false),
34763483
);
3484+
3485+
if let Some(watchdog_send) = &self.watchdog_send {
3486+
watchdog_send.send(());
3487+
}
34773488
}
34783489
}
34793490

34803491
struct WatchdogTimeoutReset {
34813492
halt_vps: Arc<Halt>,
3493+
watchdog_send: Option<mesh::Sender<()>>,
34823494
}
34833495

34843496
#[async_trait::async_trait]
34853497
impl WatchdogCallback for WatchdogTimeoutReset {
34863498
async fn on_timeout(&mut self) {
34873499
crate::livedump::livedump().await;
34883500

3489-
self.halt_vps.halt(HaltReason::Reset)
3501+
self.halt_vps.halt(HaltReason::Reset);
3502+
3503+
if let Some(watchdog_send) = &self.watchdog_send {
3504+
watchdog_send.send(());
3505+
}
34903506
}
34913507
}

openvmm/hvlite_core/src/worker/dispatch.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,7 @@ impl InitializedVm {
10511051
let mut deps_hyperv_firmware_uefi = None;
10521052
match &cfg.load_mode {
10531053
LoadMode::Uefi { .. } => {
1054+
let (watchdog_send, watchdog_recv) = mesh::channel();
10541055
deps_hyperv_firmware_uefi = Some(dev::HyperVFirmwareUefi {
10551056
config: firmware_uefi::UefiConfig {
10561057
custom_uefi_vars: cfg.custom_uefi_vars,
@@ -1102,19 +1103,22 @@ impl InitializedVm {
11021103
#[cfg(guest_arch = "x86_64")]
11031104
let watchdog_callback = WatchdogTimeoutNmi {
11041105
partition: partition.clone(),
1106+
watchdog_send: Some(watchdog_send),
11051107
};
11061108

11071109
// ARM64 does not have NMI support yet, so halt instead
11081110
#[cfg(guest_arch = "aarch64")]
11091111
let watchdog_callback = WatchdogTimeoutReset {
11101112
halt_vps: halt_vps.clone(),
1113+
watchdog_send: Some(watchdog_send),
11111114
};
11121115

11131116
// Add callbacks
11141117
base_watchdog_platform.add_callback(Box::new(watchdog_callback));
11151118

11161119
Box::new(base_watchdog_platform)
11171120
},
1121+
watchdog_recv,
11181122
vsm_config: None,
11191123
// TODO: persist SystemTimeClock time across reboots.
11201124
time_source: Box::new(local_clock::SystemTimeClock::new(
@@ -1335,6 +1339,8 @@ impl InitializedVm {
13351339
// Create callback to reset on watchdog timeout
13361340
let watchdog_callback = WatchdogTimeoutReset {
13371341
halt_vps: halt_vps.clone(),
1342+
watchdog_send: None, // This is not the UEFI watchdog, so no need to send
1343+
// watchdog notifications
13381344
};
13391345

13401346
// Add callbacks
@@ -2996,6 +3002,7 @@ fn add_devices_to_dsdt(
29963002
#[cfg(guest_arch = "x86_64")]
29973003
struct WatchdogTimeoutNmi {
29983004
partition: Arc<dyn HvlitePartition>,
3005+
watchdog_send: Option<mesh::Sender<()>>,
29993006
}
30003007

30013008
#[cfg(guest_arch = "x86_64")]
@@ -3007,16 +3014,25 @@ impl WatchdogCallback for WatchdogTimeoutNmi {
30073014
Vtl::Vtl0,
30083015
virt::irqcon::MsiRequest::new_x86(virt::irqcon::DeliveryMode::NMI, 0, false, 0, false),
30093016
);
3017+
3018+
if let Some(watchdog_send) = &self.watchdog_send {
3019+
watchdog_send.send(());
3020+
}
30103021
}
30113022
}
30123023

30133024
struct WatchdogTimeoutReset {
30143025
halt_vps: Arc<Halt>,
3026+
watchdog_send: Option<mesh::Sender<()>>,
30153027
}
30163028

30173029
#[async_trait::async_trait]
30183030
impl WatchdogCallback for WatchdogTimeoutReset {
30193031
async fn on_timeout(&mut self) {
3020-
self.halt_vps.halt(HaltReason::Reset)
3032+
self.halt_vps.halt(HaltReason::Reset);
3033+
3034+
if let Some(watchdog_send) = &self.watchdog_send {
3035+
watchdog_send.send(());
3036+
}
30213037
}
30223038
}

vm/devices/firmware/firmware_uefi/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ bitfield-struct.workspace = true
4545
der = { workspace = true, features = ["derive", "alloc", "oid"], optional = true }
4646
getrandom.workspace = true
4747
openssl = { optional = true, workspace = true }
48-
parking_lot.workspace = true
4948
thiserror.workspace = true
5049
time = { workspace = true, features = ["local-offset"] }
5150
tracelimit.workspace = true

vm/devices/firmware/firmware_uefi/src/lib.rs

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,12 @@ use inspect::Inspect;
6868
use inspect::InspectMut;
6969
use local_clock::InspectableLocalClock;
7070
use pal_async::local::block_on;
71-
use parking_lot::Mutex;
7271
use platform::logger::UefiLogger;
7372
use platform::nvram::VsmConfig;
7473
use std::convert::TryInto;
7574
use std::ops::RangeInclusive;
76-
use std::sync::Arc;
7775
use std::task::Context;
76+
use std::task::Poll;
7877
use thiserror::Error;
7978
use uefi_nvram_storage::VmmNvramStorage;
8079
use vmcore::device_state::ChangeDeviceState;
@@ -106,7 +105,7 @@ struct UefiDeviceServices {
106105
generation_id: service::generation_id::GenerationIdServices,
107106
#[inspect(mut)]
108107
time: service::time::TimeServices,
109-
diagnostics: Arc<Mutex<service::diagnostics::DiagnosticsServices>>,
108+
diagnostics: service::diagnostics::DiagnosticsServices,
110109
}
111110

112111
// Begin and end range are inclusive.
@@ -135,6 +134,7 @@ pub struct UefiRuntimeDeps<'a> {
135134
pub logger: Box<dyn UefiLogger>,
136135
pub vmtime: &'a VmTimeSource,
137136
pub watchdog_platform: Box<dyn WatchdogPlatform>,
137+
pub watchdog_recv: mesh::Receiver<()>,
138138
pub generation_id_deps: generation_id::GenerationIdRuntimeDeps,
139139
pub vsm_config: Option<Box<dyn VsmConfig>>,
140140
pub time_source: Box<dyn InspectableLocalClock>,
@@ -158,6 +158,10 @@ pub struct UefiDevice {
158158
// Volatile state
159159
#[inspect(hex)]
160160
address: u32,
161+
162+
// Receiver for watchdog timeout events
163+
#[inspect(skip)]
164+
watchdog_recv: mesh::Receiver<()>,
161165
}
162166

163167
impl UefiDevice {
@@ -171,26 +175,20 @@ impl UefiDevice {
171175
nvram_storage,
172176
logger,
173177
vmtime,
174-
mut watchdog_platform,
178+
watchdog_platform,
179+
watchdog_recv,
175180
generation_id_deps,
176181
vsm_config,
177182
time_source,
178183
} = runtime_deps;
179184

180-
// Create diagnostics serparately since it will be shared.
181-
let diagnostics = Arc::new(Mutex::new(service::diagnostics::DiagnosticsServices::new()));
182-
183-
// Add a watchdog callback to process diagnostics on timeout.
184-
watchdog_platform.add_callback(Box::new(
185-
service::diagnostics::DiagnosticsWatchdogCallback::new(diagnostics.clone(), gm.clone()),
186-
));
187-
188185
// Create the UEFI device with the rest of the services.
189186
let uefi = UefiDevice {
190187
use_mmio: cfg.use_mmio,
191188
command_set: cfg.command_set,
192189
address: 0,
193190
gm,
191+
watchdog_recv,
194192
service: UefiDeviceServices {
195193
nvram: service::nvram::NvramServices::new(
196194
nvram_storage,
@@ -212,7 +210,7 @@ impl UefiDevice {
212210
generation_id_deps,
213211
),
214212
time: service::time::TimeServices::new(time_source),
215-
diagnostics,
213+
diagnostics: service::diagnostics::DiagnosticsServices::new(),
216214
},
217215
};
218216

@@ -269,15 +267,15 @@ impl UefiDevice {
269267
}
270268
UefiCommand::SET_EFI_DIAGNOSTICS_GPA => {
271269
tracelimit::info_ratelimited!(?addr, data, "set gpa for diagnostics");
272-
self.service.diagnostics.lock().set_gpa(data)
270+
self.service.diagnostics.set_gpa(data)
273271
}
274-
UefiCommand::PROCESS_EFI_DIAGNOSTICS => self.ratelimited_process_diagnostics(),
272+
UefiCommand::PROCESS_EFI_DIAGNOSTICS => self.process_diagnostics(false, "guest"),
275273
_ => tracelimit::warn_ratelimited!(addr, data, "unknown uefi write"),
276274
}
277275
}
278276

279277
fn inspect_extra(&mut self, _resp: &mut inspect::Response<'_>) {
280-
self.force_process_diagnostics("inspect");
278+
self.process_diagnostics(true, "inspect_extra");
281279
}
282280
}
283281

@@ -293,7 +291,7 @@ impl ChangeDeviceState for UefiDevice {
293291
self.service.event_log.reset();
294292
self.service.uefi_watchdog.watchdog.reset();
295293
self.service.generation_id.reset();
296-
self.service.diagnostics.lock().reset();
294+
self.service.diagnostics.reset();
297295
}
298296
}
299297

@@ -313,8 +311,15 @@ impl ChipsetDevice for UefiDevice {
313311

314312
impl PollDevice for UefiDevice {
315313
fn poll_device(&mut self, cx: &mut Context<'_>) {
314+
// Poll services
316315
self.service.uefi_watchdog.watchdog.poll(cx);
317316
self.service.generation_id.poll(cx);
317+
318+
// Poll watchdog timeout events
319+
if let Poll::Ready(Ok(())) = self.watchdog_recv.poll_recv(cx) {
320+
tracing::info!("watchdog timeout received");
321+
self.process_diagnostics(true, "watchdog timeout");
322+
}
318323
}
319324
}
320325

@@ -500,6 +505,7 @@ mod save_restore {
500505
use_mmio: _,
501506
command_set: _,
502507
gm: _,
508+
watchdog_recv: _,
503509
service:
504510
UefiDeviceServices {
505511
nvram,
@@ -520,7 +526,7 @@ mod save_restore {
520526
watchdog: uefi_watchdog.save()?,
521527
generation_id: generation_id.save()?,
522528
time: time.save()?,
523-
diagnostics: diagnostics.lock().save()?,
529+
diagnostics: diagnostics.save()?,
524530
})
525531
}
526532

@@ -543,7 +549,7 @@ mod save_restore {
543549
self.service.uefi_watchdog.restore(watchdog)?;
544550
self.service.generation_id.restore(generation_id)?;
545551
self.service.time.restore(time)?;
546-
self.service.diagnostics.lock().restore(diagnostics)?;
552+
self.service.diagnostics.restore(diagnostics)?;
547553

548554
Ok(())
549555
}

0 commit comments

Comments
 (0)