Skip to content

Commit 22f426f

Browse files
committed
firmware_uefi: EfiDiagnostics switch to polling instead of Arc<Mutex<>> (microsoft#1693)
This PR aims to take a different approach to flushing EfiDiagnostics on Watchdog timeout. - Removes Arc<Mutex<>>, which was originally used to share the diagnostics service between UefiDevice and its WatchdogPlatform - Creates a mesh::channel before creating the UefiDevice. The watchdog_callback that gets added to the UefiDevice has its `on_timeout` method modified to invoke the sender to send a notification. - The mesh::receiver end gets sent down the UefiDevice, and it is used in `poll_device()` to freely respond to watchdog events
1 parent 1a2b305 commit 22f426f

File tree

7 files changed

+101
-93
lines changed

7 files changed

+101
-93
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1721,7 +1721,6 @@ dependencies = [
17211721
"open_enum",
17221722
"openssl",
17231723
"pal_async",
1724-
"parking_lot",
17251724
"test_with_tracing",
17261725
"thiserror 2.0.12",
17271726
"time",

openhcl/underhill_core/src/worker.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,6 +2060,7 @@ async fn new_underhill_vm(
20602060
},
20612061
};
20622062

2063+
let (watchdog_send, watchdog_recv) = mesh::channel();
20632064
deps_hyperv_firmware_uefi = Some(dev::HyperVFirmwareUefi {
20642065
config,
20652066
logger: Box::new(UnderhillLogger {
@@ -2091,19 +2092,22 @@ async fn new_underhill_vm(
20912092
#[cfg(guest_arch = "x86_64")]
20922093
let watchdog_callback = WatchdogTimeoutNmi {
20932094
partition: partition.clone(),
2095+
watchdog_send: Some(watchdog_send),
20942096
};
20952097

20962098
// ARM64 does not have NMI support yet, so halt instead
20972099
#[cfg(guest_arch = "aarch64")]
20982100
let watchdog_callback = WatchdogTimeoutReset {
20992101
halt_vps: halt_vps.clone(),
2102+
watchdog_send: Some(watchdog_send),
21002103
};
21012104

21022105
// Add the callback
21032106
underhill_watchdog_platform.add_callback(Box::new(watchdog_callback));
21042107

21052108
Box::new(underhill_watchdog_platform)
21062109
},
2110+
watchdog_recv,
21072111
vsm_config: Some(Box::new(UnderhillVsmConfig {
21082112
partition: Arc::downgrade(&partition),
21092113
})),
@@ -2422,13 +2426,15 @@ async fn new_underhill_vm(
24222426
.as_non_volatile_store(vmgs::FileId::GUEST_WATCHDOG, false)
24232427
.context("failed to instantiate guest watchdog store")?;
24242428

2425-
let trigger_reset = WatchdogTimeoutReset {
2429+
let watchdog_callback = WatchdogTimeoutReset {
24262430
halt_vps: halt_vps.clone(),
2431+
watchdog_send: None, // This is not the UEFI watchdog, so no need to send
2432+
// watchdog notifications.
24272433
};
24282434

24292435
let mut underhill_watchdog_platform =
24302436
UnderhillWatchdogPlatform::new(store, get_client.clone()).await?;
2431-
underhill_watchdog_platform.add_callback(Box::new(trigger_reset));
2437+
underhill_watchdog_platform.add_callback(Box::new(watchdog_callback));
24322438

24332439
Box::new(underhill_watchdog_platform)
24342440
},
@@ -3441,6 +3447,7 @@ impl ChipsetDevice for FallbackMmioDevice {
34413447
#[cfg(guest_arch = "x86_64")]
34423448
struct WatchdogTimeoutNmi {
34433449
partition: Arc<UhPartition>,
3450+
watchdog_send: Option<mesh::Sender<()>>,
34443451
}
34453452

34463453
#[cfg(guest_arch = "x86_64")]
@@ -3454,18 +3461,27 @@ impl WatchdogCallback for WatchdogTimeoutNmi {
34543461
Vtl::Vtl0,
34553462
MsiRequest::new_x86(virt::irqcon::DeliveryMode::NMI, 0, false, 0, false),
34563463
);
3464+
3465+
if let Some(watchdog_send) = &self.watchdog_send {
3466+
watchdog_send.send(());
3467+
}
34573468
}
34583469
}
34593470

34603471
struct WatchdogTimeoutReset {
34613472
halt_vps: Arc<Halt>,
3473+
watchdog_send: Option<mesh::Sender<()>>,
34623474
}
34633475

34643476
#[async_trait::async_trait]
34653477
impl WatchdogCallback for WatchdogTimeoutReset {
34663478
async fn on_timeout(&mut self) {
34673479
crate::livedump::livedump().await;
34683480

3469-
self.halt_vps.halt(HaltReason::Reset)
3481+
self.halt_vps.halt(HaltReason::Reset);
3482+
3483+
if let Some(watchdog_send) = &self.watchdog_send {
3484+
watchdog_send.send(());
3485+
}
34703486
}
34713487
}

openvmm/hvlite_core/src/worker/dispatch.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ impl InitializedVm {
10471047
let mut deps_hyperv_firmware_uefi = None;
10481048
match &cfg.load_mode {
10491049
LoadMode::Uefi { .. } => {
1050+
let (watchdog_send, watchdog_recv) = mesh::channel();
10501051
deps_hyperv_firmware_uefi = Some(dev::HyperVFirmwareUefi {
10511052
config: firmware_uefi::UefiConfig {
10521053
custom_uefi_vars: cfg.custom_uefi_vars,
@@ -1094,19 +1095,22 @@ impl InitializedVm {
10941095
#[cfg(guest_arch = "x86_64")]
10951096
let watchdog_callback = WatchdogTimeoutNmi {
10961097
partition: partition.clone(),
1098+
watchdog_send: Some(watchdog_send),
10971099
};
10981100

10991101
// ARM64 does not have NMI support yet, so halt instead
11001102
#[cfg(guest_arch = "aarch64")]
11011103
let watchdog_callback = WatchdogTimeoutReset {
11021104
halt_vps: halt_vps.clone(),
1105+
watchdog_send: Some(watchdog_send),
11031106
};
11041107

11051108
// Add callbacks
11061109
base_watchdog_platform.add_callback(Box::new(watchdog_callback));
11071110

11081111
Box::new(base_watchdog_platform)
11091112
},
1113+
watchdog_recv,
11101114
vsm_config: None,
11111115
// TODO: persist SystemTimeClock time across reboots.
11121116
time_source: Box::new(local_clock::SystemTimeClock::new(
@@ -1326,6 +1330,8 @@ impl InitializedVm {
13261330
// Create callback to reset on watchdog timeout
13271331
let watchdog_callback = WatchdogTimeoutReset {
13281332
halt_vps: halt_vps.clone(),
1333+
watchdog_send: None, // This is not the UEFI watchdog, so no need to send
1334+
// watchdog notifications
13291335
};
13301336

13311337
// Add callbacks
@@ -2987,6 +2993,7 @@ fn add_devices_to_dsdt(
29872993
#[cfg(guest_arch = "x86_64")]
29882994
struct WatchdogTimeoutNmi {
29892995
partition: Arc<dyn HvlitePartition>,
2996+
watchdog_send: Option<mesh::Sender<()>>,
29902997
}
29912998

29922999
#[cfg(guest_arch = "x86_64")]
@@ -2998,16 +3005,25 @@ impl WatchdogCallback for WatchdogTimeoutNmi {
29983005
Vtl::Vtl0,
29993006
virt::irqcon::MsiRequest::new_x86(virt::irqcon::DeliveryMode::NMI, 0, false, 0, false),
30003007
);
3008+
3009+
if let Some(watchdog_send) = &self.watchdog_send {
3010+
watchdog_send.send(());
3011+
}
30013012
}
30023013
}
30033014

30043015
struct WatchdogTimeoutReset {
30053016
halt_vps: Arc<Halt>,
3017+
watchdog_send: Option<mesh::Sender<()>>,
30063018
}
30073019

30083020
#[async_trait::async_trait]
30093021
impl WatchdogCallback for WatchdogTimeoutReset {
30103022
async fn on_timeout(&mut self) {
3011-
self.halt_vps.halt(HaltReason::Reset)
3023+
self.halt_vps.halt(HaltReason::Reset);
3024+
3025+
if let Some(watchdog_send) = &self.watchdog_send {
3026+
watchdog_send.send(());
3027+
}
30123028
}
30133029
}

vm/devices/firmware/firmware_uefi/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ bitfield-struct.workspace = true
4545
der = { workspace = true, features = ["derive", "alloc", "oid"], optional = true }
4646
getrandom.workspace = true
4747
openssl = { optional = true, workspace = true }
48-
parking_lot.workspace = true
4948
thiserror.workspace = true
5049
time = { workspace = true, features = ["local-offset"] }
5150
tracelimit.workspace = true

vm/devices/firmware/firmware_uefi/src/lib.rs

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,12 @@ use inspect::Inspect;
6868
use inspect::InspectMut;
6969
use local_clock::InspectableLocalClock;
7070
use pal_async::local::block_on;
71-
use parking_lot::Mutex;
7271
use platform::logger::UefiLogger;
7372
use platform::nvram::VsmConfig;
7473
use std::convert::TryInto;
7574
use std::ops::RangeInclusive;
76-
use std::sync::Arc;
7775
use std::task::Context;
76+
use std::task::Poll;
7877
use thiserror::Error;
7978
use uefi_nvram_storage::InspectableNvramStorage;
8079
use vmcore::device_state::ChangeDeviceState;
@@ -106,7 +105,7 @@ struct UefiDeviceServices {
106105
generation_id: service::generation_id::GenerationIdServices,
107106
#[inspect(mut)]
108107
time: service::time::TimeServices,
109-
diagnostics: Arc<Mutex<service::diagnostics::DiagnosticsServices>>,
108+
diagnostics: service::diagnostics::DiagnosticsServices,
110109
}
111110

112111
// Begin and end range are inclusive.
@@ -135,6 +134,7 @@ pub struct UefiRuntimeDeps<'a> {
135134
pub logger: Box<dyn UefiLogger>,
136135
pub vmtime: &'a VmTimeSource,
137136
pub watchdog_platform: Box<dyn WatchdogPlatform>,
137+
pub watchdog_recv: mesh::Receiver<()>,
138138
pub generation_id_deps: generation_id::GenerationIdRuntimeDeps,
139139
pub vsm_config: Option<Box<dyn VsmConfig>>,
140140
pub time_source: Box<dyn InspectableLocalClock>,
@@ -158,6 +158,10 @@ pub struct UefiDevice {
158158
// Volatile state
159159
#[inspect(hex)]
160160
address: u32,
161+
162+
// Receiver for watchdog timeout events
163+
#[inspect(skip)]
164+
watchdog_recv: mesh::Receiver<()>,
161165
}
162166

163167
impl UefiDevice {
@@ -171,26 +175,20 @@ impl UefiDevice {
171175
nvram_storage,
172176
logger,
173177
vmtime,
174-
mut watchdog_platform,
178+
watchdog_platform,
179+
watchdog_recv,
175180
generation_id_deps,
176181
vsm_config,
177182
time_source,
178183
} = runtime_deps;
179184

180-
// Create diagnostics serparately since it will be shared.
181-
let diagnostics = Arc::new(Mutex::new(service::diagnostics::DiagnosticsServices::new()));
182-
183-
// Add a watchdog callback to process diagnostics on timeout.
184-
watchdog_platform.add_callback(Box::new(
185-
service::diagnostics::DiagnosticsWatchdogCallback::new(diagnostics.clone(), gm.clone()),
186-
));
187-
188185
// Create the UEFI device with the rest of the services.
189186
let uefi = UefiDevice {
190187
use_mmio: cfg.use_mmio,
191188
command_set: cfg.command_set,
192189
address: 0,
193190
gm,
191+
watchdog_recv,
194192
service: UefiDeviceServices {
195193
nvram: service::nvram::NvramServices::new(
196194
nvram_storage,
@@ -212,7 +210,7 @@ impl UefiDevice {
212210
generation_id_deps,
213211
),
214212
time: service::time::TimeServices::new(time_source),
215-
diagnostics,
213+
diagnostics: service::diagnostics::DiagnosticsServices::new(),
216214
},
217215
};
218216

@@ -269,15 +267,15 @@ impl UefiDevice {
269267
}
270268
UefiCommand::SET_EFI_DIAGNOSTICS_GPA => {
271269
tracelimit::info_ratelimited!(?addr, data, "set gpa for diagnostics");
272-
self.service.diagnostics.lock().set_gpa(data)
270+
self.service.diagnostics.set_gpa(data)
273271
}
274-
UefiCommand::PROCESS_EFI_DIAGNOSTICS => self.ratelimited_process_diagnostics(),
272+
UefiCommand::PROCESS_EFI_DIAGNOSTICS => self.process_diagnostics(false, "guest"),
275273
_ => tracelimit::warn_ratelimited!(addr, data, "unknown uefi write"),
276274
}
277275
}
278276

279277
fn inspect_extra(&mut self, _resp: &mut inspect::Response<'_>) {
280-
self.force_process_diagnostics("inspect");
278+
self.process_diagnostics(true, "inspect_extra");
281279
}
282280
}
283281

@@ -293,7 +291,7 @@ impl ChangeDeviceState for UefiDevice {
293291
self.service.event_log.reset();
294292
self.service.uefi_watchdog.watchdog.reset();
295293
self.service.generation_id.reset();
296-
self.service.diagnostics.lock().reset();
294+
self.service.diagnostics.reset();
297295
}
298296
}
299297

@@ -313,8 +311,15 @@ impl ChipsetDevice for UefiDevice {
313311

314312
impl PollDevice for UefiDevice {
315313
fn poll_device(&mut self, cx: &mut Context<'_>) {
314+
// Poll services
316315
self.service.uefi_watchdog.watchdog.poll(cx);
317316
self.service.generation_id.poll(cx);
317+
318+
// Poll watchdog timeout events
319+
if let Poll::Ready(Ok(())) = self.watchdog_recv.poll_recv(cx) {
320+
tracing::info!("watchdog timeout received");
321+
self.process_diagnostics(true, "watchdog timeout");
322+
}
318323
}
319324
}
320325

@@ -500,6 +505,7 @@ mod save_restore {
500505
use_mmio: _,
501506
command_set: _,
502507
gm: _,
508+
watchdog_recv: _,
503509
service:
504510
UefiDeviceServices {
505511
nvram,
@@ -520,7 +526,7 @@ mod save_restore {
520526
watchdog: uefi_watchdog.save()?,
521527
generation_id: generation_id.save()?,
522528
time: time.save()?,
523-
diagnostics: diagnostics.lock().save()?,
529+
diagnostics: diagnostics.save()?,
524530
})
525531
}
526532

@@ -543,7 +549,7 @@ mod save_restore {
543549
self.service.uefi_watchdog.restore(watchdog)?;
544550
self.service.generation_id.restore(generation_id)?;
545551
self.service.time.restore(time)?;
546-
self.service.diagnostics.lock().restore(diagnostics)?;
552+
self.service.diagnostics.restore(diagnostics)?;
547553

548554
Ok(())
549555
}

0 commit comments

Comments
 (0)