Skip to content

firmware_uefi: EfiDiagnostics backports for watchdog handling, tracelimit and other improvements #1691

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3046,7 +3046,6 @@ dependencies = [
"vmswitch",
"vpci",
"watchdog_core",
"watchdog_vmgs_format",
"zerocopy 0.8.24",
]

Expand Down Expand Up @@ -9157,12 +9156,14 @@ version = "0.0.0"
dependencies = [
"async-trait",
"bitfield-struct 0.10.1",
"cvm_tracing",
"inspect",
"mesh",
"pal_async",
"thiserror 2.0.12",
"tracing",
"vmcore",
"watchdog_vmgs_format",
]

[[package]]
Expand Down
71 changes: 28 additions & 43 deletions openhcl/underhill_core/src/emuplat/watchdog.rs
Original file line number Diff line number Diff line change
@@ -1,75 +1,60 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use cvm_tracing::CVM_ALLOWED;
//! An implementation of [`WatchdogPlatform`] for use with the Underhill
//! environment.
//!
//! This implementation wraps over [`BaseWatchdogPlatform`] in
//! [watchdog_core::platform::BaseWatchdogPlatform], providing additional
//! functionality specific to the Underhill environment.

use vmcore::non_volatile_store::NonVolatileStore;
use watchdog_core::platform::BaseWatchdogPlatform;
use watchdog_core::platform::WatchdogCallback;
use watchdog_core::platform::WatchdogPlatform;
use watchdog_vmgs_format::WatchdogVmgsFormatStore;
use watchdog_vmgs_format::WatchdogVmgsFormatStoreError;

/// An implementation of [`WatchdogPlatform`] for use with both the UEFI
/// watchdog and the Guest Watchdog in Underhill.
pub struct UnderhillWatchdog {
store: WatchdogVmgsFormatStore,
/// An implementation of [`WatchdogPlatform`] for the Underhill environment.
pub struct UnderhillWatchdogPlatform {
/// The base watchdog platform implementation
base: BaseWatchdogPlatform,
/// Handle to the guest emulation transport client
get: guest_emulation_transport::GuestEmulationTransportClient,
hook: Box<dyn WatchdogTimeout>,
}

#[async_trait::async_trait]
pub trait WatchdogTimeout: Send + Sync {
async fn on_timeout(&self);
}

impl UnderhillWatchdog {
impl UnderhillWatchdogPlatform {
pub async fn new(
store: Box<dyn NonVolatileStore>,
get: guest_emulation_transport::GuestEmulationTransportClient,
hook: Box<dyn WatchdogTimeout>,
) -> Result<Self, WatchdogVmgsFormatStoreError> {
Ok(UnderhillWatchdog {
store: WatchdogVmgsFormatStore::new(store).await?,
Ok(UnderhillWatchdogPlatform {
base: BaseWatchdogPlatform::new(store).await?,
get,
hook,
})
}
}

#[async_trait::async_trait]
impl WatchdogPlatform for UnderhillWatchdog {
impl WatchdogPlatform for UnderhillWatchdogPlatform {
async fn on_timeout(&mut self) {
let res = self.store.set_boot_failure().await;
if let Err(e) = res {
tracing::error!(
CVM_ALLOWED,
error = &e as &dyn std::error::Error,
"error persisting watchdog status"
);
}

// Call the hook before reporting this to the GET, as the hook may
// want to do something before the host tears us down.
self.hook.on_timeout().await;
// Call the parent implementation first
self.base.on_timeout().await;

// FUTURE: consider emitting different events for the UEFI watchdog vs.
// the guest watchdog
//
// NOTE: This must be done last to ensure that all callbacks
// have been executed before we log the event.
self.get
.event_log_fatal(get_protocol::EventLogId::WATCHDOG_TIMEOUT_RESET)
.await;
}

async fn read_and_clear_boot_status(&mut self) -> bool {
let res = self.store.read_and_clear_boot_status().await;
match res {
Ok(status) => status,
Err(e) => {
tracing::error!(
CVM_ALLOWED,
error = &e as &dyn std::error::Error,
"error reading watchdog status"
);
// assume no failure
false
}
}
self.base.read_and_clear_boot_status().await
}

fn add_callback(&mut self, callback: Box<dyn WatchdogCallback>) {
self.base.add_callback(callback);
}
}
64 changes: 45 additions & 19 deletions openhcl/underhill_core/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ use crate::emuplat::non_volatile_store::VmbsBrokerNonVolatileStore;
use crate::emuplat::tpm::resources::GetTpmLoggerHandle;
use crate::emuplat::tpm::resources::GetTpmRequestAkCertHelperHandle;
use crate::emuplat::vga_proxy::UhRegisterHostIoFastPath;
use crate::emuplat::watchdog::UnderhillWatchdog;
use crate::emuplat::watchdog::WatchdogTimeout;
use crate::emuplat::watchdog::UnderhillWatchdogPlatform;
use crate::loader::LoadKind;
use crate::loader::vtl0_config::MeasuredVtl0Info;
use crate::loader::vtl2_config::RuntimeParameters;
Expand Down Expand Up @@ -177,6 +176,8 @@ use vmotherboard::BaseChipsetBuilderOutput;
use vmotherboard::ChipsetDeviceHandle;
use vmotherboard::options::BaseChipsetDevices;
use vmotherboard::options::BaseChipsetFoundation;
use watchdog_core::platform::WatchdogCallback;
use watchdog_core::platform::WatchdogPlatform;
use zerocopy::FromZeros;

pub(crate) const PM_BASE: u16 = 0x400;
Expand Down Expand Up @@ -2070,6 +2071,7 @@ async fn new_underhill_vm(
},
};

let (watchdog_send, watchdog_recv) = mesh::channel();
deps_hyperv_firmware_uefi = Some(dev::HyperVFirmwareUefi {
config,
logger: Box::new(UnderhillLogger {
Expand All @@ -2093,20 +2095,30 @@ async fn new_underhill_vm(
// UEFI watchdog doesn't persist to VMGS at this time
let store = EphemeralNonVolatileStore::new_boxed();

// Create base watchdog platform
let mut underhill_watchdog_platform =
UnderhillWatchdogPlatform::new(store, get_client.clone()).await?;

// Inject NMI on watchdog timeout
#[cfg(guest_arch = "x86_64")]
let watchdog_reset = WatchdogTimeoutNmi {
let watchdog_callback = WatchdogTimeoutNmi {
partition: partition.clone(),
watchdog_send: Some(watchdog_send),
};

// ARM64 does not have NMI support yet, so halt instead
#[cfg(guest_arch = "aarch64")]
let watchdog_reset = WatchdogTimeoutHalt {
let watchdog_callback = WatchdogTimeoutReset {
halt_vps: halt_vps.clone(),
watchdog_send: Some(watchdog_send),
};

Box::new(
UnderhillWatchdog::new(store, get_client.clone(), Box::new(watchdog_reset))
.await?,
)
// Add the callback
underhill_watchdog_platform.add_callback(Box::new(watchdog_callback));

Box::new(underhill_watchdog_platform)
},
watchdog_recv,
vsm_config: Some(Box::new(UnderhillVsmConfig {
partition: Arc::downgrade(&partition),
})),
Expand Down Expand Up @@ -2424,14 +2436,18 @@ async fn new_underhill_vm(
let store = vmgs_client
.as_non_volatile_store(vmgs::FileId::GUEST_WATCHDOG, false)
.context("failed to instantiate guest watchdog store")?;
let trigger_reset = WatchdogTimeoutHalt {

let watchdog_callback = WatchdogTimeoutReset {
halt_vps: halt_vps.clone(),
watchdog_send: None, // This is not the UEFI watchdog, so no need to send
// watchdog notifications.
};

Box::new(
UnderhillWatchdog::new(store, get_client.clone(), Box::new(trigger_reset))
.await?,
)
let mut underhill_watchdog_platform =
UnderhillWatchdogPlatform::new(store, get_client.clone()).await?;
underhill_watchdog_platform.add_callback(Box::new(watchdog_callback));

Box::new(underhill_watchdog_platform)
},
})
} else {
Expand Down Expand Up @@ -3442,31 +3458,41 @@ impl ChipsetDevice for FallbackMmioDevice {
#[cfg(guest_arch = "x86_64")]
struct WatchdogTimeoutNmi {
partition: Arc<UhPartition>,
watchdog_send: Option<mesh::Sender<()>>,
}

#[cfg(guest_arch = "x86_64")]
#[async_trait::async_trait]
impl WatchdogTimeout for WatchdogTimeoutNmi {
async fn on_timeout(&self) {
impl WatchdogCallback for WatchdogTimeoutNmi {
async fn on_timeout(&mut self) {
crate::livedump::livedump().await;

// Unlike Hyper-V, we only send the NMI to the BSP.
self.partition.request_msi(
Vtl::Vtl0,
MsiRequest::new_x86(virt::irqcon::DeliveryMode::NMI, 0, false, 0, false),
);

if let Some(watchdog_send) = &self.watchdog_send {
watchdog_send.send(());
}
}
}

struct WatchdogTimeoutHalt {
struct WatchdogTimeoutReset {
halt_vps: Arc<Halt>,
watchdog_send: Option<mesh::Sender<()>>,
}

#[async_trait::async_trait]
impl WatchdogTimeout for WatchdogTimeoutHalt {
async fn on_timeout(&self) {
impl WatchdogCallback for WatchdogTimeoutReset {
async fn on_timeout(&mut self) {
crate::livedump::livedump().await;

self.halt_vps.halt(HaltReason::Reset)
self.halt_vps.halt(HaltReason::Reset);

if let Some(watchdog_send) = &self.watchdog_send {
watchdog_send.send(());
}
}
}
1 change: 0 additions & 1 deletion openvmm/hvlite_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ vmbus_core.workspace = true
vmbus_server.workspace = true
vpci.workspace = true
watchdog_core.workspace = true
watchdog_vmgs_format.workspace = true

cache_topology.workspace = true
debug_ptr.workspace = true
Expand Down
1 change: 0 additions & 1 deletion openvmm/hvlite_core/src/emuplat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@

pub mod firmware;
pub mod i440bx_host_pci_bridge;
pub mod watchdog;
56 changes: 0 additions & 56 deletions openvmm/hvlite_core/src/emuplat/watchdog.rs

This file was deleted.

Loading