Skip to content

firmware_uefi: EfiDiagnostics change to tracelimit with custom period and limit #1706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 168 additions & 7 deletions support/tracelimit/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ pub fn disable_rate_limiting(disabled: bool) {

#[doc(hidden)]
pub struct RateLimiter {
period_ms: u32,
events_per_period: u32,
state: Mutex<RateLimiterState>,
}

struct RateLimiterState {
period_ms: u32,
events_per_period: u32,
start: Option<Instant>,
events: u32,
missed: u64,
Expand All @@ -59,9 +59,9 @@ impl RateLimiter {

pub const fn new(period_ms: u32, events_per_period: u32) -> Self {
Self {
period_ms,
events_per_period,
state: Mutex::new(RateLimiterState {
period_ms,
events_per_period,
start: None,
events: 0,
missed: 0,
Expand All @@ -74,31 +74,110 @@ impl RateLimiter {
/// `missed_events` is `Some(n)` if there were any missed events or if this
/// event is the last one before rate limiting kicks in.
pub fn event(&self) -> Result<Option<u64>, RateLimited> {
self.event_with_config(None, None)
}

/// Returns `Ok(missed_events)` if this event should be logged.
/// Optionally reconfigures the rate limiter if new parameters are provided.
///
/// `missed_events` is `Some(n)` if there were any missed events or if this
/// event is the last one before rate limiting kicks in.
pub fn event_with_config(
&self,
period_ms: Option<u32>,
events_per_period: Option<u32>,
) -> Result<Option<u64>, RateLimited> {
if DISABLE_RATE_LIMITING.load(Ordering::Relaxed) {
return Ok(None);
}

let mut state = self.state.try_lock().ok_or(RateLimited)?;

// Reconfigure if new parameters are provided
let mut reset_state = false;
if let Some(new_period) = period_ms {
if state.period_ms != new_period {
state.period_ms = new_period;
reset_state = true;
}
}
if let Some(new_events_per_period) = events_per_period {
if state.events_per_period != new_events_per_period {
state.events_per_period = new_events_per_period;
reset_state = true;
}
}

// Reset state when parameters change
if reset_state {
state.start = None;
state.events = 0;
state.missed = 0;
}

let now = Instant::now();
let period_ms = state.period_ms;
let start = state.start.get_or_insert(now);
let elapsed = now.duration_since(*start);
if elapsed.as_millis() > self.period_ms as u128 {
if elapsed.as_millis() > period_ms as u128 {
*start = now;
state.events = 0;
}
if state.events >= self.events_per_period {
if state.events >= state.events_per_period {
state.missed += 1;
return Err(RateLimited);
}
state.events += 1;
let missed = std::mem::take(&mut state.missed);
let missed = (missed != 0 || state.events == self.events_per_period).then_some(missed);
let missed = (missed != 0 || state.events == state.events_per_period).then_some(missed);
Ok(missed)
}
}

/// As [`tracing::error!`], but rate limited.
///
/// Can be called with optional parameters to customize rate limiting:
/// - `period: <ms>` - rate limiting period in milliseconds
/// - `limit: <count>` - maximum events per period
///
/// Examples:
/// ```
/// use tracelimit::error_ratelimited;
/// error_ratelimited!("simple message");
/// error_ratelimited!(period: 1000, limit: 5, "custom rate limit");
/// error_ratelimited!(period: 10000, "custom period only");
/// error_ratelimited!(limit: 50, "custom limit only");
/// ```
#[macro_export]
macro_rules! error_ratelimited {
// With both period and limit
(period: $period:expr, limit: $limit:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), Some($limit)) {
$crate::tracing::error!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// With period only
(period: $period:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), None) {
$crate::tracing::error!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// With limit only
(limit: $limit:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(None, Some($limit)) {
$crate::tracing::error!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// Default case (no custom parameters)
($($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
Expand All @@ -110,8 +189,49 @@ macro_rules! error_ratelimited {
}

/// As [`tracing::warn!`], but rate limited.
///
/// Can be called with optional parameters to customize rate limiting:
/// - `period: <ms>` - rate limiting period in milliseconds
/// - `limit: <count>` - maximum events per period
///
/// Examples:
/// ```
/// use tracelimit::warn_ratelimited;
/// warn_ratelimited!("simple message");
/// warn_ratelimited!(period: 1000, limit: 5, "custom rate limit");
/// warn_ratelimited!(period: 10000, "custom period only");
/// warn_ratelimited!(limit: 50, "custom limit only");
/// ```
#[macro_export]
macro_rules! warn_ratelimited {
// With both period and limit
(period: $period:expr, limit: $limit:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), Some($limit)) {
$crate::tracing::warn!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// With period only
(period: $period:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), None) {
$crate::tracing::warn!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// With limit only
(limit: $limit:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(None, Some($limit)) {
$crate::tracing::warn!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// Default case (no custom parameters)
($($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
Expand All @@ -123,8 +243,49 @@ macro_rules! warn_ratelimited {
}

/// As [`tracing::info!`], but rate limited.
///
/// Can be called with optional parameters to customize rate limiting:
/// - `period: <ms>` - rate limiting period in milliseconds
/// - `limit: <count>` - maximum events per period
///
/// Examples:
/// ```
/// use tracelimit::info_ratelimited;
/// info_ratelimited!("simple message");
/// info_ratelimited!(period: 1000, limit: 5, "custom rate limit");
/// info_ratelimited!(period: 10000, "custom period only");
/// info_ratelimited!(limit: 50, "custom limit only");
/// ```
#[macro_export]
macro_rules! info_ratelimited {
// With both period and limit
(period: $period:expr, limit: $limit:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), Some($limit)) {
$crate::tracing::info!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// With period only
(period: $period:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), None) {
$crate::tracing::info!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// With limit only
(limit: $limit:expr, $($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
if let Ok(missed_events) = RATE_LIMITER.event_with_config(None, Some($limit)) {
$crate::tracing::info!(dropped_ratelimited = missed_events, $($rest)*);
}
}
};
// Default case (no custom parameters)
($($rest:tt)*) => {
{
static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default();
Expand Down
15 changes: 11 additions & 4 deletions vm/devices/firmware/firmware_uefi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ use local_clock::InspectableLocalClock;
use pal_async::local::block_on;
use platform::logger::UefiLogger;
use platform::nvram::VsmConfig;
use service::diagnostics::DEFAULT_LOGS_PER_PERIOD;
use std::convert::TryInto;
use std::ops::RangeInclusive;
use std::task::Context;
Expand Down Expand Up @@ -269,13 +270,17 @@ impl UefiDevice {
tracelimit::info_ratelimited!(?addr, data, "set gpa for diagnostics");
self.service.diagnostics.set_gpa(data)
}
UefiCommand::PROCESS_EFI_DIAGNOSTICS => self.process_diagnostics(false, "guest"),
UefiCommand::PROCESS_EFI_DIAGNOSTICS => {
self.process_diagnostics(false, DEFAULT_LOGS_PER_PERIOD, "guest")
}
_ => tracelimit::warn_ratelimited!(addr, data, "unknown uefi write"),
}
}

fn inspect_extra(&mut self, _resp: &mut inspect::Response<'_>) {
self.process_diagnostics(true, "inspect_extra");
// This is the only place where we allow reprocessing diagnostics,
// since this will be driven by the user themselves.
self.process_diagnostics(true, DEFAULT_LOGS_PER_PERIOD, "inspect_extra");
}
}

Expand Down Expand Up @@ -317,8 +322,10 @@ impl PollDevice for UefiDevice {

// Poll watchdog timeout events
if let Poll::Ready(Ok(())) = self.watchdog_recv.poll_recv(cx) {
tracing::info!("watchdog timeout received");
self.process_diagnostics(true, "watchdog timeout");
// NOTE: Do not allow reprocessing diagnostics here.
// UEFI programs the watchdog's configuration, so we should assume that
// this path could trigger multiple times.
self.process_diagnostics(false, DEFAULT_LOGS_PER_PERIOD, "watchdog timeout");
}
}
}
Expand Down
37 changes: 28 additions & 9 deletions vm/devices/firmware/firmware_uefi/src/service/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ use uefi_specs::hyperv::debug_level::DEBUG_FLAG_NAMES;
use uefi_specs::hyperv::debug_level::DEBUG_WARN;
use zerocopy::FromBytes;

/// Default number of EfiDiagnosticsLogs emitted per period
pub const DEFAULT_LOGS_PER_PERIOD: u32 = 150;

/// 8-byte alignment for every entry
const ALIGNMENT: usize = 8;

Expand Down Expand Up @@ -103,26 +106,29 @@ fn phase_to_string(phase: u16) -> &'static str {
}

/// Defines how we want EfiDiagnosticsLog entries to be handled.
pub fn handle_efi_diagnostics_log<'a>(log: EfiDiagnosticsLog<'a>) {
pub fn handle_efi_diagnostics_log(log: EfiDiagnosticsLog<'_>, limit: u32) {
let debug_level_str = debug_level_to_string(log.debug_level);
let phase_str = phase_to_string(log.phase);

match log.debug_level {
DEBUG_WARN => tracing::warn!(
DEBUG_WARN => tracelimit::warn_ratelimited!(
limit: limit,
debug_level = %debug_level_str,
ticks = log.ticks,
phase = %phase_str,
log_message = log.message,
"EFI log entry"
),
DEBUG_ERROR => tracing::error!(
DEBUG_ERROR => tracelimit::error_ratelimited!(
limit: limit,
debug_level = %debug_level_str,
ticks = log.ticks,
phase = %phase_str,
log_message = log.message,
"EFI log entry"
),
_ => tracing::info!(
_ => tracelimit::info_ratelimited!(
limit: limit,
debug_level = %debug_level_str,
ticks = log.ticks,
phase = %phase_str,
Expand Down Expand Up @@ -283,11 +289,13 @@ impl DiagnosticsServices {
///
/// # Arguments
/// * `allow_reprocess` - If true, allows processing even if already processed for guest
/// * `triggered_by` - String to indicate who triggered the diagnostics processing
/// * `gm` - Guest memory to read diagnostics from
/// * `log_handler` - Function to handle each parsed log entry
fn process_diagnostics<F>(
&mut self,
allow_reprocess: bool,
triggered_by: &str,
gm: &GuestMemory,
mut log_handler: F,
) -> Result<(), DiagnosticsError>
Expand Down Expand Up @@ -450,7 +458,12 @@ impl DiagnosticsServices {
}

// Print summary statistics
tracelimit::info_ratelimited!(entries_processed, bytes_read, "processed EFI log entries");
tracelimit::info_ratelimited!(
triggered_by,
entries_processed,
bytes_read,
"processed EFI log entries"
);

Ok(())
}
Expand All @@ -461,16 +474,22 @@ impl UefiDevice {
///
/// # Arguments
/// * `allow_reprocess` - If true, allows processing even if already processed for guest
/// * `context` - String to indicate who triggered the diagnostics processing
pub(crate) fn process_diagnostics(&mut self, allow_reprocess: bool, context: &str) {
/// * `triggered_by` - String to indicate who triggered the diagnostics processing
pub(crate) fn process_diagnostics(
&mut self,
allow_reprocess: bool,
limit: u32,
triggered_by: &str,
) {
if let Err(error) = self.service.diagnostics.process_diagnostics(
allow_reprocess,
triggered_by,
&self.gm,
handle_efi_diagnostics_log,
|log| handle_efi_diagnostics_log(log, limit),
) {
tracelimit::error_ratelimited!(
error = &error as &dyn std::error::Error,
context,
triggered_by,
"failed to process diagnostics buffer"
);
}
Expand Down