Skip to content

Commit 0bef6e9

Browse files
authored
sp-sim: instant host phase 1 flash hashing by default (#8627)
1 parent 356e0b3 commit 0bef6e9

File tree

5 files changed

+126
-73
lines changed

5 files changed

+126
-73
lines changed

nexus/mgs-updates/tests/host_phase1_hash.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use omicron_test_utils::dev::poll::CondCheckError;
1919
use omicron_test_utils::dev::poll::wait_for_condition;
2020
use sha2::Digest as _;
2121
use sha2::Sha256;
22+
use sp_sim::HostFlashHashPolicy;
2223
use sp_sim::SimulatedSp;
2324
use std::time::Duration;
2425
use uuid::Uuid;
@@ -79,6 +80,14 @@ async fn test_host_phase1_hashing() {
7980
sp_component,
8081
};
8182

83+
// We want explicit (i.e., not-timer-based) control over when hashing
84+
// completes.
85+
let hashing_complete_sender = {
86+
let (policy, sender) = HostFlashHashPolicy::channel();
87+
sp_sim.set_phase1_hash_policy(policy).await;
88+
sender
89+
};
90+
8291
// We haven't yet started hashing; we should get the error we expect for
8392
// both slots.
8493
for firmware_slot in [0, 1] {
@@ -97,11 +106,6 @@ async fn test_host_phase1_hashing() {
97106
}
98107
}
99108

100-
// We want explicit (i.e., not-timer-based) control over when hashing
101-
// completes.
102-
let hashing_complete_sender =
103-
sp_sim.set_phase1_hash_policy_explicit_control().await;
104-
105109
// Start hashing firmware slot 0.
106110
mgs_client
107111
.sp_component_hash_firmware_start(sp_type, sp_slot, sp_component, 0)

sp-sim/src/gimlet.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5-
use crate::HostFlashHashCompletionSender;
5+
use crate::HostFlashHashPolicy;
66
use crate::Responsiveness;
77
use crate::SimulatedSp;
88
use crate::config::GimletConfig;
@@ -207,7 +207,11 @@ impl SimulatedSp for Gimlet {
207207
}
208208

209209
impl Gimlet {
210-
pub async fn spawn(gimlet: &GimletConfig, log: Logger) -> Result<Self> {
210+
pub async fn spawn(
211+
gimlet: &GimletConfig,
212+
phase1_hash_policy: HostFlashHashPolicy,
213+
log: Logger,
214+
) -> Result<Self> {
211215
info!(log, "setting up simulated gimlet");
212216

213217
let attached_mgs = Arc::new(Mutex::new(None));
@@ -265,6 +269,7 @@ impl Gimlet {
265269
let mut update_state = SimSpUpdate::new(
266270
BaseboardKind::Gimlet,
267271
gimlet.common.no_stage0_caboose,
272+
phase1_hash_policy,
268273
);
269274
let ereport_state = {
270275
let mut cfg = gimlet.common.ereport_config.clone();
@@ -406,23 +411,20 @@ impl Gimlet {
406411
*self.last_request_handled.lock().unwrap()
407412
}
408413

409-
/// Instead of host phase 1 hashing completing after a few seconds, return a
410-
/// handle that can be used to explicitly trigger completion.
414+
/// Set the policy for simulating host phase 1 flash hashing.
411415
///
412416
/// # Panics
413417
///
414418
/// Panics if this `Gimlet` was created with only an RoT instead of a full
415419
/// SP + RoT complex.
416-
pub async fn set_phase1_hash_policy_explicit_control(
417-
&self,
418-
) -> HostFlashHashCompletionSender {
420+
pub async fn set_phase1_hash_policy(&self, policy: HostFlashHashPolicy) {
419421
self.handler
420422
.as_ref()
421423
.expect("gimlet was created with SP config")
422424
.lock()
423425
.await
424426
.update_state
425-
.set_phase1_hash_policy_explicit_control()
427+
.set_phase1_hash_policy(policy)
426428
}
427429
}
428430

sp-sim/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use std::net::SocketAddrV6;
2727
use tokio::sync::mpsc;
2828
use tokio::sync::watch;
2929
pub use update::HostFlashHashCompletionSender;
30+
pub use update::HostFlashHashPolicy;
3031

3132
pub const SIM_ROT_BOARD: &str = "SimRot";
3233
pub const SIM_ROT_STAGE0_BOARD: &str = "SimRotStage0";
@@ -156,6 +157,11 @@ impl SimRack {
156157
gimlets.push(
157158
Gimlet::spawn(
158159
gimlet,
160+
// We could expose this in the config file if we want
161+
// callers to be able configure timer-based hashing instead?
162+
// For now, just use the fastest version (assume contents
163+
// are always hashed).
164+
HostFlashHashPolicy::assume_already_hashed(),
159165
log.new(slog::o!("slot" => format!("gimlet {}", i))),
160166
)
161167
.await?,

sp-sim/src/sidecar.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5+
use crate::HostFlashHashPolicy;
56
use crate::Responsiveness;
67
use crate::SimulatedSp;
78
use crate::config::Config;
@@ -531,6 +532,8 @@ impl Handler {
531532
update_state: SimSpUpdate::new(
532533
BaseboardKind::Sidecar,
533534
no_stage0_caboose,
535+
// sidecar doesn't have phase 1 flash; any policy is fine
536+
HostFlashHashPolicy::assume_already_hashed(),
534537
),
535538
reset_pending: None,
536539
should_fail_to_respond_signal: None,

sp-sim/src/update.rs

Lines changed: 98 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ use sha3::Digest;
2929
use sha3::Sha3_256;
3030
use tokio::sync::mpsc;
3131

32-
// How long do we take to hash host flash? Real SPs take a handful of seconds;
33-
// we'll pick something similar.
34-
const TIME_TO_HASH_HOST_PHASE_1: Duration = Duration::from_secs(5);
35-
3632
pub(crate) struct SimSpUpdate {
3733
/// tracks the state of any ongoing simulated update
3834
///
@@ -53,7 +49,7 @@ pub(crate) struct SimSpUpdate {
5349
/// us to default to `TIME_TO_HASH_HOST_PHASE_1` (e.g., for running sp-sim
5450
/// as a part of `omicron-dev`) while giving tests that want explicit
5551
/// control the ability to precisely trigger completion of hashing.
56-
phase1_hash_policy: HostFlashHashPolicy,
52+
phase1_hash_policy: HostFlashHashPolicyInner,
5753

5854
/// records whether a change to the stage0 "active slot" has been requested
5955
pending_stage0_update: bool,
@@ -78,6 +74,7 @@ impl SimSpUpdate {
7874
pub(crate) fn new(
7975
baseboard_kind: BaseboardKind,
8076
no_stage0_caboose: bool,
77+
phase1_hash_policy: HostFlashHashPolicy,
8178
) -> Self {
8279
const SP_GITC0: &str = "ffffffff";
8380
const SP_GITC1: &str = "fefefefe";
@@ -194,9 +191,7 @@ impl SimSpUpdate {
194191
last_rot_update_data: None,
195192
last_host_phase1_update_data: BTreeMap::new(),
196193
phase1_hash_state: BTreeMap::new(),
197-
phase1_hash_policy: HostFlashHashPolicy::Timer(
198-
TIME_TO_HASH_HOST_PHASE_1,
199-
),
194+
phase1_hash_policy: phase1_hash_policy.0,
200195

201196
pending_stage0_update: false,
202197

@@ -211,14 +206,11 @@ impl SimSpUpdate {
211206
}
212207
}
213208

214-
/// Instead of host phase 1 hashing completing after a few seconds, return a
215-
/// handle that can be used to explicitly trigger completion.
216-
pub(crate) fn set_phase1_hash_policy_explicit_control(
209+
pub(crate) fn set_phase1_hash_policy(
217210
&mut self,
218-
) -> HostFlashHashCompletionSender {
219-
let (tx, rx) = mpsc::unbounded_channel();
220-
self.phase1_hash_policy = HostFlashHashPolicy::Channel(rx);
221-
HostFlashHashCompletionSender(tx)
211+
policy: HostFlashHashPolicy,
212+
) {
213+
self.phase1_hash_policy = policy.0;
222214
}
223215

224216
pub(crate) fn sp_update_prepare(
@@ -493,28 +485,23 @@ impl SimSpUpdate {
493485
&mut self,
494486
slot: u16,
495487
) -> Result<(), SpError> {
488+
self.check_host_flash_state_and_policy(slot);
496489
match self
497490
.phase1_hash_state
498-
.entry(slot)
499-
.or_insert(HostFlashHashState::NeverHashed)
491+
.get_mut(&slot)
492+
.expect("check_host_flash_state_and_policy always inserts")
500493
{
501-
// No current hash; record our start time so we can emulate hashing
502-
// taking a few seconds.
494+
// Already hashed; this is a no-op.
495+
HostFlashHashState::Hashed(_) => Ok(()),
496+
// No current hash; record our start time.
503497
state @ (HostFlashHashState::NeverHashed
504498
| HostFlashHashState::HashInvalidated) => {
505499
*state = HostFlashHashState::HashStarted(Instant::now());
506500
Ok(())
507501
}
508-
// Already hashed; this is a no-op.
509-
HostFlashHashState::Hashed(_) => Ok(()),
510-
// Still hashing; check and see if it's done. This is either an
511-
// error (if we're still hashing) or a no-op (if we're done).
512-
HostFlashHashState::HashStarted(started) => {
513-
let started = *started;
514-
self.finalize_host_flash_hash_if_sufficient_time_elapsed(
515-
slot, started,
516-
)?;
517-
Ok(())
502+
// Still hashing; this is an error.
503+
HostFlashHashState::HashStarted(_) => {
504+
Err(SpError::Hf(HfError::HashInProgress))
518505
}
519506
}
520507
}
@@ -523,51 +510,68 @@ impl SimSpUpdate {
523510
&mut self,
524511
slot: u16,
525512
) -> Result<[u8; 32], SpError> {
513+
self.check_host_flash_state_and_policy(slot);
526514
match self
527515
.phase1_hash_state
528-
.entry(slot)
529-
.or_insert(HostFlashHashState::NeverHashed)
516+
.get_mut(&slot)
517+
.expect("check_host_flash_state_and_policy always inserts")
530518
{
519+
HostFlashHashState::Hashed(hash) => Ok(*hash),
531520
HostFlashHashState::NeverHashed => {
532521
Err(SpError::Hf(HfError::HashUncalculated))
533522
}
534-
HostFlashHashState::HashStarted(started) => {
535-
let started = *started;
536-
self.finalize_host_flash_hash_if_sufficient_time_elapsed(
537-
slot, started,
538-
)
523+
HostFlashHashState::HashStarted(_) => {
524+
Err(SpError::Hf(HfError::HashInProgress))
539525
}
540-
HostFlashHashState::Hashed(hash) => Ok(*hash),
541526
HostFlashHashState::HashInvalidated => {
542527
Err(SpError::Hf(HfError::RecalculateHash))
543528
}
544529
}
545530
}
546531

547-
fn finalize_host_flash_hash_if_sufficient_time_elapsed(
548-
&mut self,
549-
slot: u16,
550-
started: Instant,
551-
) -> Result<[u8; 32], SpError> {
552-
match &mut self.phase1_hash_policy {
553-
HostFlashHashPolicy::Timer(duration) => {
554-
if started.elapsed() < *duration {
555-
return Err(SpError::Hf(HfError::HashInProgress));
556-
}
557-
}
558-
HostFlashHashPolicy::Channel(rx) => {
559-
if rx.try_recv().is_err() {
560-
return Err(SpError::Hf(HfError::HashInProgress));
561-
}
562-
}
532+
fn check_host_flash_state_and_policy(&mut self, slot: u16) {
533+
let state = self
534+
.phase1_hash_state
535+
.entry(slot)
536+
.or_insert(HostFlashHashState::NeverHashed);
537+
538+
// If we've already hashed this slot, we're done.
539+
if matches!(state, HostFlashHashState::Hashed(_)) {
540+
return;
563541
}
564542

565-
let data = self.last_host_phase1_update_data(slot);
566-
let data = data.as_deref().unwrap_or(&[]);
567-
let hash = Sha256::digest(&data).into();
568-
self.phase1_hash_state.insert(slot, HostFlashHashState::Hashed(hash));
543+
// Should we hash the flash now? It depends on our state + policy.
544+
let should_hash = match (&mut self.phase1_hash_policy, state) {
545+
// If we want to always assume contents are hashed, compute that
546+
// hash _unless_ the contents have changed (in which case a client
547+
// would need to send us an explicit "start hashing" request).
548+
(
549+
HostFlashHashPolicyInner::AssumeAlreadyHashed,
550+
HostFlashHashState::HashInvalidated,
551+
) => false,
552+
(HostFlashHashPolicyInner::AssumeAlreadyHashed, _) => true,
553+
// If we're timer based, only hash if the timer has elapsed.
554+
(
555+
HostFlashHashPolicyInner::Timer(timeout),
556+
HostFlashHashState::HashStarted(started),
557+
) => *timeout >= started.elapsed(),
558+
(HostFlashHashPolicyInner::Timer(_), _) => false,
559+
// If we're channel based, only hash if we've gotten a request to
560+
// start hashing and there's a message in the channel.
561+
(
562+
HostFlashHashPolicyInner::Channel(rx),
563+
HostFlashHashState::HashStarted(_),
564+
) => rx.try_recv().is_ok(),
565+
(HostFlashHashPolicyInner::Channel(_), _) => false,
566+
};
569567

570-
Ok(hash)
568+
if should_hash {
569+
let data = self.last_host_phase1_update_data(slot);
570+
let data = data.as_deref().unwrap_or(&[]);
571+
let hash = Sha256::digest(&data).into();
572+
self.phase1_hash_state
573+
.insert(slot, HostFlashHashState::Hashed(hash));
574+
}
571575
}
572576

573577
pub(crate) fn get_component_caboose_value(
@@ -784,14 +788,48 @@ fn fake_fwid_compute(data: &[u8]) -> Fwid {
784788

785789
#[derive(Debug, Clone, Copy)]
786790
enum HostFlashHashState {
791+
Hashed([u8; 32]),
787792
NeverHashed,
788793
HashStarted(Instant),
789-
Hashed([u8; 32]),
790794
HashInvalidated,
791795
}
792796

797+
/// Policy controlling how `sp-sim` behaves when asked to flash its host phase 1
798+
/// contents.
799+
#[derive(Debug)]
800+
pub struct HostFlashHashPolicy(HostFlashHashPolicyInner);
801+
802+
impl HostFlashHashPolicy {
803+
/// Always return computed hashes when asked.
804+
///
805+
/// This emulates an SP that has previously computed its phase 1 flash hash
806+
/// and whose contents haven't changed. Most Nexus tests should use this
807+
/// policy by default to allow inventory collections to complete promptly.
808+
pub fn assume_already_hashed() -> Self {
809+
Self(HostFlashHashPolicyInner::AssumeAlreadyHashed)
810+
}
811+
812+
/// Return `HashInProgress` for `timeout` after hashing has started before
813+
/// completing it successfully.
814+
pub fn timer(timeout: Duration) -> Self {
815+
Self(HostFlashHashPolicyInner::Timer(timeout))
816+
}
817+
818+
/// Returns a channel that allows the caller to control when hashing
819+
/// completes.
820+
pub fn channel() -> (Self, HostFlashHashCompletionSender) {
821+
let (tx, rx) = mpsc::unbounded_channel();
822+
(
823+
Self(HostFlashHashPolicyInner::Channel(rx)),
824+
HostFlashHashCompletionSender(tx),
825+
)
826+
}
827+
}
828+
793829
#[derive(Debug)]
794-
enum HostFlashHashPolicy {
830+
enum HostFlashHashPolicyInner {
831+
/// always assume hashing has already been computed
832+
AssumeAlreadyHashed,
795833
/// complete hashing after `Duration` has elapsed
796834
Timer(Duration),
797835
/// complete hashing if there's a message in this channel

0 commit comments

Comments
 (0)