Skip to content

Commit 6a01f8e

Browse files
committed
flesh out test_host_phase1_hashing()
1 parent d3e4313 commit 6a01f8e

File tree

8 files changed

+292
-8
lines changed

8 files changed

+292
-8
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/gateway-client/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ progenitor::generate_api!(
6060
}),
6161
derives = [schemars::JsonSchema],
6262
patch = {
63+
ComponentFirmwareHashStatus = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6364
HostPhase2RecoveryImageId = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6465
ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6566
RotImageDetails = { derives = [PartialEq, Eq, PartialOrd, Ord] },

gateway/src/http_entrypoints.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -559,12 +559,20 @@ impl GatewayApi for GatewayImpl {
559559
));
560560
}
561561

562-
// TODO-john catch hash in progress?
563-
sp.start_host_flash_hash(firmware_slot).await.map_err(|err| {
564-
SpCommsError::SpCommunicationFailed { sp: sp_id, err }
565-
})?;
566-
567-
Ok(HttpResponseUpdatedNoContent())
562+
// The SP (reasonably!) returns a `HashInProgress` error if we try
563+
// to start hashing while hashing is being calculated, but we're
564+
// presenting an idempotent "start hashing if it isn't started"
565+
// endpoint instead. Swallow that error.
566+
match sp.start_host_flash_hash(firmware_slot).await {
567+
Ok(())
568+
| Err(CommunicationError::SpError(SpError::Hf(
569+
HfError::HashInProgress,
570+
))) => Ok(HttpResponseUpdatedNoContent()),
571+
Err(err) => {
572+
Err(SpCommsError::SpCommunicationFailed { sp: sp_id, err }
573+
.into())
574+
}
575+
}
568576
};
569577
apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await
570578
}

nexus/mgs-updates/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dropshot.workspace = true
4141
gateway-messages.workspace = true
4242
gateway-test-utils.workspace = true
4343
hubtools.workspace = true
44+
omicron-test-utils.workspace = true
4445
rand.workspace = true
4546
repo-depot-api.workspace = true
4647
sp-sim.workspace = true

nexus/mgs-updates/tests/host_phase1_hash.rs

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,55 @@
88
//! ask it to start hashing, then we'll get "still hashing" errors for a few
99
//! seconds, then we'll get the hash result.
1010
11+
use gateway_client::Client;
1112
use gateway_client::SpComponent;
1213
use gateway_client::types::ComponentFirmwareHashStatus;
1314
use gateway_client::types::SpType;
15+
use gateway_client::types::SpUpdateStatus;
1416
use gateway_messages::SpPort;
1517
use gateway_test_utils::setup as mgs_setup;
18+
use omicron_test_utils::dev::poll::CondCheckError;
19+
use omicron_test_utils::dev::poll::wait_for_condition;
20+
use sha2::Digest as _;
21+
use sha2::Sha256;
22+
use sp_sim::SimulatedSp;
23+
use std::time::Duration;
24+
use uuid::Uuid;
1625

26+
struct Phase1HashStatusChecker<'a> {
27+
mgs_client: &'a Client,
28+
sp_type: SpType,
29+
sp_slot: u16,
30+
sp_component: &'a str,
31+
}
32+
33+
impl Phase1HashStatusChecker<'_> {
34+
async fn assert_status(
35+
&self,
36+
expected: &[(u16, ComponentFirmwareHashStatus)],
37+
) {
38+
for (firmware_slot, expected_status) in expected {
39+
let status = self
40+
.mgs_client
41+
.sp_component_hash_firmware_get(
42+
self.sp_type,
43+
self.sp_slot,
44+
self.sp_component,
45+
*firmware_slot,
46+
)
47+
.await
48+
.expect("got firmware hash status");
49+
assert_eq!(
50+
status.into_inner(),
51+
*expected_status,
52+
"unexpected status for slot {firmware_slot}"
53+
);
54+
}
55+
}
56+
}
57+
58+
// This is primarily a test of the `sp-sim` implementation of host phase 1
59+
// flashing, with a minor side test that MGS's endpoints wrap it faithfully.
1760
#[tokio::test]
1861
async fn test_host_phase1_hashing() {
1962
// Start MGS + Sim SP.
@@ -25,9 +68,16 @@ async fn test_host_phase1_hashing() {
2568

2669
// We'll only talk to one sp-sim for this test.
2770
let mgs_client = mgstestctx.client();
71+
let sp_sim = &mgstestctx.simrack.gimlets[0];
2872
let sp_type = SpType::Sled;
2973
let sp_component = SpComponent::HOST_CPU_BOOT_FLASH.const_as_str();
3074
let sp_slot = 0;
75+
let phase1_checker = Phase1HashStatusChecker {
76+
mgs_client: &mgs_client,
77+
sp_type,
78+
sp_slot,
79+
sp_component,
80+
};
3181

3282
// We haven't yet started hashing; we should get the error we expect for
3383
// both slots.
@@ -46,4 +96,158 @@ async fn test_host_phase1_hashing() {
4696
other => panic!("unexpected status: {other:?}"),
4797
}
4898
}
99+
100+
// We want explicit (i.e., not-timer-based) control over when hashing
101+
// completes.
102+
let hashing_complete_sender =
103+
sp_sim.set_phase1_hash_policy_explicit_control().await;
104+
105+
// Start hashing firmware slot 0.
106+
mgs_client
107+
.sp_component_hash_firmware_start(sp_type, sp_slot, sp_component, 0)
108+
.await
109+
.expect("started firmware hashing");
110+
111+
// We should see the expected status; hash is computing in slot 0 and not
112+
// yet started in slot 1.
113+
phase1_checker
114+
.assert_status(&[
115+
(0, ComponentFirmwareHashStatus::HashInProgress),
116+
(1, ComponentFirmwareHashStatus::HashNotCalculated),
117+
])
118+
.await;
119+
120+
// We can start hashing firmware slot 0 again; this should be a no-op while
121+
// hashing is being done.
122+
mgs_client
123+
.sp_component_hash_firmware_start(sp_type, sp_slot, sp_component, 0)
124+
.await
125+
.expect("starting hashing while hashing should be okay");
126+
127+
// Calculate the hash we expect to see.
128+
let expected_sha256_0 = Sha256::digest(
129+
sp_sim.last_host_phase1_update_data(0).await.as_deref().unwrap_or(&[]),
130+
)
131+
.into();
132+
133+
// Allow the next `get()` to succeed.
134+
hashing_complete_sender.complete_next_hashing_attempt();
135+
136+
// We should see the expected status; hash is complete in slot 0 and not
137+
// yet started in slot 1.
138+
phase1_checker
139+
.assert_status(&[
140+
(0, ComponentFirmwareHashStatus::Hashed(expected_sha256_0)),
141+
(1, ComponentFirmwareHashStatus::HashNotCalculated),
142+
])
143+
.await;
144+
145+
// Repeat, but slot 1.
146+
mgs_client
147+
.sp_component_hash_firmware_start(sp_type, sp_slot, sp_component, 1)
148+
.await
149+
.expect("started firmware hashing");
150+
hashing_complete_sender.complete_next_hashing_attempt();
151+
phase1_checker
152+
.assert_status(&[
153+
(0, ComponentFirmwareHashStatus::Hashed(expected_sha256_0)),
154+
(1, ComponentFirmwareHashStatus::Hashed(expected_sha256_0)),
155+
])
156+
.await;
157+
158+
// Upload a new, fake phase1 to slot 1.
159+
let fake_phase1 = b"test_host_phase1_hashing_fake_data".as_slice();
160+
let expected_sha256_1 = Sha256::digest(fake_phase1).into();
161+
162+
// Drive the update to completion.
163+
{
164+
let update_id = Uuid::new_v4();
165+
mgs_client
166+
.sp_component_update(
167+
sp_type,
168+
sp_slot,
169+
sp_component,
170+
1,
171+
&update_id,
172+
fake_phase1,
173+
)
174+
.await
175+
.expect("started slot 1 update");
176+
wait_for_condition(
177+
|| async {
178+
let update_status = mgs_client
179+
.sp_component_update_status(sp_type, sp_slot, sp_component)
180+
.await
181+
.expect("got update status")
182+
.into_inner();
183+
match update_status {
184+
// expected terminal state
185+
SpUpdateStatus::Complete { id } => {
186+
if id == update_id {
187+
Ok(())
188+
} else {
189+
Err(CondCheckError::Failed(format!(
190+
"unexpected complete ID \
191+
(got {id} expected {update_id})"
192+
)))
193+
}
194+
}
195+
196+
// expected intermediate states
197+
SpUpdateStatus::Preparing { .. }
198+
| SpUpdateStatus::InProgress { .. } => {
199+
Err(CondCheckError::NotYet)
200+
}
201+
202+
// never-expect-to-see states
203+
SpUpdateStatus::None
204+
| SpUpdateStatus::Aborted { .. }
205+
| SpUpdateStatus::Failed { .. }
206+
| SpUpdateStatus::RotError { .. } => {
207+
Err(CondCheckError::Failed(format!(
208+
"unexpected status: {update_status:?}"
209+
)))
210+
}
211+
}
212+
},
213+
&Duration::from_millis(100),
214+
&Duration::from_secs(30),
215+
)
216+
.await
217+
.expect("update to sp-sim completed within timeout");
218+
}
219+
220+
// Confirm the simulator wrote the expected data in slot 1.
221+
let slot_1_data = sp_sim.last_host_phase1_update_data(1).await.unwrap();
222+
assert_eq!(*slot_1_data, *fake_phase1);
223+
224+
// Writing an update should have put slot 1 back into the "needs hashing"
225+
// state.
226+
phase1_checker
227+
.assert_status(&[
228+
(0, ComponentFirmwareHashStatus::Hashed(expected_sha256_0)),
229+
(1, ComponentFirmwareHashStatus::HashNotCalculated),
230+
])
231+
.await;
232+
233+
// Start hashing firmware slot 1.
234+
mgs_client
235+
.sp_component_hash_firmware_start(sp_type, sp_slot, sp_component, 1)
236+
.await
237+
.expect("started firmware hashing");
238+
phase1_checker
239+
.assert_status(&[
240+
(0, ComponentFirmwareHashStatus::Hashed(expected_sha256_0)),
241+
(1, ComponentFirmwareHashStatus::HashInProgress),
242+
])
243+
.await;
244+
245+
// Allow hashing to complete.
246+
hashing_complete_sender.complete_next_hashing_attempt();
247+
phase1_checker
248+
.assert_status(&[
249+
(0, ComponentFirmwareHashStatus::Hashed(expected_sha256_0)),
250+
(1, ComponentFirmwareHashStatus::Hashed(expected_sha256_1)),
251+
])
252+
.await;
49253
}

sp-sim/src/gimlet.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5+
use crate::HostFlashHashCompletionSender;
56
use crate::Responsiveness;
67
use crate::SimulatedSp;
78
use crate::config::GimletConfig;
@@ -404,6 +405,25 @@ impl Gimlet {
404405
pub fn last_request_handled(&self) -> Option<SimSpHandledRequest> {
405406
*self.last_request_handled.lock().unwrap()
406407
}
408+
409+
/// Instead of host phase 1 hashing completing after a few seconds, return a
410+
/// handle that can be used to explicitly trigger completion.
411+
///
412+
/// # Panics
413+
///
414+
/// Panics if this `Gimlet` was created with only an RoT instead of a full
415+
/// SP + RoT complex.
416+
pub async fn set_phase1_hash_policy_explicit_control(
417+
&self,
418+
) -> HostFlashHashCompletionSender {
419+
self.handler
420+
.as_ref()
421+
.expect("gimlet was created with SP config")
422+
.lock()
423+
.await
424+
.update_state
425+
.set_phase1_hash_policy_explicit_control()
426+
}
407427
}
408428

409429
struct SerialConsoleTcpTask {

sp-sim/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub use slog::Logger;
2626
use std::net::SocketAddrV6;
2727
use tokio::sync::mpsc;
2828
use tokio::sync::watch;
29+
pub use update::HostFlashHashCompletionSender;
2930

3031
pub const SIM_ROT_BOARD: &str = "SimRot";
3132
pub const SIM_ROT_STAGE0_BOARD: &str = "SimRotStage0";

0 commit comments

Comments
 (0)