Skip to content

Commit 1c53bcd

Browse files
authored
[MGS] Add endpoints for host phase 1 flash hashing (#8593)
This builds on #8584. The main point of this PR is adding two new MGS endpoints ("start" and "get status") for the async host phase 1 hashing operation; the bulk of the diff is a test that exercises the simulator to show the expected behavior of these endpoints.
1 parent c153a79 commit 1c53bcd

File tree

12 files changed

+672
-8
lines changed

12 files changed

+672
-8
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/gateway-client/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ progenitor::generate_api!(
6060
}),
6161
derives = [schemars::JsonSchema],
6262
patch = {
63+
ComponentFirmwareHashStatus = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6364
HostPhase2RecoveryImageId = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6465
ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6566
RotImageDetails = { derives = [PartialEq, Eq, PartialOrd, Ord] },

gateway-api/src/lib.rs

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use gateway_types::{
1616
SpState,
1717
},
1818
component_details::SpComponentDetails,
19-
host::HostStartupOptions,
19+
host::{ComponentFirmwareHashStatus, HostStartupOptions},
2020
ignition::{IgnitionCommand, SpIgnitionInfo},
2121
rot::{RotCfpa, RotCfpaSlot, RotCmpa, RotState},
2222
sensor::SpSensorReading,
@@ -259,6 +259,41 @@ pub trait GatewayApi {
259259
path: Path<PathSpComponent>,
260260
) -> Result<HttpResponseOk<SpUpdateStatus>, HttpError>;
261261

262+
/// Start computing the hash of a given slot of a component.
263+
///
264+
/// This endpoint is only valid for the `host-boot-flash` component.
265+
///
266+
/// Computing the hash takes several seconds; callers should poll for results
267+
/// using `sp_component_hash_firmware_get()`. In general they should call
268+
/// `sp_component_hash_firmware_get()` first anyway, as the hashes are
269+
/// cached in the SP and may already be ready.
270+
#[endpoint {
271+
method = POST,
272+
path = "/sp/{type}/{slot}/component/{component}/hash/{firmware_slot}",
273+
}]
274+
async fn sp_component_hash_firmware_start(
275+
rqctx: RequestContext<Self::Context>,
276+
path: Path<PathSpComponentFirmwareSlot>,
277+
) -> Result<HttpResponseUpdatedNoContent, HttpError>;
278+
279+
/// Get a computed hash of a given slot of a component.
280+
///
281+
/// This endpoint is only valid for the `host-boot-flash` component.
282+
///
283+
/// Computing the hash takes several seconds; this endpoint returns the
284+
/// current status. If the status is `HashNotCalculated`, callers should start
285+
/// hashing using `sp_component_hash_firmware_start()`. If the status is
286+
/// `HashInProgress`, callers should wait a bit then call this endpoint
287+
/// again.
288+
#[endpoint {
289+
method = GET,
290+
path = "/sp/{type}/{slot}/component/{component}/hash/{firmware_slot}",
291+
}]
292+
async fn sp_component_hash_firmware_get(
293+
rqctx: RequestContext<Self::Context>,
294+
path: Path<PathSpComponentFirmwareSlot>,
295+
) -> Result<HttpResponseOk<ComponentFirmwareHashStatus>, HttpError>;
296+
262297
/// Abort any in-progress update an SP component
263298
///
264299
/// Aborting an update to the SP itself is done via the component name
@@ -542,6 +577,19 @@ pub struct PathSpComponent {
542577
pub component: String,
543578
}
544579

580+
#[derive(Deserialize, JsonSchema)]
581+
pub struct PathSpComponentFirmwareSlot {
582+
/// ID for the SP that the gateway service translates into the appropriate
583+
/// port for communicating with the given SP.
584+
#[serde(flatten)]
585+
pub sp: SpIdentifier,
586+
/// ID for the component of the SP; this is the internal identifier used by
587+
/// the SP itself to identify its components.
588+
pub component: String,
589+
/// Firmware slot of the component.
590+
pub firmware_slot: u16,
591+
}
592+
545593
#[derive(Deserialize, JsonSchema)]
546594
pub struct PathSpTaskDumpIndex {
547595
/// ID for the SP that the gateway service translates into the appropriate

gateway-types/src/host.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,18 @@ impl From<StartupOptions> for HostStartupOptions {
5656
}
5757
}
5858
}
59+
60+
#[derive(Serialize, Deserialize, JsonSchema)]
61+
#[serde(tag = "status", rename_all = "snake_case")]
62+
pub enum ComponentFirmwareHashStatus {
63+
/// The hash is not available; the client must issue a separate request to
64+
/// begin calculating the hash.
65+
HashNotCalculated,
66+
/// The hash is currently being calculated; the client should sleep briefly
67+
/// then check again.
68+
///
69+
/// We expect this operation to take a handful of seconds in practice.
70+
HashInProgress,
71+
/// The hash of the given firmware slot.
72+
Hashed { sha256: [u8; 32] },
73+
}

gateway/src/http_entrypoints.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ use dropshot::WebsocketEndpointResult;
2323
use dropshot::WebsocketUpgrade;
2424
use futures::TryFutureExt;
2525
use gateway_api::*;
26+
use gateway_messages::HfError;
2627
use gateway_messages::RotBootInfo;
2728
use gateway_messages::SpComponent;
29+
use gateway_messages::SpError;
2830
use gateway_sp_comms::HostPhase2Provider;
2931
use gateway_sp_comms::VersionedSpState;
3032
use gateway_sp_comms::error::CommunicationError;
@@ -36,6 +38,7 @@ use gateway_types::component::SpComponentList;
3638
use gateway_types::component::SpIdentifier;
3739
use gateway_types::component::SpState;
3840
use gateway_types::component_details::SpComponentDetails;
41+
use gateway_types::host::ComponentFirmwareHashStatus;
3942
use gateway_types::host::HostStartupOptions;
4043
use gateway_types::ignition::SpIgnitionInfo;
4144
use gateway_types::rot::RotCfpa;
@@ -536,6 +539,93 @@ impl GatewayApi for GatewayImpl {
536539
apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await
537540
}
538541

542+
async fn sp_component_hash_firmware_start(
543+
rqctx: RequestContext<Self::Context>,
544+
path: Path<PathSpComponentFirmwareSlot>,
545+
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
546+
let apictx = rqctx.context();
547+
548+
let PathSpComponentFirmwareSlot { sp, component, firmware_slot } =
549+
path.into_inner();
550+
let sp_id = sp.into();
551+
let handler = async {
552+
let sp = apictx.mgmt_switch.sp(sp_id)?;
553+
let component = component_from_str(&component)?;
554+
555+
if component != SpComponent::HOST_CPU_BOOT_FLASH {
556+
return Err(HttpError::for_bad_request(
557+
Some("RequestUnsupportedForComponent".to_string()),
558+
"Only the host boot flash can be hashed".into(),
559+
));
560+
}
561+
562+
// The SP (reasonably!) returns a `HashInProgress` error if we try
563+
// to start hashing while hashing is being calculated, but we're
564+
// presenting an idempotent "start hashing if it isn't started"
565+
// endpoint instead. Swallow that error.
566+
match sp.start_host_flash_hash(firmware_slot).await {
567+
Ok(())
568+
| Err(CommunicationError::SpError(SpError::Hf(
569+
HfError::HashInProgress,
570+
))) => Ok(HttpResponseUpdatedNoContent()),
571+
Err(err) => {
572+
Err(SpCommsError::SpCommunicationFailed { sp: sp_id, err }
573+
.into())
574+
}
575+
}
576+
};
577+
apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await
578+
}
579+
580+
async fn sp_component_hash_firmware_get(
581+
rqctx: RequestContext<Self::Context>,
582+
path: Path<PathSpComponentFirmwareSlot>,
583+
) -> Result<HttpResponseOk<ComponentFirmwareHashStatus>, HttpError> {
584+
let apictx = rqctx.context();
585+
586+
let PathSpComponentFirmwareSlot { sp, component, firmware_slot } =
587+
path.into_inner();
588+
let sp_id = sp.into();
589+
let handler = async {
590+
let sp = apictx.mgmt_switch.sp(sp_id)?;
591+
let component = component_from_str(&component)?;
592+
593+
if component != SpComponent::HOST_CPU_BOOT_FLASH {
594+
return Err(HttpError::for_bad_request(
595+
Some("RequestUnsupportedForComponent".to_string()),
596+
"Only the host boot flash can be hashed".into(),
597+
));
598+
}
599+
600+
let status = match sp.get_host_flash_hash(firmware_slot).await {
601+
// success
602+
Ok(sha256) => ComponentFirmwareHashStatus::Hashed { sha256 },
603+
604+
// expected failure: hash needs to be calculated (or
605+
// recalculated; either way the client operation is the same)
606+
Err(CommunicationError::SpError(SpError::Hf(
607+
HfError::HashUncalculated | HfError::RecalculateHash,
608+
))) => ComponentFirmwareHashStatus::HashNotCalculated,
609+
610+
// expected failure: hashing is currently in progress; client
611+
// needs to wait and try again later
612+
Err(CommunicationError::SpError(SpError::Hf(
613+
HfError::HashInProgress,
614+
))) => ComponentFirmwareHashStatus::HashInProgress,
615+
616+
// other errors are failures
617+
Err(err) => {
618+
return Err(HttpError::from(
619+
SpCommsError::SpCommunicationFailed { sp: sp_id, err },
620+
));
621+
}
622+
};
623+
624+
Ok(HttpResponseOk(status))
625+
};
626+
apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await
627+
}
628+
539629
async fn sp_component_update_abort(
540630
rqctx: RequestContext<Self::Context>,
541631
path: Path<PathSpComponent>,

nexus/mgs-updates/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dropshot.workspace = true
4141
gateway-messages.workspace = true
4242
gateway-test-utils.workspace = true
4343
hubtools.workspace = true
44+
omicron-test-utils.workspace = true
4445
rand.workspace = true
4546
repo-depot-api.workspace = true
4647
sp-sim.workspace = true

0 commit comments

Comments
 (0)