From b867be759dd54fb5d0f5eabd72a2226ba283cc5d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 8 Jul 2025 10:50:01 -0400 Subject: [PATCH 1/7] OmicronSledConfig: add desired host phase 2 contents --- dev-tools/omdb/src/bin/omdb/db.rs | 19 ++++++++ nexus-sled-agent-shared/src/inventory.rs | 42 ++++++++++++++++++ nexus/db-model/src/inventory.rs | 44 +++++++++++++++++++ nexus/db-model/src/schema_versions.rs | 3 +- .../db-queries/src/db/datastore/inventory.rs | 2 + nexus/db-schema/src/schema.rs | 2 + nexus/inventory/src/collector.rs | 8 ++++ nexus/inventory/src/examples.rs | 4 ++ nexus/test-utils/src/lib.rs | 3 ++ nexus/types/src/deployment.rs | 3 ++ schema/crdb/dbinit.sql | 7 ++- .../sled-config-desired-host-phase-2/up1.sql | 2 + .../sled-config-desired-host-phase-2/up2.sql | 2 + sled-agent/config-reconciler/src/ledger.rs | 8 ++++ .../src/ledger/legacy_configs.rs | 3 ++ sled-agent/src/rack_setup/service.rs | 5 ++- sled-agent/src/sim/sled_agent.rs | 7 +-- 17 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 schema/crdb/sled-config-desired-host-phase-2/up1.sql create mode 100644 schema/crdb/sled-config-desired-host-phase-2/up2.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 0a635c8ffc9..824041a6f35 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -131,6 +131,7 @@ use nexus_sled_agent_shared::inventory::BootPartitionDetails; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredContents; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use nexus_sled_agent_shared::inventory::OrphanedDataset; @@ -7567,12 +7568,30 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { datasets, zones, remove_mupdate_override, + host_phase_2, } = config; println!("\n{label} SLED CONFIG"); println!(" generation: {}", generation); println!(" remove_mupdate_override: {remove_mupdate_override:?}"); + let display_host_phase_2_desired = |desired| match desired { + HostPhase2DesiredContents::CurrentContents => { + Cow::Borrowed("keep existing current contents") + } + HostPhase2DesiredContents::Artifact(artifact) => { + Cow::Owned(format!("artifact {artifact}")) + } + }; + println!( + " desired host phase 2 slot a: {}", + display_host_phase_2_desired(host_phase_2.slot_a) + ); + println!( + " desired host phase 2 slot b: {}", + display_host_phase_2_desired(host_phase_2.slot_b) + ); + if disks.is_empty() { println!(" disk config empty"); } else { diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 9bf57335d54..f47a45cbcf9 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -576,6 +576,45 @@ pub enum SledRole { Scrimlet, } +/// Describes the desired contents of a host phase 2 slot (i.e., the boot +/// partition on one of the internal M.2 drives). +#[derive( + Clone, Copy, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +pub enum HostPhase2DesiredContents { + /// Do not change the current contents. + /// + /// We use this value when we've detected a sled has been mupdated (and we + /// don't want to overwrite phase 2 images until we understand how to + /// recover from that mupdate) and as the default value when reading an + /// [`OmicronSledConfig`] that was ledgered before this concept existed. + CurrentContents, + + /// Set the phase 2 slot to the given artifact. + /// + /// The artifact will come from an unpacked and distributed TUF repo. + Artifact(ArtifactHash), +} + +/// Describes the desired contents for both host phase 2 slots. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +pub struct HostPhase2DesiredSlots { + pub slot_a: HostPhase2DesiredContents, + pub slot_b: HostPhase2DesiredContents, +} + +impl HostPhase2DesiredSlots { + /// Return a `HostPhase2DesiredSlots` with both slots set to + /// [`HostPhase2DesiredContents::CurrentContents`]; i.e., "make no changes + /// to the current contents of either slot". + pub const fn current_contents() -> Self { + Self { + slot_a: HostPhase2DesiredContents::CurrentContents, + slot_b: HostPhase2DesiredContents::CurrentContents, + } + } +} + /// Describes the set of Reconfigurator-managed configuration elements of a sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] pub struct OmicronSledConfig { @@ -584,6 +623,8 @@ pub struct OmicronSledConfig { pub datasets: IdMap, pub zones: IdMap, pub remove_mupdate_override: Option, + #[serde(default = "HostPhase2DesiredSlots::current_contents")] + pub host_phase_2: HostPhase2DesiredSlots, } impl Default for OmicronSledConfig { @@ -594,6 +635,7 @@ impl Default for OmicronSledConfig { datasets: IdMap::default(), zones: IdMap::default(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } } diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index acbaae1b7bb..0b899a36d8f 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -44,6 +44,8 @@ use nexus_db_schema::schema::{ use nexus_sled_agent_shared::inventory::BootImageHeader; use nexus_sled_agent_shared::inventory::BootPartitionDetails; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredContents; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::MupdateOverrideBootInventory; use nexus_sled_agent_shared::inventory::MupdateOverrideInventory; use nexus_sled_agent_shared::inventory::MupdateOverrideNonBootInventory; @@ -1944,6 +1946,9 @@ pub struct InvOmicronSledConfig { pub id: DbTypedUuid, pub generation: Generation, pub remove_mupdate_override: Option>, + + #[diesel(embed)] + pub host_phase_2: DbHostPhase2DesiredSlots, } impl InvOmicronSledConfig { @@ -1952,12 +1957,51 @@ impl InvOmicronSledConfig { id: OmicronSledConfigUuid, generation: external::Generation, remove_mupdate_override: Option, + host_phase_2: HostPhase2DesiredSlots, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), id: id.into(), generation: Generation(generation), remove_mupdate_override: remove_mupdate_override.map(From::from), + host_phase_2: host_phase_2.into(), + } + } +} + +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_omicron_sled_config)] +pub struct DbHostPhase2DesiredSlots { + pub host_phase_2_desired_slot_a: Option, + pub host_phase_2_desired_slot_b: Option, +} + +impl From for DbHostPhase2DesiredSlots { + fn from(value: HostPhase2DesiredSlots) -> Self { + let remap = |desired| match desired { + HostPhase2DesiredContents::CurrentContents => None, + HostPhase2DesiredContents::Artifact(artifact) => { + Some(ArtifactHash(artifact)) + } + }; + Self { + host_phase_2_desired_slot_a: remap(value.slot_a), + host_phase_2_desired_slot_b: remap(value.slot_b), + } + } +} + +impl From for HostPhase2DesiredSlots { + fn from(value: DbHostPhase2DesiredSlots) -> Self { + let remap = |maybe_artifact| match maybe_artifact { + None => HostPhase2DesiredContents::CurrentContents, + Some(ArtifactHash(artifact)) => { + HostPhase2DesiredContents::Artifact(artifact) + } + }; + Self { + slot_a: remap(value.host_phase_2_desired_slot_a), + slot_b: remap(value.host_phase_2_desired_slot_b), } } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 12bbda4723a..2c1f5bd7f96 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(157, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(158, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(158, "sled-config-desired-host-phase-2"), KnownVersion::new(157, "user-data-export"), KnownVersion::new(156, "boot-partitions-inventory"), KnownVersion::new(155, "vpc-firewall-icmp"), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 925d6c1138a..8797fd0e520 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2774,6 +2774,7 @@ impl DataStore { disks: IdMap::default(), datasets: IdMap::default(), zones: IdMap::default(), + host_phase_2: sled_config.host_phase_2.into(), }, }); } @@ -3871,6 +3872,7 @@ impl ConfigReconcilerRows { sled_config_id, config.generation, config.remove_mupdate_override, + config.host_phase_2.clone(), )); self.disks.extend(config.disks.iter().map(|disk| { InvOmicronSledConfigDisk::new( diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 3d8f2d2fd7f..a813336c699 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1788,6 +1788,8 @@ table! { generation -> Int8, remove_mupdate_override -> Nullable, + host_phase_2_desired_slot_a -> Nullable, + host_phase_2_desired_slot_b -> Nullable, } } diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 36aff0f2c8a..e624c03a538 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -410,6 +410,7 @@ mod test { use gateway_messages::SpPort; use id_map::IdMap; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; + use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; @@ -434,6 +435,7 @@ mod test { datasets, zones, remove_mupdate_override, + host_phase_2, } = config; writeln!(s, " generation: {generation}").unwrap(); @@ -442,6 +444,11 @@ mod test { " remove_mupdate_override: {remove_mupdate_override:?}" ) .unwrap(); + { + let HostPhase2DesiredSlots { slot_a, slot_b } = host_phase_2; + writeln!(s, " host_phase_2.slot_a: {slot_a:?}").unwrap(); + writeln!(s, " host_phase_2.slot_b: {slot_b:?}").unwrap(); + } for disk in disks { writeln!( s, @@ -687,6 +694,7 @@ mod test { .into_iter() .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }) .await .expect("failed to write initial zone version to fake sled agent"); diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index cbf035ea1ed..453380f8ecb 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -21,6 +21,7 @@ use nexus_sled_agent_shared::inventory::BootImageHeader; use nexus_sled_agent_shared::inventory::BootPartitionDetails; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; use nexus_sled_agent_shared::inventory::InventoryDisk; @@ -345,6 +346,7 @@ pub fn representative() -> Representative { datasets: Default::default(), zones: sled14.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; let sled16 = OmicronSledConfig { generation: sled16.generation, @@ -352,6 +354,7 @@ pub fn representative() -> Representative { datasets: Default::default(), zones: sled16.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; let sled17 = OmicronSledConfig { generation: sled17.generation, @@ -359,6 +362,7 @@ pub fn representative() -> Representative { datasets: Default::default(), zones: sled17.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; // Create iterator producing fixed IDs. diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index f512c40897a..24b53f8ab4f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -34,6 +34,7 @@ use nexus_config::MgdConfig; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_config::NexusConfig; use nexus_db_queries::db::pub_test_utils::crdb; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig; @@ -1147,6 +1148,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .map(From::from) .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }) .await .expect("Failed to configure sled agent with our zones"); @@ -1186,6 +1188,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { datasets: IdMap::default(), zones: IdMap::default(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }) .await .expect("Failed to configure sled agent with our zones"); diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index eca14e08e3a..a0ce04cf02a 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -20,6 +20,7 @@ pub use crate::inventory::ZpoolName; use blueprint_diff::ClickhouseClusterConfigDiffTablesForSingleBlueprint; use blueprint_display::BpDatasetsTableSchema; use daft::Diffable; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; @@ -743,6 +744,8 @@ impl BlueprintSledConfig { }) .collect(), remove_mupdate_override: self.remove_mupdate_override, + // TODO-john Open issue to add this to the blueprint + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 3d35ceffc11..a4f6f34ca13 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3922,6 +3922,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( -- remove mupdate override ID, if set remove_mupdate_override UUID, + -- desired artifact hash for internal disk slots' boot partitions + -- NULL is translated to `HostPhase2DesiredContents::CurrentContents` + host_phase_2_desired_slot_a STRING(64), + host_phase_2_desired_slot_b STRING(64), + PRIMARY KEY (inv_collection_id, id) ); @@ -6220,7 +6225,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '157.0.0', NULL) + (TRUE, NOW(), NOW(), '158.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/sled-config-desired-host-phase-2/up1.sql b/schema/crdb/sled-config-desired-host-phase-2/up1.sql new file mode 100644 index 00000000000..176cebe1ebc --- /dev/null +++ b/schema/crdb/sled-config-desired-host-phase-2/up1.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_omicron_sled_config +ADD COLUMN IF NOT EXISTS host_phase_2_desired_slot_a STRING(64); diff --git a/schema/crdb/sled-config-desired-host-phase-2/up2.sql b/schema/crdb/sled-config-desired-host-phase-2/up2.sql new file mode 100644 index 00000000000..02d9f98f696 --- /dev/null +++ b/schema/crdb/sled-config-desired-host-phase-2/up2.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_omicron_sled_config +ADD COLUMN IF NOT EXISTS host_phase_2_desired_slot_b STRING(64); diff --git a/sled-agent/config-reconciler/src/ledger.rs b/sled-agent/config-reconciler/src/ledger.rs index e2710359f8f..39410ab949f 100644 --- a/sled-agent/config-reconciler/src/ledger.rs +++ b/sled-agent/config-reconciler/src/ledger.rs @@ -648,6 +648,7 @@ mod tests { use camino_tempfile::Utf8TempDir; use id_map::IdMap; use illumos_utils::zpool::ZpoolName; + use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use nexus_sled_agent_shared::inventory::OmicronZoneType; @@ -861,6 +862,7 @@ mod tests { datasets: IdMap::default(), zones: IdMap::default(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } @@ -1062,6 +1064,7 @@ mod tests { .into_iter() .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; // The ledger task should reject this config due to a missing artifact. @@ -1088,8 +1091,11 @@ mod tests { .into_iter() .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; + // TODO-john also test host phase 2 artifacts! + test_harness .task_handle .set_new_config(config) @@ -1152,6 +1158,8 @@ mod tests { .expect("no ledger task error") .expect("config is valid"); + // TODO-john also test host phase 2 artifacts! + logctx.cleanup_successful(); } diff --git a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs index 587303397ad..04a535bec8c 100644 --- a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs +++ b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs @@ -6,6 +6,7 @@ //! datasets, and zones) into the current unified [`OmicronSledConfig`]. use camino::Utf8PathBuf; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use omicron_common::api::external::Generation; @@ -213,6 +214,8 @@ fn merge_old_configs( zones: zones.zones.into_iter().map(|z| z.zone).collect(), // Old configs are pre-mupdate overrides. remove_mupdate_override: None, + // Old configs are pre-host-phase-2 knowledge. + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 55d5eb15e76..b70875802ce 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -89,8 +89,8 @@ use nexus_client::{ Client as NexusClient, Error as NexusError, types as NexusTypes, }; use nexus_sled_agent_shared::inventory::{ - ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, - OmicronZoneType, OmicronZonesConfig, + ConfigReconcilerInventoryResult, HostPhase2DesiredSlots, OmicronSledConfig, + OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::deployment::{ Blueprint, BlueprintDatasetConfig, BlueprintDatasetDisposition, @@ -592,6 +592,7 @@ impl ServiceInner { datasets: config.datasets.values().cloned().collect(), zones: zones_config.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; self.set_config_on_sled(*sled_address, sled_config).await?; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 3ded1197d25..fdfce7eb2da 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -24,9 +24,9 @@ use dropshot::Body; use dropshot::HttpError; use futures::Stream; use nexus_sled_agent_shared::inventory::{ - ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, - InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronZonesConfig, - SledRole, ZoneImageResolverInventory, + ConfigReconcilerInventoryStatus, HostPhase2DesiredSlots, Inventory, + InventoryDataset, InventoryDisk, InventoryZpool, OmicronSledConfig, + OmicronZonesConfig, SledRole, ZoneImageResolverInventory, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -741,6 +741,7 @@ impl SledAgent { datasets: datasets_config.datasets.into_values().collect(), zones: zones_config.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; Ok(Inventory { From a3dc67489c0eaae370889c572bedda368e18a82b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 7 Jul 2025 17:02:02 -0400 Subject: [PATCH 2/7] openapi --- dev-tools/omdb/src/bin/omdb/db.rs | 4 +- nexus-sled-agent-shared/src/inventory.rs | 4 +- nexus/db-model/src/inventory.rs | 8 +-- openapi/sled-agent.json | 71 ++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 7 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 824041a6f35..6de322219b7 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -7579,8 +7579,8 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { HostPhase2DesiredContents::CurrentContents => { Cow::Borrowed("keep existing current contents") } - HostPhase2DesiredContents::Artifact(artifact) => { - Cow::Owned(format!("artifact {artifact}")) + HostPhase2DesiredContents::Artifact { hash } => { + Cow::Owned(format!("artifact {hash}")) } }; println!( diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index f47a45cbcf9..292bef04509 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -581,6 +581,7 @@ pub enum SledRole { #[derive( Clone, Copy, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, )] +#[serde(tag = "type", rename_all = "snake_case")] pub enum HostPhase2DesiredContents { /// Do not change the current contents. /// @@ -593,11 +594,12 @@ pub enum HostPhase2DesiredContents { /// Set the phase 2 slot to the given artifact. /// /// The artifact will come from an unpacked and distributed TUF repo. - Artifact(ArtifactHash), + Artifact { hash: ArtifactHash }, } /// Describes the desired contents for both host phase 2 slots. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] pub struct HostPhase2DesiredSlots { pub slot_a: HostPhase2DesiredContents, pub slot_b: HostPhase2DesiredContents, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 0b899a36d8f..830bba8b70d 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -1980,8 +1980,8 @@ impl From for DbHostPhase2DesiredSlots { fn from(value: HostPhase2DesiredSlots) -> Self { let remap = |desired| match desired { HostPhase2DesiredContents::CurrentContents => None, - HostPhase2DesiredContents::Artifact(artifact) => { - Some(ArtifactHash(artifact)) + HostPhase2DesiredContents::Artifact { hash } => { + Some(ArtifactHash(hash)) } }; Self { @@ -1995,8 +1995,8 @@ impl From for HostPhase2DesiredSlots { fn from(value: DbHostPhase2DesiredSlots) -> Self { let remap = |maybe_artifact| match maybe_artifact { None => HostPhase2DesiredContents::CurrentContents, - Some(ArtifactHash(artifact)) => { - HostPhase2DesiredContents::Artifact(artifact) + Some(ArtifactHash(hash)) => { + HostPhase2DesiredContents::Artifact { hash } } }; Self { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 6f3ea02b617..78a9fd34c23 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4722,6 +4722,62 @@ } ] }, + "HostPhase2DesiredContents": { + "description": "Describes the desired contents of a host phase 2 slot (i.e., the boot partition on one of the internal M.2 drives).", + "oneOf": [ + { + "description": "Do not change the current contents.\n\nWe use this value when we've detected a sled has been mupdated (and we don't want to overwrite phase 2 images until we understand how to recover from that mupdate) and as the default value when reading an [`OmicronSledConfig`] that was ledgered before this concept existed.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "current_contents" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Set the phase 2 slot to the given artifact.\n\nThe artifact will come from an unpacked and distributed TUF repo.", + "type": "object", + "properties": { + "hash": { + "type": "string", + "format": "hex string (32 bytes)" + }, + "type": { + "type": "string", + "enum": [ + "artifact" + ] + } + }, + "required": [ + "hash", + "type" + ] + } + ] + }, + "HostPhase2DesiredSlots": { + "description": "Describes the desired contents for both host phase 2 slots.", + "type": "object", + "properties": { + "slot_a": { + "$ref": "#/components/schemas/HostPhase2DesiredContents" + }, + "slot_b": { + "$ref": "#/components/schemas/HostPhase2DesiredContents" + } + }, + "required": [ + "slot_a", + "slot_b" + ] + }, "HostPortConfig": { "type": "object", "properties": { @@ -5874,6 +5930,21 @@ "generation": { "$ref": "#/components/schemas/Generation" }, + "host_phase_2": { + "default": { + "slot_a": { + "type": "current_contents" + }, + "slot_b": { + "type": "current_contents" + } + }, + "allOf": [ + { + "$ref": "#/components/schemas/HostPhase2DesiredSlots" + } + ] + }, "remove_mupdate_override": { "nullable": true, "allOf": [ From f55d70b2a591c580b5d4056b28f3fd72391c2949 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 8 Jul 2025 11:15:33 -0400 Subject: [PATCH 3/7] replace TODO with issue link --- nexus/types/src/deployment.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index a0ce04cf02a..72e35004e0a 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -744,7 +744,8 @@ impl BlueprintSledConfig { }) .collect(), remove_mupdate_override: self.remove_mupdate_override, - // TODO-john Open issue to add this to the blueprint + // TODO BlueprintSledConfig should have a corresponding field. + // https://github.com/oxidecomputer/omicron/issues/8542 host_phase_2: HostPhase2DesiredSlots::current_contents(), } } From 3d4f72c1fe5d31c3fe0280663a9a1bb9d89889f9 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 8 Jul 2025 13:05:19 -0400 Subject: [PATCH 4/7] expectorate --- nexus/inventory/tests/output/collector_basic.txt | 4 ++++ nexus/inventory/tests/output/collector_sled_agent_errors.txt | 2 ++ 2 files changed, 6 insertions(+) diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index 3eed7f61adb..893e28e674d 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -92,6 +92,8 @@ sled agents found: ledgered sled config: generation: 3 remove_mupdate_override: None + host_phase_2.slot_a: CurrentContents + host_phase_2.slot_b: CurrentContents zone 8b88a56f-3eb6-4d80-ba42-75d867bc427d type oximeter no completed reconciliation reconciler task not yet run @@ -100,6 +102,8 @@ sled agents found: ledgered sled config: generation: 3 remove_mupdate_override: None + host_phase_2.slot_a: CurrentContents + host_phase_2.slot_b: CurrentContents zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter no completed reconciliation reconciler task not yet run diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index e4f3efc8739..eabcf16bc51 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -91,6 +91,8 @@ sled agents found: ledgered sled config: generation: 3 remove_mupdate_override: None + host_phase_2.slot_a: CurrentContents + host_phase_2.slot_b: CurrentContents zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter no completed reconciliation reconciler task not yet run From 8027c0d0b45bc9e19687cc3179041d84281b63e2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 8 Jul 2025 13:10:50 -0400 Subject: [PATCH 5/7] expectorate again --- .../test-data/expectorate/merged-sled-config.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json b/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json index 3c238f0190b..c985a09af81 100644 --- a/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json +++ b/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json @@ -810,5 +810,13 @@ } } }, - "remove_mupdate_override": null + "remove_mupdate_override": null, + "host_phase_2": { + "slot_a": { + "type": "current_contents" + }, + "slot_b": { + "type": "current_contents" + } + } } \ No newline at end of file From 1e6ce9f91d5ea0ebd1387c33831e3ac465aa4ad6 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 8 Jul 2025 15:23:38 -0400 Subject: [PATCH 6/7] fix string --- dev-tools/omdb/src/bin/omdb/db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 6de322219b7..938e9094499 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -7577,7 +7577,7 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { let display_host_phase_2_desired = |desired| match desired { HostPhase2DesiredContents::CurrentContents => { - Cow::Borrowed("keep existing current contents") + Cow::Borrowed("keep current contents") } HostPhase2DesiredContents::Artifact { hash } => { Cow::Owned(format!("artifact {hash}")) From 8f6cb7137ce4abfe83d406345b301844cd09f55c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 8 Jul 2025 15:24:09 -0400 Subject: [PATCH 7/7] swrite! instead of write!().unwrap() --- Cargo.lock | 1 + nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/collector.rs | 155 +++++++++++++++---------------- 3 files changed, 76 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b79a910eed8..6a0d9cb8c52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6436,6 +6436,7 @@ dependencies = [ "sled-agent-zone-images-examples", "slog", "strum", + "swrite", "thiserror 2.0.12", "tokio", "tufaceous-artifact", diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 420c4b8e54d..9aac7d25833 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -32,6 +32,7 @@ sled-agent-types.workspace = true sled-agent-zone-images-examples.workspace = true slog.workspace = true strum.workspace = true +swrite.workspace = true thiserror.workspace = true tufaceous-artifact.workspace = true typed-rng.workspace = true diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index e624c03a538..86ec9322682 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -423,10 +423,12 @@ mod test { use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use slog::o; - use std::fmt::Write; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::sync::Arc; + use swrite::SWrite as _; + use swrite::swrite; + use swrite::swriteln; fn dump_sled_config(s: &mut String, config: &OmicronSledConfig) { let OmicronSledConfig { @@ -438,45 +440,41 @@ mod test { host_phase_2, } = config; - writeln!(s, " generation: {generation}").unwrap(); - writeln!( + swriteln!(s, " generation: {generation}"); + swriteln!( s, " remove_mupdate_override: {remove_mupdate_override:?}" - ) - .unwrap(); + ); { let HostPhase2DesiredSlots { slot_a, slot_b } = host_phase_2; - writeln!(s, " host_phase_2.slot_a: {slot_a:?}").unwrap(); - writeln!(s, " host_phase_2.slot_b: {slot_b:?}").unwrap(); + swriteln!(s, " host_phase_2.slot_a: {slot_a:?}"); + swriteln!(s, " host_phase_2.slot_b: {slot_b:?}"); } for disk in disks { - writeln!( + swriteln!( s, " disk {}: {} / {} / {}", disk.id, disk.identity.vendor, disk.identity.model, disk.identity.serial - ) - .unwrap(); + ); } for dataset in datasets { - writeln!( + swriteln!( s, " dataset {}: {}", dataset.id, dataset.name.full_name() - ) - .unwrap(); + ); } for zone in zones { - writeln!( + swriteln!( s, " zone {} type {}", zone.id, zone.zone_type.kind().report_str(), - ) - .unwrap(); + ); } } @@ -489,143 +487,138 @@ mod test { // depends on what the serialization is for. It's easy enough to just // print what we want here. let mut s = String::new(); - write!(&mut s, "baseboards:\n").unwrap(); + swrite!(s, "baseboards:\n"); for b in &collection.baseboards { - write!( - &mut s, + swrite!( + s, " part {:?} serial {:?}\n", - b.part_number, b.serial_number - ) - .unwrap(); + b.part_number, + b.serial_number + ); } - write!(&mut s, "\ncabooses:\n").unwrap(); + swrite!(s, "\ncabooses:\n"); for c in &collection.cabooses { - write!( - &mut s, + swrite!( + s, " board {:?} name {:?} version {:?} git_commit {:?} sign {:?}\n", - c.board, c.name, c.version, c.git_commit, c.sign, - ) - .unwrap(); + c.board, + c.name, + c.version, + c.git_commit, + c.sign, + ); } - write!(&mut s, "\nrot pages:\n").unwrap(); + swrite!(s, "\nrot pages:\n"); for p in &collection.rot_pages { - write!(&mut s, " data_base64 {:?}\n", p.data_base64).unwrap(); + swrite!(s, " data_base64 {:?}\n", p.data_base64); } // All we really need to check here is that we're reporting the right // SPs, RoTs, and cabooses. The actual SP data, RoT data, and caboose // data comes straight from MGS. And proper handling of that data is // tested in the builder. - write!(&mut s, "\nSPs:\n").unwrap(); + swrite!(s, "\nSPs:\n"); for (bb, _) in &collection.sps { - write!( - &mut s, + swrite!( + s, " baseboard part {:?} serial {:?}\n", - bb.part_number, bb.serial_number, - ) - .unwrap(); + bb.part_number, + bb.serial_number, + ); } - write!(&mut s, "\nRoTs:\n").unwrap(); + swrite!(s, "\nRoTs:\n"); for (bb, _) in &collection.rots { - write!( - &mut s, + swrite!( + s, " baseboard part {:?} serial {:?}\n", - bb.part_number, bb.serial_number, - ) - .unwrap(); + bb.part_number, + bb.serial_number, + ); } - write!(&mut s, "\ncabooses found:\n").unwrap(); + swrite!(s, "\ncabooses found:\n"); for (kind, bb_to_found) in &collection.cabooses_found { for (bb, found) in bb_to_found { - write!( - &mut s, + swrite!( + s, " {:?} baseboard part {:?} serial {:?}: board {:?}\n", - kind, bb.part_number, bb.serial_number, found.caboose.board, - ) - .unwrap(); + kind, + bb.part_number, + bb.serial_number, + found.caboose.board, + ); } } - write!(&mut s, "\nrot pages found:\n").unwrap(); + swrite!(s, "\nrot pages found:\n"); for (kind, bb_to_found) in &collection.rot_pages_found { for (bb, found) in bb_to_found { - write!( - &mut s, + swrite!( + s, " {:?} baseboard part {:?} serial {:?}: \ data_base64 {:?}\n", kind, bb.part_number, bb.serial_number, found.page.data_base64 - ) - .unwrap(); + ); } } - write!(&mut s, "\nsled agents found:\n").unwrap(); + swrite!(s, "\nsled agents found:\n"); for sled_info in &collection.sled_agents { - write!( - &mut s, + swrite!( + s, " sled {} ({:?})\n", - sled_info.sled_id, sled_info.sled_role - ) - .unwrap(); - write!(&mut s, " baseboard {:?}\n", sled_info.baseboard_id) - .unwrap(); + sled_info.sled_id, + sled_info.sled_role + ); + swrite!(s, " baseboard {:?}\n", sled_info.baseboard_id); if let Some(config) = &sled_info.ledgered_sled_config { - writeln!(&mut s, " ledgered sled config:").unwrap(); + swriteln!(s, " ledgered sled config:"); dump_sled_config(&mut s, config); } else { - writeln!(&mut s, " no ledgered sled config").unwrap(); + swriteln!(s, " no ledgered sled config"); } if let Some(last_reconciliation) = &sled_info.last_reconciliation { - writeln!(&mut s, " last reconciled config:").unwrap(); + swriteln!(s, " last reconciled config:"); dump_sled_config( &mut s, &last_reconciliation.last_reconciled_config, ); for (id, result) in &last_reconciliation.external_disks { - writeln!(&mut s, " result for disk {id}: {result:?}") - .unwrap(); + swriteln!(s, " result for disk {id}: {result:?}"); } for (id, result) in &last_reconciliation.datasets { - writeln!(&mut s, " result for dataset {id}: {result:?}") - .unwrap(); + swriteln!(s, " result for dataset {id}: {result:?}"); } for (id, result) in &last_reconciliation.zones { - writeln!(&mut s, " result for zone {id}: {result:?}") - .unwrap(); + swriteln!(s, " result for zone {id}: {result:?}"); } } else { - writeln!(&mut s, " no completed reconciliation").unwrap(); + swriteln!(s, " no completed reconciliation"); } match &sled_info.reconciler_status { ConfigReconcilerInventoryStatus::NotYetRun => { - writeln!(&mut s, " reconciler task not yet run") - .unwrap(); + swriteln!(s, " reconciler task not yet run"); } ConfigReconcilerInventoryStatus::Running { config, .. } => { - writeln!( - &mut s, - " reconciler task running with config:" - ) - .unwrap(); + swriteln!(s, " reconciler task running with config:"); dump_sled_config(&mut s, config); } ConfigReconcilerInventoryStatus::Idle { .. } => { - writeln!(&mut s, " reconciler task idle").unwrap(); + swriteln!(s, " reconciler task idle"); } } } - write!(&mut s, "\nerrors:\n").unwrap(); + swrite!(s, "\nerrors:\n"); let os_error_re = regex::Regex::new(r"os error \d+").unwrap(); let comm_error_re = regex::Regex::new(r"Communication Error.*").unwrap(); @@ -642,7 +635,7 @@ mod test { // general sense. let message = comm_error_re .replace_all(&message, "Communication Error <>"); - write!(&mut s, "error: {}\n", message).unwrap(); + swrite!(s, "error: {}\n", message); } s