diff --git a/Cargo.lock b/Cargo.lock index b79a910eed8..6a0d9cb8c52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6436,6 +6436,7 @@ dependencies = [ "sled-agent-zone-images-examples", "slog", "strum", + "swrite", "thiserror 2.0.12", "tokio", "tufaceous-artifact", diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 0a635c8ffc9..938e9094499 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -131,6 +131,7 @@ use nexus_sled_agent_shared::inventory::BootPartitionDetails; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredContents; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use nexus_sled_agent_shared::inventory::OrphanedDataset; @@ -7567,12 +7568,30 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { datasets, zones, remove_mupdate_override, + host_phase_2, } = config; println!("\n{label} SLED CONFIG"); println!(" generation: {}", generation); println!(" remove_mupdate_override: {remove_mupdate_override:?}"); + let display_host_phase_2_desired = |desired| match desired { + HostPhase2DesiredContents::CurrentContents => { + Cow::Borrowed("keep current contents") + } + HostPhase2DesiredContents::Artifact { hash } => { + Cow::Owned(format!("artifact {hash}")) + } + }; + println!( + " desired host phase 2 slot a: {}", + display_host_phase_2_desired(host_phase_2.slot_a) + ); + println!( + " desired host phase 2 slot b: {}", + display_host_phase_2_desired(host_phase_2.slot_b) + ); + if disks.is_empty() { println!(" disk config empty"); } else { diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 9bf57335d54..292bef04509 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -576,6 +576,47 @@ pub enum SledRole { Scrimlet, } +/// Describes the desired contents of a host phase 2 slot (i.e., the boot +/// partition on one of the internal M.2 drives). +#[derive( + Clone, Copy, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum HostPhase2DesiredContents { + /// Do not change the current contents. + /// + /// We use this value when we've detected a sled has been mupdated (and we + /// don't want to overwrite phase 2 images until we understand how to + /// recover from that mupdate) and as the default value when reading an + /// [`OmicronSledConfig`] that was ledgered before this concept existed. + CurrentContents, + + /// Set the phase 2 slot to the given artifact. + /// + /// The artifact will come from an unpacked and distributed TUF repo. + Artifact { hash: ArtifactHash }, +} + +/// Describes the desired contents for both host phase 2 slots. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct HostPhase2DesiredSlots { + pub slot_a: HostPhase2DesiredContents, + pub slot_b: HostPhase2DesiredContents, +} + +impl HostPhase2DesiredSlots { + /// Return a `HostPhase2DesiredSlots` with both slots set to + /// [`HostPhase2DesiredContents::CurrentContents`]; i.e., "make no changes + /// to the current contents of either slot". + pub const fn current_contents() -> Self { + Self { + slot_a: HostPhase2DesiredContents::CurrentContents, + slot_b: HostPhase2DesiredContents::CurrentContents, + } + } +} + /// Describes the set of Reconfigurator-managed configuration elements of a sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] pub struct OmicronSledConfig { @@ -584,6 +625,8 @@ pub struct OmicronSledConfig { pub datasets: IdMap, pub zones: IdMap, pub remove_mupdate_override: Option, + #[serde(default = "HostPhase2DesiredSlots::current_contents")] + pub host_phase_2: HostPhase2DesiredSlots, } impl Default for OmicronSledConfig { @@ -594,6 +637,7 @@ impl Default for OmicronSledConfig { datasets: IdMap::default(), zones: IdMap::default(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } } diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index acbaae1b7bb..830bba8b70d 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -44,6 +44,8 @@ use nexus_db_schema::schema::{ use nexus_sled_agent_shared::inventory::BootImageHeader; use nexus_sled_agent_shared::inventory::BootPartitionDetails; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredContents; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::MupdateOverrideBootInventory; use nexus_sled_agent_shared::inventory::MupdateOverrideInventory; use nexus_sled_agent_shared::inventory::MupdateOverrideNonBootInventory; @@ -1944,6 +1946,9 @@ pub struct InvOmicronSledConfig { pub id: DbTypedUuid, pub generation: Generation, pub remove_mupdate_override: Option>, + + #[diesel(embed)] + pub host_phase_2: DbHostPhase2DesiredSlots, } impl InvOmicronSledConfig { @@ -1952,12 +1957,51 @@ impl InvOmicronSledConfig { id: OmicronSledConfigUuid, generation: external::Generation, remove_mupdate_override: Option, + host_phase_2: HostPhase2DesiredSlots, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), id: id.into(), generation: Generation(generation), remove_mupdate_override: remove_mupdate_override.map(From::from), + host_phase_2: host_phase_2.into(), + } + } +} + +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_omicron_sled_config)] +pub struct DbHostPhase2DesiredSlots { + pub host_phase_2_desired_slot_a: Option, + pub host_phase_2_desired_slot_b: Option, +} + +impl From for DbHostPhase2DesiredSlots { + fn from(value: HostPhase2DesiredSlots) -> Self { + let remap = |desired| match desired { + HostPhase2DesiredContents::CurrentContents => None, + HostPhase2DesiredContents::Artifact { hash } => { + Some(ArtifactHash(hash)) + } + }; + Self { + host_phase_2_desired_slot_a: remap(value.slot_a), + host_phase_2_desired_slot_b: remap(value.slot_b), + } + } +} + +impl From for HostPhase2DesiredSlots { + fn from(value: DbHostPhase2DesiredSlots) -> Self { + let remap = |maybe_artifact| match maybe_artifact { + None => HostPhase2DesiredContents::CurrentContents, + Some(ArtifactHash(hash)) => { + HostPhase2DesiredContents::Artifact { hash } + } + }; + Self { + slot_a: remap(value.host_phase_2_desired_slot_a), + slot_b: remap(value.host_phase_2_desired_slot_b), } } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index d7dd6d19f36..3ac8c03c73e 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(158, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(159, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(159, "sled-config-desired-host-phase-2"), KnownVersion::new(158, "drop-builtin-roles"), KnownVersion::new(157, "user-data-export"), KnownVersion::new(156, "boot-partitions-inventory"), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 925d6c1138a..8797fd0e520 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2774,6 +2774,7 @@ impl DataStore { disks: IdMap::default(), datasets: IdMap::default(), zones: IdMap::default(), + host_phase_2: sled_config.host_phase_2.into(), }, }); } @@ -3871,6 +3872,7 @@ impl ConfigReconcilerRows { sled_config_id, config.generation, config.remove_mupdate_override, + config.host_phase_2.clone(), )); self.disks.extend(config.disks.iter().map(|disk| { InvOmicronSledConfigDisk::new( diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 02e9927c655..d258a89d91c 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1780,6 +1780,8 @@ table! { generation -> Int8, remove_mupdate_override -> Nullable, + host_phase_2_desired_slot_a -> Nullable, + host_phase_2_desired_slot_b -> Nullable, } } diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 420c4b8e54d..9aac7d25833 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -32,6 +32,7 @@ sled-agent-types.workspace = true sled-agent-zone-images-examples.workspace = true slog.workspace = true strum.workspace = true +swrite.workspace = true thiserror.workspace = true tufaceous-artifact.workspace = true typed-rng.workspace = true diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 36aff0f2c8a..86ec9322682 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -410,6 +410,7 @@ mod test { use gateway_messages::SpPort; use id_map::IdMap; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; + use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; @@ -422,10 +423,12 @@ mod test { use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use slog::o; - use std::fmt::Write; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::sync::Arc; + use swrite::SWrite as _; + use swrite::swrite; + use swrite::swriteln; fn dump_sled_config(s: &mut String, config: &OmicronSledConfig) { let OmicronSledConfig { @@ -434,42 +437,44 @@ mod test { datasets, zones, remove_mupdate_override, + host_phase_2, } = config; - writeln!(s, " generation: {generation}").unwrap(); - writeln!( + swriteln!(s, " generation: {generation}"); + swriteln!( s, " remove_mupdate_override: {remove_mupdate_override:?}" - ) - .unwrap(); + ); + { + let HostPhase2DesiredSlots { slot_a, slot_b } = host_phase_2; + swriteln!(s, " host_phase_2.slot_a: {slot_a:?}"); + swriteln!(s, " host_phase_2.slot_b: {slot_b:?}"); + } for disk in disks { - writeln!( + swriteln!( s, " disk {}: {} / {} / {}", disk.id, disk.identity.vendor, disk.identity.model, disk.identity.serial - ) - .unwrap(); + ); } for dataset in datasets { - writeln!( + swriteln!( s, " dataset {}: {}", dataset.id, dataset.name.full_name() - ) - .unwrap(); + ); } for zone in zones { - writeln!( + swriteln!( s, " zone {} type {}", zone.id, zone.zone_type.kind().report_str(), - ) - .unwrap(); + ); } } @@ -482,143 +487,138 @@ mod test { // depends on what the serialization is for. It's easy enough to just // print what we want here. let mut s = String::new(); - write!(&mut s, "baseboards:\n").unwrap(); + swrite!(s, "baseboards:\n"); for b in &collection.baseboards { - write!( - &mut s, + swrite!( + s, " part {:?} serial {:?}\n", - b.part_number, b.serial_number - ) - .unwrap(); + b.part_number, + b.serial_number + ); } - write!(&mut s, "\ncabooses:\n").unwrap(); + swrite!(s, "\ncabooses:\n"); for c in &collection.cabooses { - write!( - &mut s, + swrite!( + s, " board {:?} name {:?} version {:?} git_commit {:?} sign {:?}\n", - c.board, c.name, c.version, c.git_commit, c.sign, - ) - .unwrap(); + c.board, + c.name, + c.version, + c.git_commit, + c.sign, + ); } - write!(&mut s, "\nrot pages:\n").unwrap(); + swrite!(s, "\nrot pages:\n"); for p in &collection.rot_pages { - write!(&mut s, " data_base64 {:?}\n", p.data_base64).unwrap(); + swrite!(s, " data_base64 {:?}\n", p.data_base64); } // All we really need to check here is that we're reporting the right // SPs, RoTs, and cabooses. The actual SP data, RoT data, and caboose // data comes straight from MGS. And proper handling of that data is // tested in the builder. - write!(&mut s, "\nSPs:\n").unwrap(); + swrite!(s, "\nSPs:\n"); for (bb, _) in &collection.sps { - write!( - &mut s, + swrite!( + s, " baseboard part {:?} serial {:?}\n", - bb.part_number, bb.serial_number, - ) - .unwrap(); + bb.part_number, + bb.serial_number, + ); } - write!(&mut s, "\nRoTs:\n").unwrap(); + swrite!(s, "\nRoTs:\n"); for (bb, _) in &collection.rots { - write!( - &mut s, + swrite!( + s, " baseboard part {:?} serial {:?}\n", - bb.part_number, bb.serial_number, - ) - .unwrap(); + bb.part_number, + bb.serial_number, + ); } - write!(&mut s, "\ncabooses found:\n").unwrap(); + swrite!(s, "\ncabooses found:\n"); for (kind, bb_to_found) in &collection.cabooses_found { for (bb, found) in bb_to_found { - write!( - &mut s, + swrite!( + s, " {:?} baseboard part {:?} serial {:?}: board {:?}\n", - kind, bb.part_number, bb.serial_number, found.caboose.board, - ) - .unwrap(); + kind, + bb.part_number, + bb.serial_number, + found.caboose.board, + ); } } - write!(&mut s, "\nrot pages found:\n").unwrap(); + swrite!(s, "\nrot pages found:\n"); for (kind, bb_to_found) in &collection.rot_pages_found { for (bb, found) in bb_to_found { - write!( - &mut s, + swrite!( + s, " {:?} baseboard part {:?} serial {:?}: \ data_base64 {:?}\n", kind, bb.part_number, bb.serial_number, found.page.data_base64 - ) - .unwrap(); + ); } } - write!(&mut s, "\nsled agents found:\n").unwrap(); + swrite!(s, "\nsled agents found:\n"); for sled_info in &collection.sled_agents { - write!( - &mut s, + swrite!( + s, " sled {} ({:?})\n", - sled_info.sled_id, sled_info.sled_role - ) - .unwrap(); - write!(&mut s, " baseboard {:?}\n", sled_info.baseboard_id) - .unwrap(); + sled_info.sled_id, + sled_info.sled_role + ); + swrite!(s, " baseboard {:?}\n", sled_info.baseboard_id); if let Some(config) = &sled_info.ledgered_sled_config { - writeln!(&mut s, " ledgered sled config:").unwrap(); + swriteln!(s, " ledgered sled config:"); dump_sled_config(&mut s, config); } else { - writeln!(&mut s, " no ledgered sled config").unwrap(); + swriteln!(s, " no ledgered sled config"); } if let Some(last_reconciliation) = &sled_info.last_reconciliation { - writeln!(&mut s, " last reconciled config:").unwrap(); + swriteln!(s, " last reconciled config:"); dump_sled_config( &mut s, &last_reconciliation.last_reconciled_config, ); for (id, result) in &last_reconciliation.external_disks { - writeln!(&mut s, " result for disk {id}: {result:?}") - .unwrap(); + swriteln!(s, " result for disk {id}: {result:?}"); } for (id, result) in &last_reconciliation.datasets { - writeln!(&mut s, " result for dataset {id}: {result:?}") - .unwrap(); + swriteln!(s, " result for dataset {id}: {result:?}"); } for (id, result) in &last_reconciliation.zones { - writeln!(&mut s, " result for zone {id}: {result:?}") - .unwrap(); + swriteln!(s, " result for zone {id}: {result:?}"); } } else { - writeln!(&mut s, " no completed reconciliation").unwrap(); + swriteln!(s, " no completed reconciliation"); } match &sled_info.reconciler_status { ConfigReconcilerInventoryStatus::NotYetRun => { - writeln!(&mut s, " reconciler task not yet run") - .unwrap(); + swriteln!(s, " reconciler task not yet run"); } ConfigReconcilerInventoryStatus::Running { config, .. } => { - writeln!( - &mut s, - " reconciler task running with config:" - ) - .unwrap(); + swriteln!(s, " reconciler task running with config:"); dump_sled_config(&mut s, config); } ConfigReconcilerInventoryStatus::Idle { .. } => { - writeln!(&mut s, " reconciler task idle").unwrap(); + swriteln!(s, " reconciler task idle"); } } } - write!(&mut s, "\nerrors:\n").unwrap(); + swrite!(s, "\nerrors:\n"); let os_error_re = regex::Regex::new(r"os error \d+").unwrap(); let comm_error_re = regex::Regex::new(r"Communication Error.*").unwrap(); @@ -635,7 +635,7 @@ mod test { // general sense. let message = comm_error_re .replace_all(&message, "Communication Error <>"); - write!(&mut s, "error: {}\n", message).unwrap(); + swrite!(s, "error: {}\n", message); } s @@ -687,6 +687,7 @@ mod test { .into_iter() .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }) .await .expect("failed to write initial zone version to fake sled agent"); diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index cbf035ea1ed..453380f8ecb 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -21,6 +21,7 @@ use nexus_sled_agent_shared::inventory::BootImageHeader; use nexus_sled_agent_shared::inventory::BootPartitionDetails; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; use nexus_sled_agent_shared::inventory::InventoryDisk; @@ -345,6 +346,7 @@ pub fn representative() -> Representative { datasets: Default::default(), zones: sled14.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; let sled16 = OmicronSledConfig { generation: sled16.generation, @@ -352,6 +354,7 @@ pub fn representative() -> Representative { datasets: Default::default(), zones: sled16.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; let sled17 = OmicronSledConfig { generation: sled17.generation, @@ -359,6 +362,7 @@ pub fn representative() -> Representative { datasets: Default::default(), zones: sled17.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; // Create iterator producing fixed IDs. diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index 3eed7f61adb..893e28e674d 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -92,6 +92,8 @@ sled agents found: ledgered sled config: generation: 3 remove_mupdate_override: None + host_phase_2.slot_a: CurrentContents + host_phase_2.slot_b: CurrentContents zone 8b88a56f-3eb6-4d80-ba42-75d867bc427d type oximeter no completed reconciliation reconciler task not yet run @@ -100,6 +102,8 @@ sled agents found: ledgered sled config: generation: 3 remove_mupdate_override: None + host_phase_2.slot_a: CurrentContents + host_phase_2.slot_b: CurrentContents zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter no completed reconciliation reconciler task not yet run diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index e4f3efc8739..eabcf16bc51 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -91,6 +91,8 @@ sled agents found: ledgered sled config: generation: 3 remove_mupdate_override: None + host_phase_2.slot_a: CurrentContents + host_phase_2.slot_b: CurrentContents zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter no completed reconciliation reconciler task not yet run diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index f512c40897a..24b53f8ab4f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -34,6 +34,7 @@ use nexus_config::MgdConfig; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_config::NexusConfig; use nexus_db_queries::db::pub_test_utils::crdb; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig; @@ -1147,6 +1148,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .map(From::from) .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }) .await .expect("Failed to configure sled agent with our zones"); @@ -1186,6 +1188,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { datasets: IdMap::default(), zones: IdMap::default(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }) .await .expect("Failed to configure sled agent with our zones"); diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index eca14e08e3a..72e35004e0a 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -20,6 +20,7 @@ pub use crate::inventory::ZpoolName; use blueprint_diff::ClickhouseClusterConfigDiffTablesForSingleBlueprint; use blueprint_display::BpDatasetsTableSchema; use daft::Diffable; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; @@ -743,6 +744,9 @@ impl BlueprintSledConfig { }) .collect(), remove_mupdate_override: self.remove_mupdate_override, + // TODO BlueprintSledConfig should have a corresponding field. + // https://github.com/oxidecomputer/omicron/issues/8542 + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 6f3ea02b617..78a9fd34c23 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4722,6 +4722,62 @@ } ] }, + "HostPhase2DesiredContents": { + "description": "Describes the desired contents of a host phase 2 slot (i.e., the boot partition on one of the internal M.2 drives).", + "oneOf": [ + { + "description": "Do not change the current contents.\n\nWe use this value when we've detected a sled has been mupdated (and we don't want to overwrite phase 2 images until we understand how to recover from that mupdate) and as the default value when reading an [`OmicronSledConfig`] that was ledgered before this concept existed.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "current_contents" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Set the phase 2 slot to the given artifact.\n\nThe artifact will come from an unpacked and distributed TUF repo.", + "type": "object", + "properties": { + "hash": { + "type": "string", + "format": "hex string (32 bytes)" + }, + "type": { + "type": "string", + "enum": [ + "artifact" + ] + } + }, + "required": [ + "hash", + "type" + ] + } + ] + }, + "HostPhase2DesiredSlots": { + "description": "Describes the desired contents for both host phase 2 slots.", + "type": "object", + "properties": { + "slot_a": { + "$ref": "#/components/schemas/HostPhase2DesiredContents" + }, + "slot_b": { + "$ref": "#/components/schemas/HostPhase2DesiredContents" + } + }, + "required": [ + "slot_a", + "slot_b" + ] + }, "HostPortConfig": { "type": "object", "properties": { @@ -5874,6 +5930,21 @@ "generation": { "$ref": "#/components/schemas/Generation" }, + "host_phase_2": { + "default": { + "slot_a": { + "type": "current_contents" + }, + "slot_b": { + "type": "current_contents" + } + }, + "allOf": [ + { + "$ref": "#/components/schemas/HostPhase2DesiredSlots" + } + ] + }, "remove_mupdate_override": { "nullable": true, "allOf": [ diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 5bb6914e0ee..a0604839de1 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3872,6 +3872,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( -- remove mupdate override ID, if set remove_mupdate_override UUID, + -- desired artifact hash for internal disk slots' boot partitions + -- NULL is translated to `HostPhase2DesiredContents::CurrentContents` + host_phase_2_desired_slot_a STRING(64), + host_phase_2_desired_slot_b STRING(64), + PRIMARY KEY (inv_collection_id, id) ); @@ -6170,7 +6175,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '158.0.0', NULL) + (TRUE, NOW(), NOW(), '159.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/sled-config-desired-host-phase-2/up1.sql b/schema/crdb/sled-config-desired-host-phase-2/up1.sql new file mode 100644 index 00000000000..176cebe1ebc --- /dev/null +++ b/schema/crdb/sled-config-desired-host-phase-2/up1.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_omicron_sled_config +ADD COLUMN IF NOT EXISTS host_phase_2_desired_slot_a STRING(64); diff --git a/schema/crdb/sled-config-desired-host-phase-2/up2.sql b/schema/crdb/sled-config-desired-host-phase-2/up2.sql new file mode 100644 index 00000000000..02d9f98f696 --- /dev/null +++ b/schema/crdb/sled-config-desired-host-phase-2/up2.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_omicron_sled_config +ADD COLUMN IF NOT EXISTS host_phase_2_desired_slot_b STRING(64); diff --git a/sled-agent/config-reconciler/src/ledger.rs b/sled-agent/config-reconciler/src/ledger.rs index e2710359f8f..39410ab949f 100644 --- a/sled-agent/config-reconciler/src/ledger.rs +++ b/sled-agent/config-reconciler/src/ledger.rs @@ -648,6 +648,7 @@ mod tests { use camino_tempfile::Utf8TempDir; use id_map::IdMap; use illumos_utils::zpool::ZpoolName; + use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use nexus_sled_agent_shared::inventory::OmicronZoneType; @@ -861,6 +862,7 @@ mod tests { datasets: IdMap::default(), zones: IdMap::default(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } @@ -1062,6 +1064,7 @@ mod tests { .into_iter() .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; // The ledger task should reject this config due to a missing artifact. @@ -1088,8 +1091,11 @@ mod tests { .into_iter() .collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; + // TODO-john also test host phase 2 artifacts! + test_harness .task_handle .set_new_config(config) @@ -1152,6 +1158,8 @@ mod tests { .expect("no ledger task error") .expect("config is valid"); + // TODO-john also test host phase 2 artifacts! + logctx.cleanup_successful(); } diff --git a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs index 587303397ad..04a535bec8c 100644 --- a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs +++ b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs @@ -6,6 +6,7 @@ //! datasets, and zones) into the current unified [`OmicronSledConfig`]. use camino::Utf8PathBuf; +use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use omicron_common::api::external::Generation; @@ -213,6 +214,8 @@ fn merge_old_configs( zones: zones.zones.into_iter().map(|z| z.zone).collect(), // Old configs are pre-mupdate overrides. remove_mupdate_override: None, + // Old configs are pre-host-phase-2 knowledge. + host_phase_2: HostPhase2DesiredSlots::current_contents(), } } diff --git a/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json b/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json index 3c238f0190b..c985a09af81 100644 --- a/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json +++ b/sled-agent/config-reconciler/test-data/expectorate/merged-sled-config.json @@ -810,5 +810,13 @@ } } }, - "remove_mupdate_override": null + "remove_mupdate_override": null, + "host_phase_2": { + "slot_a": { + "type": "current_contents" + }, + "slot_b": { + "type": "current_contents" + } + } } \ No newline at end of file diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 55d5eb15e76..b70875802ce 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -89,8 +89,8 @@ use nexus_client::{ Client as NexusClient, Error as NexusError, types as NexusTypes, }; use nexus_sled_agent_shared::inventory::{ - ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, - OmicronZoneType, OmicronZonesConfig, + ConfigReconcilerInventoryResult, HostPhase2DesiredSlots, OmicronSledConfig, + OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::deployment::{ Blueprint, BlueprintDatasetConfig, BlueprintDatasetDisposition, @@ -592,6 +592,7 @@ impl ServiceInner { datasets: config.datasets.values().cloned().collect(), zones: zones_config.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; self.set_config_on_sled(*sled_address, sled_config).await?; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 3ded1197d25..fdfce7eb2da 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -24,9 +24,9 @@ use dropshot::Body; use dropshot::HttpError; use futures::Stream; use nexus_sled_agent_shared::inventory::{ - ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, - InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronZonesConfig, - SledRole, ZoneImageResolverInventory, + ConfigReconcilerInventoryStatus, HostPhase2DesiredSlots, Inventory, + InventoryDataset, InventoryDisk, InventoryZpool, OmicronSledConfig, + OmicronZonesConfig, SledRole, ZoneImageResolverInventory, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -741,6 +741,7 @@ impl SledAgent { datasets: datasets_config.datasets.into_values().collect(), zones: zones_config.zones.into_iter().collect(), remove_mupdate_override: None, + host_phase_2: HostPhase2DesiredSlots::current_contents(), }; Ok(Inventory {