Skip to content

Commit 28bb4b0

Browse files
authored
[sled-agent] Report boot partition contents of M.2 drives in inventory (PR 2/2) (#8451)
Add a `BootPartitionContents` structure to the inventory reported by the sled-agent-config-reconciler, which contains:

* which slot we booted from (A or B)
* the contents of slot A
* the contents of slot B

Each of these three items is a `Result`, in case we failed to determine it. In practice, I'd expect us to basically never fail to report which slot we booted from, and to only fail on the contents of a slot if that slot doesn't have a valid phase 2 image in it.
1 parent 765dbb0 commit 28bb4b0

File tree

18 files changed

+1248
-143
lines changed

18 files changed

+1248
-143
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/omdb/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ supports-color.workspace = true
7171
tabled.workspace = true
7272
textwrap.workspace = true
7373
tokio = { workspace = true, features = [ "full" ] }
74+
tufaceous-artifact.workspace = true
7475
unicode-width.workspace = true
7576
update-engine.workspace = true
7677
url.workspace = true

dev-tools/omdb/src/bin/omdb/db.rs

Lines changed: 75 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ use nexus_db_queries::db::pagination::Paginator;
125125
use nexus_db_queries::db::pagination::paginated;
126126
use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
127127
use nexus_db_queries::db::queries::region_allocation;
128+
use nexus_sled_agent_shared::inventory::BootImageHeader;
129+
use nexus_sled_agent_shared::inventory::BootPartitionContents;
130+
use nexus_sled_agent_shared::inventory::BootPartitionDetails;
131+
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory;
128132
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
129133
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus;
130134
use nexus_sled_agent_shared::inventory::OmicronSledConfig;
@@ -178,6 +182,7 @@ use std::sync::Arc;
178182
use std::sync::LazyLock;
179183
use strum::IntoEnumIterator;
180184
use tabled::Tabled;
185+
use tufaceous_artifact::ArtifactHash;
181186
use uuid::Uuid;
182187

183188
mod alert;
@@ -7361,34 +7366,41 @@ fn inv_collection_print_sleds(collection: &Collection) {
73617366
}
73627367

73637368
if let Some(last_reconciliation) = &sled.last_reconciliation {
7364-
if Some(&last_reconciliation.last_reconciled_config)
7369+
let ConfigReconcilerInventory {
7370+
last_reconciled_config,
7371+
external_disks,
7372+
datasets,
7373+
orphaned_datasets,
7374+
zones,
7375+
boot_partitions,
7376+
} = last_reconciliation;
7377+
7378+
inv_print_boot_partition_contents(" ", boot_partitions);
7379+
7380+
if Some(last_reconciled_config)
73657381
== sled.ledgered_sled_config.as_ref()
73667382
{
73677383
println!(" last reconciled config: matches ledgered config");
73687384
} else {
73697385
inv_collection_print_sled_config(
73707386
"LAST RECONCILED CONFIG",
7371-
&last_reconciliation.last_reconciled_config,
7387+
&last_reconciled_config,
73727388
);
73737389
}
7374-
if last_reconciliation.orphaned_datasets.is_empty() {
7390+
if orphaned_datasets.is_empty() {
73757391
println!(" no orphaned datasets");
73767392
} else {
73777393
println!(
73787394
" {} orphaned dataset(s):",
7379-
last_reconciliation.orphaned_datasets.len()
7395+
orphaned_datasets.len()
73807396
);
7381-
for orphan in &last_reconciliation.orphaned_datasets {
7397+
for orphan in orphaned_datasets {
73827398
print_one_orphaned_dataset(" ", orphan);
73837399
}
73847400
}
7385-
let disk_errs = collect_config_reconciler_errors(
7386-
&last_reconciliation.external_disks,
7387-
);
7388-
let dataset_errs =
7389-
collect_config_reconciler_errors(&last_reconciliation.datasets);
7390-
let zone_errs =
7391-
collect_config_reconciler_errors(&last_reconciliation.zones);
7401+
let disk_errs = collect_config_reconciler_errors(&external_disks);
7402+
let dataset_errs = collect_config_reconciler_errors(&datasets);
7403+
let zone_errs = collect_config_reconciler_errors(&zones);
73927404
for (label, errs) in [
73937405
("disk", disk_errs),
73947406
("dataset", dataset_errs),
@@ -7438,6 +7450,57 @@ fn inv_collection_print_sleds(collection: &Collection) {
74387450
}
74397451
}
74407452

7453+
fn inv_print_boot_partition_contents(
7454+
indent: &str,
7455+
boot_partitions: &BootPartitionContents,
7456+
) {
7457+
let BootPartitionContents { boot_disk, slot_a, slot_b } = &boot_partitions;
7458+
print!("{indent}boot disk slot: ");
7459+
match boot_disk {
7460+
Ok(slot) => println!("{slot:?}"),
7461+
Err(err) => println!("FAILED TO DETERMINE: {err}"),
7462+
}
7463+
match slot_a {
7464+
Ok(details) => {
7465+
println!("{indent}slot A details:");
7466+
inv_print_boot_partition_details(&format!("{indent} "), details);
7467+
}
7468+
Err(err) => {
7469+
println!("{indent}slot A details UNAVAILABLE: {err}");
7470+
}
7471+
}
7472+
match slot_b {
7473+
Ok(details) => {
7474+
println!("{indent}slot B details:");
7475+
inv_print_boot_partition_details(&format!("{indent} "), details);
7476+
}
7477+
Err(err) => {
7478+
println!("{indent}slot B details UNAVAILABLE: {err}");
7479+
}
7480+
}
7481+
}
7482+
7483+
fn inv_print_boot_partition_details(
7484+
indent: &str,
7485+
details: &BootPartitionDetails,
7486+
) {
7487+
let BootPartitionDetails { header, artifact_hash, artifact_size } = details;
7488+
7489+
// Not sure it's useful to print all the header details? We'll omit for now.
7490+
let BootImageHeader {
7491+
flags: _,
7492+
data_size: _,
7493+
image_size: _,
7494+
target_size: _,
7495+
sha256,
7496+
image_name,
7497+
} = header;
7498+
7499+
println!("{indent}artifact: {artifact_hash} ({artifact_size} bytes)");
7500+
println!("{indent}image name: {image_name}");
7501+
println!("{indent}phase 2 hash: {}", ArtifactHash(*sha256));
7502+
}
7503+
74417504
fn inv_collection_print_orphaned_datasets(collection: &Collection) {
74427505
// Helper for `unwrap_or()` passing borrow check below
74437506
static EMPTY_SET: LazyLock<IdOrdMap<OrphanedDataset>> =

nexus-sled-agent-shared/src/inventory.rs

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,17 @@ use id_map::IdMappable;
1616
use iddqd::IdOrdItem;
1717
use iddqd::IdOrdMap;
1818
use iddqd::id_upcast;
19-
use omicron_common::disk::{DatasetKind, DatasetName};
19+
use omicron_common::disk::{DatasetKind, DatasetName, M2Slot};
2020
use omicron_common::ledger::Ledgerable;
21+
use omicron_common::snake_case_result;
22+
use omicron_common::snake_case_result::SnakeCaseResult;
2123
use omicron_common::update::OmicronZoneManifestSource;
2224
use omicron_common::{
2325
api::{
2426
external::{ByteCount, Generation},
2527
internal::shared::{NetworkInterface, SourceNatConfig},
2628
},
2729
disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig},
28-
snake_case_result::{self, SnakeCaseResult},
2930
update::ArtifactId,
3031
zpool_name::ZpoolName,
3132
};
@@ -139,6 +140,7 @@ pub struct ConfigReconcilerInventory {
139140
pub datasets: BTreeMap<DatasetUuid, ConfigReconcilerInventoryResult>,
140141
pub orphaned_datasets: IdOrdMap<OrphanedDataset>,
141142
pub zones: BTreeMap<OmicronZoneUuid, ConfigReconcilerInventoryResult>,
143+
pub boot_partitions: BootPartitionContents,
142144
}
143145

144146
impl ConfigReconcilerInventory {
@@ -202,10 +204,57 @@ impl ConfigReconcilerInventory {
202204
datasets,
203205
orphaned_datasets: IdOrdMap::new(),
204206
zones,
207+
boot_partitions: {
208+
// None of our callers care about this; if that changes, we
209+
// could pass in boot partition contents.
210+
let err = "constructed via debug_assume_success()".to_string();
211+
BootPartitionContents {
212+
boot_disk: Err(err.clone()),
213+
slot_a: Err(err.clone()),
214+
slot_b: Err(err),
215+
}
216+
},
205217
}
206218
}
207219
}
208220

221+
// Boot partition state reported as part of `ConfigReconcilerInventory`:
// which slot the sled booted from, plus the contents of slot A and slot B.
//
// Each field is a `Result` whose error is a human-readable `String`, so a
// failure to determine one item is reported without hiding the other two.
// Every `Result` field is (de)serialized through the `snake_case_result`
// adapter and uses the matching `SnakeCaseResult` JSON schema.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)]
222+
pub struct BootPartitionContents {
223+
// The slot we booted from, or why it could not be determined.
#[serde(with = "snake_case_result")]
224+
#[schemars(schema_with = "SnakeCaseResult::<M2Slot, String>::json_schema")]
225+
pub boot_disk: Result<M2Slot, String>,
226+
// Contents of slot A, or why they could not be determined.
#[serde(with = "snake_case_result")]
227+
#[schemars(
228+
schema_with = "SnakeCaseResult::<BootPartitionDetails, String>::json_schema"
229+
)]
230+
pub slot_a: Result<BootPartitionDetails, String>,
231+
// Contents of slot B, or why they could not be determined.
#[serde(with = "snake_case_result")]
232+
#[schemars(
233+
schema_with = "SnakeCaseResult::<BootPartitionDetails, String>::json_schema"
234+
)]
235+
pub slot_b: Result<BootPartitionDetails, String>,
236+
}
237+
238+
// What was found in a single boot partition slot.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)]
239+
pub struct BootPartitionDetails {
240+
// Fields parsed from the boot image header found in the slot.
pub header: BootImageHeader,
241+
// NOTE(review): presumably the hash of the slot's full artifact contents;
// confirm against the code that computes it.
pub artifact_hash: ArtifactHash,
242+
// omdb prints this value with a "bytes" suffix.
pub artifact_size: usize,
243+
}
244+
245+
// The subset of a boot image header that is reported in inventory.
//
// There are several other fields in the header that we either parse and discard
246+
// or ignore completely; see https://github.com/oxidecomputer/boot-image-tools
247+
// for more thorough support.
248+
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)]
249+
pub struct BootImageHeader {
250+
// NOTE(review): the four numeric fields below are carried as raw header
// values; their units and exact meaning are not visible here. See
// boot-image-tools (linked above) before relying on them.
pub flags: u64,
251+
pub data_size: u64,
252+
pub image_size: u64,
253+
pub target_size: u64,
254+
// SHA-256 digest stored in the header; omdb displays it as the
// "phase 2 hash".
pub sha256: [u8; 32],
255+
// Image name stored in the header; omdb displays it as "image name".
pub image_name: String,
256+
}
257+
209258
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)]
210259
pub struct OrphanedDataset {
211260
pub name: DatasetName,

0 commit comments

Comments
 (0)