Skip to content

Commit b6766b6

Browse files
authored
sled-agent: validate host phase 2 artifacts when ledgering configs (PR 2/4) (#8539)
When ledgering a new `OmicronSledConfig`, prior to this PR we already check that we have all the zone artifacts in our local TUF repo depot. This PR extends that to confirm we also have any OS phase 2 artifacts specified in the incoming config.
1 parent 77d097a commit b6766b6

File tree

2 files changed

+126
-50
lines changed

2 files changed

+126
-50
lines changed

nexus-sled-agent-shared/src/inventory.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,16 @@ pub enum HostPhase2DesiredContents {
597597
Artifact { hash: ArtifactHash },
598598
}
599599

600+
impl HostPhase2DesiredContents {
601+
/// The artifact hash described by `self`, if it has one.
602+
pub fn artifact_hash(&self) -> Option<ArtifactHash> {
603+
match self {
604+
Self::CurrentContents => None,
605+
Self::Artifact { hash } => Some(*hash),
606+
}
607+
}
608+
}
609+
600610
/// Describes the desired contents for both host phase 2 slots.
601611
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)]
602612
#[serde(rename_all = "snake_case")]

sled-agent/config-reconciler/src/ledger.rs

Lines changed: 116 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use slog::error;
1717
use slog::info;
1818
use slog::warn;
1919
use slog_error_chain::InlineErrorChain;
20-
use std::collections::BTreeMap;
20+
use std::collections::BTreeSet;
2121
use std::convert::Infallible;
2222
use std::ops::Deref as _;
2323
use tokio::sync::mpsc;
@@ -98,7 +98,7 @@ pub enum LedgerArtifactConfigError {
9898
"Artifacts in use by ledgered sled config are not present \
9999
in new artifact config: {0:?}"
100100
)]
101-
InUseArtifactedMissing(BTreeMap<ArtifactHash, &'static str>),
101+
InUseArtifactedMissing(BTreeSet<ArtifactHash>),
102102
}
103103

104104
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -442,10 +442,7 @@ impl<T: SledAgentArtifactStore> LedgerTask<T> {
442442
// thing we confirm is that any referenced artifacts are present in the
443443
// artifact store.
444444
let mut artifact_validation_errors = Vec::new();
445-
for zone in &new_config.zones {
446-
let Some(artifact_hash) = zone.image_source.artifact_hash() else {
447-
continue;
448-
};
445+
for artifact_hash in config_artifact_hashes(new_config) {
449446
match self
450447
.artifact_store
451448
.validate_artifact_exists_in_storage(artifact_hash)
@@ -491,12 +488,10 @@ impl<T: SledAgentArtifactStore> LedgerTask<T> {
491488
CurrentSledConfig::Ledgered(sled_config) => sled_config,
492489
};
493490

494-
let mut missing = BTreeMap::new();
495-
for zone in &sled_config.zones {
496-
if let Some(hash) = zone.image_source.artifact_hash() {
497-
if !new_config.artifacts.contains(&hash) {
498-
missing.insert(hash, "current zone configuration");
499-
}
491+
let mut missing = BTreeSet::new();
492+
for artifact_hash in config_artifact_hashes(sled_config) {
493+
if !new_config.artifacts.contains(&artifact_hash) {
494+
missing.insert(artifact_hash);
500495
}
501496
}
502497

@@ -589,6 +584,18 @@ impl<T: SledAgentArtifactStore> LedgerTask<T> {
589584
}
590585
}
591586

587+
// Helper to return all the `ArtifactHash`es contained in a config.
588+
fn config_artifact_hashes(
589+
config: &OmicronSledConfig,
590+
) -> impl Iterator<Item = ArtifactHash> + '_ {
591+
config
592+
.zones
593+
.iter()
594+
.filter_map(|zone| zone.image_source.artifact_hash())
595+
.chain(config.host_phase_2.slot_a.artifact_hash())
596+
.chain(config.host_phase_2.slot_b.artifact_hash())
597+
}
598+
592599
async fn load_sled_config(
593600
config_datasets: &[Utf8PathBuf],
594601
log: &Logger,
@@ -648,6 +655,7 @@ mod tests {
648655
use camino_tempfile::Utf8TempDir;
649656
use id_map::IdMap;
650657
use illumos_utils::zpool::ZpoolName;
658+
use nexus_sled_agent_shared::inventory::HostPhase2DesiredContents;
651659
use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots;
652660
use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
653661
use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;
@@ -1054,7 +1062,7 @@ mod tests {
10541062

10551063
// Create a config that references a zone with a different artifact
10561064
// hash.
1057-
let config = OmicronSledConfig {
1065+
let mut config = OmicronSledConfig {
10581066
generation: Generation::new().next(),
10591067
disks: IdMap::new(),
10601068
datasets: IdMap::new(),
@@ -1070,7 +1078,7 @@ mod tests {
10701078
// The ledger task should reject this config due to a missing artifact.
10711079
let err = test_harness
10721080
.task_handle
1073-
.set_new_config(config)
1081+
.set_new_config(config.clone())
10741082
.await
10751083
.expect("can communicate with task")
10761084
.expect_err("config should fail");
@@ -1081,20 +1089,48 @@ mod tests {
10811089

10821090
// Change the config to reference the artifact that does exist; this one
10831091
// should be accepted.
1084-
let config = OmicronSledConfig {
1085-
generation: Generation::new().next(),
1086-
disks: IdMap::new(),
1087-
datasets: IdMap::new(),
1088-
zones: [make_dummy_zone_config_using_artifact_hash(
1089-
existing_artifact_hash,
1090-
)]
1091-
.into_iter()
1092-
.collect(),
1093-
remove_mupdate_override: None,
1094-
host_phase_2: HostPhase2DesiredSlots::current_contents(),
1092+
config.zones = [make_dummy_zone_config_using_artifact_hash(
1093+
existing_artifact_hash,
1094+
)]
1095+
.into_iter()
1096+
.collect();
1097+
1098+
test_harness
1099+
.task_handle
1100+
.set_new_config(config.clone())
1101+
.await
1102+
.expect("can communicate with task")
1103+
.expect("config should be ledgered");
1104+
1105+
// Try a config that references a host phase 2 artifact that isn't in
1106+
// the store; this should be rejected.
1107+
config.generation = config.generation.next();
1108+
config.zones = IdMap::new();
1109+
config.host_phase_2 = HostPhase2DesiredSlots {
1110+
slot_a: HostPhase2DesiredContents::CurrentContents,
1111+
slot_b: HostPhase2DesiredContents::Artifact {
1112+
hash: nonexisting_artifact_hash,
1113+
},
10951114
};
1115+
let err = test_harness
1116+
.task_handle
1117+
.set_new_config(config.clone())
1118+
.await
1119+
.expect("can communicate with task")
1120+
.expect_err("config should fail");
1121+
match err {
1122+
LedgerNewConfigError::ArtifactStoreValidationFailed(_) => (),
1123+
_ => panic!("unexpected error {}", InlineErrorChain::new(&err)),
1124+
}
10961125

1097-
// TODO-john also test host phase 2 artifacts!
1126+
// Change the config to reference the artifact that does exist; this one
1127+
// should be accepted.
1128+
config.host_phase_2 = HostPhase2DesiredSlots {
1129+
slot_a: HostPhase2DesiredContents::CurrentContents,
1130+
slot_b: HostPhase2DesiredContents::Artifact {
1131+
hash: existing_artifact_hash,
1132+
},
1133+
};
10981134

10991135
test_harness
11001136
.task_handle
@@ -1112,54 +1148,84 @@ mod tests {
11121148
"reject_artifact_configs_removing_referenced_artifacts",
11131149
);
11141150

1115-
// Claim we have a zone using the all-zeroes hash.
1116-
let used_artifact_hash = ArtifactHash([0; 32]);
1117-
let unused_artifact_hash = ArtifactHash([1; 32]);
1151+
// Construct some artifacts hashes we'll claim to be using and one we
1152+
// won't.
1153+
let used_zone_artifact_hash = ArtifactHash([0; 32]);
1154+
let used_host_artifact_hash = ArtifactHash([1; 32]);
1155+
let unused_artifact_hash = ArtifactHash([2; 32]);
11181156

1157+
// Claim we have a host phase 2
11191158
// Set up the ledger task with an initial config.
11201159
let mut sled_config = make_nonempty_sled_config();
11211160
sled_config.zones.insert(make_dummy_zone_config_using_artifact_hash(
1122-
used_artifact_hash,
1161+
used_zone_artifact_hash,
11231162
));
1163+
sled_config.host_phase_2.slot_a = HostPhase2DesiredContents::Artifact {
1164+
hash: used_host_artifact_hash,
1165+
};
1166+
11241167
let test_harness =
11251168
TestHarness::with_initial_config(logctx.log.clone(), &sled_config)
11261169
.await;
11271170

1128-
// Construct an `ArtifactConfig` that doesn't contain
1129-
// `used_artifact_hash`; this should be rejected.
11301171
let mut artifact_config = ArtifactConfig {
11311172
generation: Generation::new(),
1132-
artifacts: [unused_artifact_hash].into_iter().collect(),
1173+
artifacts: BTreeSet::new(),
11331174
};
1134-
let err = test_harness
1135-
.task_handle
1136-
.validate_artifact_config(artifact_config.clone())
1137-
.await
1138-
.expect("no ledger task error")
1139-
.expect_err("config should be rejected");
11401175

1141-
match err {
1142-
LedgerArtifactConfigError::InUseArtifactedMissing(artifacts) => {
1143-
assert!(
1144-
artifacts.contains_key(&used_artifact_hash),
1145-
"unexpected error contents: {artifacts:?}"
1146-
);
1176+
// Try some ArtifactConfigs that don't contain both `used_*` artifact
1177+
// hashes; all of these should be rejected.
1178+
for incomplete_artifacts in [
1179+
&[used_zone_artifact_hash] as &[ArtifactHash],
1180+
&[used_host_artifact_hash],
1181+
&[unused_artifact_hash],
1182+
&[used_zone_artifact_hash, unused_artifact_hash],
1183+
&[used_host_artifact_hash, unused_artifact_hash],
1184+
] {
1185+
artifact_config.artifacts =
1186+
incomplete_artifacts.iter().cloned().collect();
1187+
1188+
let err = test_harness
1189+
.task_handle
1190+
.validate_artifact_config(artifact_config.clone())
1191+
.await
1192+
.expect("no ledger task error")
1193+
.expect_err("config should be rejected");
1194+
1195+
match err {
1196+
LedgerArtifactConfigError::InUseArtifactedMissing(
1197+
artifacts,
1198+
) => {
1199+
if !incomplete_artifacts.contains(&used_zone_artifact_hash)
1200+
{
1201+
assert!(
1202+
artifacts.contains(&used_zone_artifact_hash),
1203+
"unexpected error contents: {artifacts:?}"
1204+
);
1205+
}
1206+
if !incomplete_artifacts.contains(&used_host_artifact_hash)
1207+
{
1208+
assert!(
1209+
artifacts.contains(&used_host_artifact_hash),
1210+
"unexpected error contents: {artifacts:?}"
1211+
);
1212+
}
1213+
}
1214+
_ => panic!("unexpected error {}", InlineErrorChain::new(&err)),
11471215
}
1148-
_ => panic!("unexpected error {}", InlineErrorChain::new(&err)),
11491216
}
11501217

1151-
// Put the used artifact hash into the artifact config; now it should
1218+
// Put both used artifact hashes into the artifact config; now it should
11521219
// pass validation.
1153-
artifact_config.artifacts.insert(used_artifact_hash);
1220+
artifact_config.artifacts.insert(used_zone_artifact_hash);
1221+
artifact_config.artifacts.insert(used_host_artifact_hash);
11541222
test_harness
11551223
.task_handle
11561224
.validate_artifact_config(artifact_config)
11571225
.await
11581226
.expect("no ledger task error")
11591227
.expect("config is valid");
11601228

1161-
// TODO-john also test host phase 2 artifacts!
1162-
11631229
logctx.cleanup_successful();
11641230
}
11651231

0 commit comments

Comments
 (0)