Skip to content

Commit 9f755f3

Browse files
committed
sled-agent: validate host phase 2 artifacts when ledgering configs
1 parent 77d097a commit 9f755f3

File tree

2 files changed

+126
-50
lines changed

2 files changed

+126
-50
lines changed

nexus-sled-agent-shared/src/inventory.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,16 @@ pub enum HostPhase2DesiredContents {
597597
Artifact { hash: ArtifactHash },
598598
}
599599

600+
impl HostPhase2DesiredContents {
601+
/// The artifact hash described by `self`, if it has one.
602+
pub fn artifact_hash(&self) -> Option<ArtifactHash> {
603+
match self {
604+
Self::CurrentContents => None,
605+
Self::Artifact { hash } => Some(*hash),
606+
}
607+
}
608+
}
609+
600610
/// Describes the desired contents for both host phase 2 slots.
601611
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)]
602612
#[serde(rename_all = "snake_case")]

sled-agent/config-reconciler/src/ledger.rs

Lines changed: 116 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use slog::error;
1717
use slog::info;
1818
use slog::warn;
1919
use slog_error_chain::InlineErrorChain;
20-
use std::collections::BTreeMap;
20+
use std::collections::BTreeSet;
2121
use std::convert::Infallible;
2222
use std::ops::Deref as _;
2323
use tokio::sync::mpsc;
@@ -98,7 +98,7 @@ pub enum LedgerArtifactConfigError {
9898
"Artifacts in use by ledgered sled config are not present \
9999
in new artifact config: {0:?}"
100100
)]
101-
InUseArtifactedMissing(BTreeMap<ArtifactHash, &'static str>),
101+
InUseArtifactedMissing(BTreeSet<ArtifactHash>),
102102
}
103103

104104
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -442,10 +442,7 @@ impl<T: SledAgentArtifactStore> LedgerTask<T> {
442442
// thing we confirm is that any referenced artifacts are present in the
443443
// artifact store.
444444
let mut artifact_validation_errors = Vec::new();
445-
for zone in &new_config.zones {
446-
let Some(artifact_hash) = zone.image_source.artifact_hash() else {
447-
continue;
448-
};
445+
for artifact_hash in config_artifact_hashes(new_config) {
449446
match self
450447
.artifact_store
451448
.validate_artifact_exists_in_storage(artifact_hash)
@@ -491,12 +488,10 @@ impl<T: SledAgentArtifactStore> LedgerTask<T> {
491488
CurrentSledConfig::Ledgered(sled_config) => sled_config,
492489
};
493490

494-
let mut missing = BTreeMap::new();
495-
for zone in &sled_config.zones {
496-
if let Some(hash) = zone.image_source.artifact_hash() {
497-
if !new_config.artifacts.contains(&hash) {
498-
missing.insert(hash, "current zone configuration");
499-
}
491+
let mut missing = BTreeSet::new();
492+
for artifact_hash in config_artifact_hashes(sled_config) {
493+
if !new_config.artifacts.contains(&artifact_hash) {
494+
missing.insert(artifact_hash);
500495
}
501496
}
502497

@@ -589,6 +584,18 @@ impl<T: SledAgentArtifactStore> LedgerTask<T> {
589584
}
590585
}
591586

587+
// Helper to return all the `ArtifactHash`es contained in a config.
588+
fn config_artifact_hashes(
589+
config: &OmicronSledConfig,
590+
) -> impl Iterator<Item = ArtifactHash> + '_ {
591+
config
592+
.zones
593+
.iter()
594+
.filter_map(|zone| zone.image_source.artifact_hash())
595+
.chain(config.host_phase_2.slot_a.artifact_hash())
596+
.chain(config.host_phase_2.slot_b.artifact_hash())
597+
}
598+
592599
async fn load_sled_config(
593600
config_datasets: &[Utf8PathBuf],
594601
log: &Logger,
@@ -648,6 +655,7 @@ mod tests {
648655
use camino_tempfile::Utf8TempDir;
649656
use id_map::IdMap;
650657
use illumos_utils::zpool::ZpoolName;
658+
use nexus_sled_agent_shared::inventory::HostPhase2DesiredContents;
651659
use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots;
652660
use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
653661
use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;
@@ -1054,7 +1062,7 @@ mod tests {
10541062

10551063
// Create a config that references a zone with a different artifact
10561064
// hash.
1057-
let config = OmicronSledConfig {
1065+
let mut config = OmicronSledConfig {
10581066
generation: Generation::new().next(),
10591067
disks: IdMap::new(),
10601068
datasets: IdMap::new(),
@@ -1070,7 +1078,7 @@ mod tests {
10701078
// The ledger task should reject this config due to a missing artifact.
10711079
let err = test_harness
10721080
.task_handle
1073-
.set_new_config(config)
1081+
.set_new_config(config.clone())
10741082
.await
10751083
.expect("can communicate with task")
10761084
.expect_err("config should fail");
@@ -1081,20 +1089,48 @@ mod tests {
10811089

10821090
// Change the config to reference the artifact that does exist; this one
10831091
// should be accepted.
1084-
let config = OmicronSledConfig {
1085-
generation: Generation::new().next(),
1086-
disks: IdMap::new(),
1087-
datasets: IdMap::new(),
1088-
zones: [make_dummy_zone_config_using_artifact_hash(
1089-
existing_artifact_hash,
1090-
)]
1091-
.into_iter()
1092-
.collect(),
1093-
remove_mupdate_override: None,
1094-
host_phase_2: HostPhase2DesiredSlots::current_contents(),
1092+
config.zones = [make_dummy_zone_config_using_artifact_hash(
1093+
existing_artifact_hash,
1094+
)]
1095+
.into_iter()
1096+
.collect();
1097+
1098+
test_harness
1099+
.task_handle
1100+
.set_new_config(config.clone())
1101+
.await
1102+
.expect("can communicate with task")
1103+
.expect("config should be ledgered");
1104+
1105+
// Try a config that references a host phase 2 artifact that isn't in
1106+
// the store; this should be rejected.
1107+
config.generation = config.generation.next();
1108+
config.zones = IdMap::new();
1109+
config.host_phase_2 = HostPhase2DesiredSlots {
1110+
slot_a: HostPhase2DesiredContents::CurrentContents,
1111+
slot_b: HostPhase2DesiredContents::Artifact {
1112+
hash: nonexisting_artifact_hash,
1113+
},
10951114
};
1115+
let err = test_harness
1116+
.task_handle
1117+
.set_new_config(config.clone())
1118+
.await
1119+
.expect("can communicate with task")
1120+
.expect_err("config should fail");
1121+
match err {
1122+
LedgerNewConfigError::ArtifactStoreValidationFailed(_) => (),
1123+
_ => panic!("unexpected error {}", InlineErrorChain::new(&err)),
1124+
}
10961125

1097-
// TODO-john also test host phase 2 artifacts!
1126+
// Change the config to reference the artifact that does exist; this one
1127+
// should be accepted.
1128+
config.host_phase_2 = HostPhase2DesiredSlots {
1129+
slot_a: HostPhase2DesiredContents::CurrentContents,
1130+
slot_b: HostPhase2DesiredContents::Artifact {
1131+
hash: existing_artifact_hash,
1132+
},
1133+
};
10981134

10991135
test_harness
11001136
.task_handle
@@ -1112,54 +1148,84 @@ mod tests {
11121148
"reject_artifact_configs_removing_referenced_artifacts",
11131149
);
11141150

1115-
// Claim we have a zone using the all-zeroes hash.
1116-
let used_artifact_hash = ArtifactHash([0; 32]);
1117-
let unused_artifact_hash = ArtifactHash([1; 32]);
1151+
// Construct some artifacts hashes we'll claim to be using and one we
1152+
// won't.
1153+
let used_zone_artifact_hash = ArtifactHash([0; 32]);
1154+
let used_host_artifact_hash = ArtifactHash([1; 32]);
1155+
let unused_artifact_hash = ArtifactHash([2; 32]);
11181156

1157+
// Claim we have a host phase 2
11191158
// Set up the ledger task with an initial config.
11201159
let mut sled_config = make_nonempty_sled_config();
11211160
sled_config.zones.insert(make_dummy_zone_config_using_artifact_hash(
1122-
used_artifact_hash,
1161+
used_zone_artifact_hash,
11231162
));
1163+
sled_config.host_phase_2.slot_a = HostPhase2DesiredContents::Artifact {
1164+
hash: used_host_artifact_hash,
1165+
};
1166+
11241167
let test_harness =
11251168
TestHarness::with_initial_config(logctx.log.clone(), &sled_config)
11261169
.await;
11271170

1128-
// Construct an `ArtifactConfig` that doesn't contain
1129-
// `used_artifact_hash`; this should be rejected.
11301171
let mut artifact_config = ArtifactConfig {
11311172
generation: Generation::new(),
1132-
artifacts: [unused_artifact_hash].into_iter().collect(),
1173+
artifacts: BTreeSet::new(),
11331174
};
1134-
let err = test_harness
1135-
.task_handle
1136-
.validate_artifact_config(artifact_config.clone())
1137-
.await
1138-
.expect("no ledger task error")
1139-
.expect_err("config should be rejected");
11401175

1141-
match err {
1142-
LedgerArtifactConfigError::InUseArtifactedMissing(artifacts) => {
1143-
assert!(
1144-
artifacts.contains_key(&used_artifact_hash),
1145-
"unexpected error contents: {artifacts:?}"
1146-
);
1176+
// Try some ArtifactConfigs that don't contain both `used_*` artifact
1177+
// hashes; all of these should be rejected.
1178+
for incomplete_artifacts in [
1179+
&[used_zone_artifact_hash] as &[ArtifactHash],
1180+
&[used_host_artifact_hash],
1181+
&[unused_artifact_hash],
1182+
&[used_zone_artifact_hash, unused_artifact_hash],
1183+
&[used_host_artifact_hash, unused_artifact_hash],
1184+
] {
1185+
artifact_config.artifacts =
1186+
incomplete_artifacts.iter().cloned().collect();
1187+
1188+
let err = test_harness
1189+
.task_handle
1190+
.validate_artifact_config(artifact_config.clone())
1191+
.await
1192+
.expect("no ledger task error")
1193+
.expect_err("config should be rejected");
1194+
1195+
match err {
1196+
LedgerArtifactConfigError::InUseArtifactedMissing(
1197+
artifacts,
1198+
) => {
1199+
if !incomplete_artifacts.contains(&used_zone_artifact_hash)
1200+
{
1201+
assert!(
1202+
artifacts.contains(&used_zone_artifact_hash),
1203+
"unexpected error contents: {artifacts:?}"
1204+
);
1205+
}
1206+
if !incomplete_artifacts.contains(&used_host_artifact_hash)
1207+
{
1208+
assert!(
1209+
artifacts.contains(&used_host_artifact_hash),
1210+
"unexpected error contents: {artifacts:?}"
1211+
);
1212+
}
1213+
}
1214+
_ => panic!("unexpected error {}", InlineErrorChain::new(&err)),
11471215
}
1148-
_ => panic!("unexpected error {}", InlineErrorChain::new(&err)),
11491216
}
11501217

1151-
// Put the used artifact hash into the artifact config; now it should
1218+
// Put both used artifact hashes into the artifact config; now it should
11521219
// pass validation.
1153-
artifact_config.artifacts.insert(used_artifact_hash);
1220+
artifact_config.artifacts.insert(used_zone_artifact_hash);
1221+
artifact_config.artifacts.insert(used_host_artifact_hash);
11541222
test_harness
11551223
.task_handle
11561224
.validate_artifact_config(artifact_config)
11571225
.await
11581226
.expect("no ledger task error")
11591227
.expect("config is valid");
11601228

1161-
// TODO-john also test host phase 2 artifacts!
1162-
11631229
logctx.cleanup_successful();
11641230
}
11651231

0 commit comments

Comments
 (0)