Skip to content

Commit d4cf01a

Browse files
authored
Planner/builder split: Move image source decisions to the planner (#8472)
* `BlueprintBuilder` no longer decides on image sources for zones. All the `sled_add_zone_*` functions now take an extra `image_source` argument. * `is_zone_ready_for_update()` is now a method in the planner, not the builder. This is used both when deciding whether a zone can be updated and also when choosing the image source for new zones to be added. * `can_zone_be_shut_down_safely()` is now a separate method in the planner. This is only used when deciding whether a zone can be updated. * `zone_image_source` is now a method on `TargetReleaseDescription`. It returns an error if we have a TUF repo that doesn't have exactly one matching artifact for a given `ZoneKind`. In the planner, this bubbles out a few ways: * we'll fail planning if we try to add a zone that isn't in the target release TUF repo * we'll refuse to update a zone if it isn't in the target release TUF repo (but this won't fail planning) * we'll refuse to update Nexus (because it won't be able to tell whether whatever zone type is missing has itself been updated)
1 parent d848827 commit d4cf01a

File tree

9 files changed

+538
-266
lines changed

9 files changed

+538
-266
lines changed

dev-tools/reconfigurator-cli/src/lib.rs

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use nexus_reconfigurator_planning::system::{SledBuilder, SystemDescription};
2323
use nexus_reconfigurator_simulation::SimStateBuilder;
2424
use nexus_reconfigurator_simulation::Simulator;
2525
use nexus_reconfigurator_simulation::{BlueprintId, SimState};
26+
use nexus_sled_agent_shared::inventory::ZoneKind;
2627
use nexus_types::deployment::PlanningInput;
2728
use nexus_types::deployment::SledFilter;
2829
use nexus_types::deployment::execution;
@@ -458,9 +459,26 @@ enum BlueprintEditCommands {
458459
AddNexus {
459460
/// sled on which to deploy the new instance
460461
sled_id: SledOpt,
462+
/// image source for the new zone
463+
///
464+
/// The image source is required if the planning input of the system
465+
/// being edited has a TUF repo; otherwise, it will default to the
466+
/// install dataset.
467+
#[clap(subcommand)]
468+
image_source: Option<ImageSourceArgs>,
461469
},
462470
/// add a CockroachDB instance to a particular sled
463-
AddCockroach { sled_id: SledOpt },
471+
AddCockroach {
472+
/// sled on which to deploy the new instance
473+
sled_id: SledOpt,
474+
/// image source for the new zone
475+
///
476+
/// The image source is required if the planning input of the system
477+
/// being edited has a TUF repo; otherwise, it will default to the
478+
/// install dataset.
479+
#[clap(subcommand)]
480+
image_source: Option<ImageSourceArgs>,
481+
},
464482
/// set the image source for a zone
465483
SetZoneImage {
466484
/// id of zone whose image to set
@@ -714,6 +732,60 @@ enum ImageSourceArgs {
714732
},
715733
}
716734

735+
/// Adding a new zone to a blueprint needs to choose an image source for that
736+
/// zone. Subcommands that add a zone take an optional [`ImageSourceArgs`]
737+
/// parameter. In the (common in test) case where the planning input has no TUF
738+
/// repo at all, the new and old TUF repo policy are identical (i.e., "use the
739+
/// install dataset"), and therefore we have only one logical choice for the
740+
/// image source for any new zone (the install dataset). If a TUF repo _is_
741+
/// involved, we have two choices: use the artifact from the newest TUF repo, or
742+
/// use the artifact from the previous TUF repo policy (which might itself be
743+
/// another TUF repo, or might be the install dataset).
744+
fn image_source_unwrap_or(
745+
image_source: Option<ImageSourceArgs>,
746+
planning_input: &PlanningInput,
747+
zone_kind: ZoneKind,
748+
) -> anyhow::Result<BlueprintZoneImageSource> {
749+
if let Some(image_source) = image_source {
750+
Ok(image_source.into())
751+
} else if planning_input.tuf_repo() == planning_input.old_repo() {
752+
planning_input
753+
.tuf_repo()
754+
.description()
755+
.zone_image_source(zone_kind)
756+
.context("could not determine image source")
757+
} else {
758+
let mut options = vec!["`install-dataset`".to_string()];
759+
for (name, repo) in [
760+
("previous", planning_input.old_repo()),
761+
("current", planning_input.tuf_repo()),
762+
] {
763+
match repo.description().zone_image_source(zone_kind) {
764+
// Install dataset is already covered, and if either TUF repo is
765+
// missing an artifact of this kind, it's not an option.
766+
Ok(BlueprintZoneImageSource::InstallDataset) | Err(_) => (),
767+
Ok(BlueprintZoneImageSource::Artifact { version, hash }) => {
768+
let version = match version {
769+
BlueprintZoneImageVersion::Available { version } => {
770+
version.to_string()
771+
}
772+
BlueprintZoneImageVersion::Unknown => {
773+
"unknown".to_string()
774+
}
775+
};
776+
options.push(format!(
777+
"`artifact {version} {hash}` (from {name} TUF repo)"
778+
));
779+
}
780+
}
781+
}
782+
bail!(
783+
"must specify image source for new zone; options: {}",
784+
options.join(", ")
785+
)
786+
}
787+
}
788+
717789
impl From<ImageSourceArgs> for BlueprintZoneImageSource {
718790
fn from(value: ImageSourceArgs) -> Self {
719791
match value {
@@ -1327,17 +1399,27 @@ fn cmd_blueprint_edit(
13271399
}
13281400

13291401
let label = match args.edit_command {
1330-
BlueprintEditCommands::AddNexus { sled_id } => {
1402+
BlueprintEditCommands::AddNexus { sled_id, image_source } => {
13311403
let sled_id = sled_id.to_sled_id(system.description())?;
1404+
let image_source = image_source_unwrap_or(
1405+
image_source,
1406+
&planning_input,
1407+
ZoneKind::Nexus,
1408+
)?;
13321409
builder
1333-
.sled_add_zone_nexus(sled_id)
1410+
.sled_add_zone_nexus(sled_id, image_source)
13341411
.context("failed to add Nexus zone")?;
13351412
format!("added Nexus zone to sled {}", sled_id)
13361413
}
1337-
BlueprintEditCommands::AddCockroach { sled_id } => {
1414+
BlueprintEditCommands::AddCockroach { sled_id, image_source } => {
13381415
let sled_id = sled_id.to_sled_id(system.description())?;
1416+
let image_source = image_source_unwrap_or(
1417+
image_source,
1418+
&planning_input,
1419+
ZoneKind::CockroachDb,
1420+
)?;
13391421
builder
1340-
.sled_add_zone_cockroachdb(sled_id)
1422+
.sled_add_zone_cockroachdb(sled_id, image_source)
13411423
.context("failed to add CockroachDB zone")?;
13421424
format!("added CockroachDB zone to sled {}", sled_id)
13431425
}

live-tests/tests/test_nexus_add_remove.rs

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
1818
use nexus_reconfigurator_planning::planner::Planner;
1919
use nexus_reconfigurator_preparation::PlanningInputFromDb;
2020
use nexus_sled_agent_shared::inventory::ZoneKind;
21+
use nexus_types::deployment::BlueprintZoneDisposition;
2122
use nexus_types::deployment::SledFilter;
2223
use omicron_common::address::NEXUS_INTERNAL_PORT;
2324
use omicron_test_utils::dev::poll::CondCheckError;
@@ -54,19 +55,49 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) {
5455
let nexus = initial_nexus_clients.first().expect("internal Nexus client");
5556

5657
// First, deploy a new Nexus zone to an arbitrary sled.
57-
let sled_id = planning_input
58+
let commissioned_sled_ids = planning_input
5859
.all_sled_ids(SledFilter::Commissioned)
59-
.next()
60-
.expect("any sled id");
60+
.collect::<Vec<_>>();
61+
let sled_id = *commissioned_sled_ids.first().expect("any sled id");
6162
let (blueprint1, blueprint2) = blueprint_edit_current_target(
6263
log,
6364
&planning_input,
6465
&collection,
6566
&nexus,
6667
&|builder: &mut BlueprintBuilder| {
68+
// We have to tell the builder what image source to use for the new
69+
// Nexus zone. If we were the planner, we'd check whether we have a
70+
// TUF repo (or two) then decide whether to use the image from one
71+
// of those or the install dataset. Instead of duplicating all of
72+
// that logic, we'll just find an existing Nexus zone and copy its
73+
// image source. This should always be right in this context; it
74+
// would only be wrong if there are existing Nexus zones with
75+
// different image sources, which would only be true in the middle
76+
// of an update.
77+
let image_source = commissioned_sled_ids
78+
.iter()
79+
.find_map(|&sled_id| {
80+
builder
81+
.current_sled_zones(
82+
sled_id,
83+
BlueprintZoneDisposition::is_in_service,
84+
)
85+
.find_map(|zone| {
86+
if zone.zone_type.is_nexus() {
87+
Some(zone.image_source.clone())
88+
} else {
89+
None
90+
}
91+
})
92+
})
93+
.context(
94+
"could not find in-service Nexus in parent blueprint",
95+
)?;
96+
6797
builder
68-
.sled_add_zone_nexus(sled_id)
98+
.sled_add_zone_nexus(sled_id, image_source)
6999
.context("adding Nexus zone")?;
100+
70101
Ok(())
71102
},
72103
)

nexus/db-queries/src/db/datastore/deployment.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2717,13 +2717,22 @@ mod tests {
27172717

27182718
// Add zones to our new sled.
27192719
assert_eq!(
2720-
builder.sled_ensure_zone_ntp(new_sled_id).unwrap(),
2720+
builder
2721+
.sled_ensure_zone_ntp(
2722+
new_sled_id,
2723+
BlueprintZoneImageSource::InstallDataset
2724+
)
2725+
.unwrap(),
27212726
Ensure::Added
27222727
);
27232728
for zpool_id in new_sled_zpools.keys() {
27242729
assert_eq!(
27252730
builder
2726-
.sled_ensure_zone_crucible(new_sled_id, *zpool_id)
2731+
.sled_ensure_zone_crucible(
2732+
new_sled_id,
2733+
*zpool_id,
2734+
BlueprintZoneImageSource::InstallDataset
2735+
)
27272736
.unwrap(),
27282737
Ensure::Added
27292738
);

nexus/db-queries/src/db/datastore/vpc.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2973,6 +2973,7 @@ mod tests {
29732973
use nexus_types::deployment::BlueprintTarget;
29742974
use nexus_types::deployment::BlueprintZoneConfig;
29752975
use nexus_types::deployment::BlueprintZoneDisposition;
2976+
use nexus_types::deployment::BlueprintZoneImageSource;
29762977
use nexus_types::external_api::params;
29772978
use nexus_types::identity::Asset;
29782979
use omicron_common::api::external;
@@ -3328,7 +3329,12 @@ mod tests {
33283329
.expect("ensured disks");
33293330
}
33303331
builder
3331-
.sled_add_zone_nexus_with_config(sled_ids[2], false, Vec::new())
3332+
.sled_add_zone_nexus_with_config(
3333+
sled_ids[2],
3334+
false,
3335+
Vec::new(),
3336+
BlueprintZoneImageSource::InstallDataset,
3337+
)
33323338
.expect("added nexus to third sled");
33333339
builder.build()
33343340
};
@@ -3397,7 +3403,12 @@ mod tests {
33973403
.expect("created blueprint builder");
33983404
for &sled_id in &sled_ids {
33993405
builder
3400-
.sled_add_zone_nexus_with_config(sled_id, false, Vec::new())
3406+
.sled_add_zone_nexus_with_config(
3407+
sled_id,
3408+
false,
3409+
Vec::new(),
3410+
BlueprintZoneImageSource::InstallDataset,
3411+
)
34013412
.expect("added nexus to third sled");
34023413
}
34033414
builder.build()

nexus/reconfigurator/execution/src/dns.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1523,7 +1523,12 @@ mod test {
15231523
.unwrap();
15241524
let sled_id =
15251525
blueprint.sleds().next().expect("expected at least one sled");
1526-
builder.sled_add_zone_nexus(sled_id).unwrap();
1526+
builder
1527+
.sled_add_zone_nexus(
1528+
sled_id,
1529+
BlueprintZoneImageSource::InstallDataset,
1530+
)
1531+
.unwrap();
15271532
let blueprint2 = builder.build();
15281533
eprintln!("blueprint2: {}", blueprint2.display());
15291534
// Figure out the id of the new zone.

0 commit comments

Comments
 (0)