From 4478590beaa5e976b029426d90206f4bf92919da Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 15 Jul 2025 00:19:29 +0000 Subject: [PATCH 1/2] [spr] initial version Created using spr 1.3.6-beta.1 --- Cargo.lock | 1 + .../output/cmds-noop-image-source-stdout | 61 ++- .../tests/output/cmds-target-release-stdout | 36 +- nexus/reconfigurator/planning/Cargo.toml | 1 + nexus/reconfigurator/planning/src/planner.rs | 241 +++------ .../planning/src/planner/image_source.rs | 483 ++++++++++++++++++ 6 files changed, 614 insertions(+), 209 deletions(-) create mode 100644 nexus/reconfigurator/planning/src/planner/image_source.rs diff --git a/Cargo.lock b/Cargo.lock index 0a28ac69b4f..9860cea13b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6655,6 +6655,7 @@ dependencies = [ "expectorate", "gateway-client", "id-map", + "iddqd", "illumos-utils", "indexmap 2.10.0", "internal-dns-resolver", diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout index 6ca20a8347c..a941d10012e 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout @@ -156,24 +156,28 @@ generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configu > blueprint-plan latest latest WARN skipping zones eligible for cleanup check (sled not present in latest inventory collection), sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e -WARN skipping noop image source check since sled-agent encountered error retrieving zone manifest (this is abnormal), sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, error: reconfigurator-sim simulated error: simulated error obtaining zone manifest -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, tuf_artifact_id: nexus v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, tuf_artifact_id: internal-dns v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, tuf_artifact_id: crucible-zone v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, tuf_artifact_id: ntp v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, tuf_artifact_id: external-dns v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, tuf_artifact_id: crucible-pantry-zone v1.0.0 (zone) -INFO noop converting 6/6 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, tuf_artifact_id: nexus v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, tuf_artifact_id: external-dns v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, tuf_artifact_id: crucible-zone v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, tuf_artifact_id: internal-dns v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, tuf_artifact_id: crucible-pantry-zone v1.0.0 (zone) -WARN zone manifest inventory indicated install dataset artifact is invalid, not using artifact (this is abnormal), sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: e8fe709c-725f-4bb2-b714-ffcda13a9e54, kind: internal_ntp, file_name: ntp.tar.gz, error: reconfigurator-sim: simulated error validating zone image -INFO noop converting 5/6 install-dataset zones to artifact store, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34 -INFO noop converting 0/2 install-dataset zones to artifact store, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7 -INFO skipping noop image source check on sled (blueprint has get_remove_mupdate_override set for sled), sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, bp_remove_mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff -INFO skipping noop image source check (sled not present in latest inventory collection), sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e +WARN skipped noop image source check since sled-agent encountered error retrieving zone manifest (this is abnormal), sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, error: reconfigurator-sim simulated error: simulated error obtaining zone manifest +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 0, num_eligible: 6, num_ineligible: 0 +INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 0c71b3b2-6ceb-4e8f-b020-b08675e83038, kind: nexus, file_name: nexus.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 427ec88f-f467-42fa-9bbb-66a91a36103c, kind: internal_dns, file_name: internal_dns.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 5199c033-4cf9-4ab6-8ae7-566bd7606363, kind: crucible, file_name: crucible.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 6444f8a5-6465-4f0b-a549-1993c113569c, kind: internal_ntp, file_name: ntp.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 803bfb63-c246-41db-b0da-d3b87ddfc63d, kind: external_dns, file_name: external_dns.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: ba4994a8-23f9-4b1a-a84f-a08d74591389, kind: crucible_pantry, file_name: crucible_pantry.tar.gz, new_image_source: artifact: version 1.0.0 +INFO performed noop image source checks on sled, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 0, num_eligible: 5, num_ineligible: 1 +INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 33862f97-2897-4d53-a9a6-78a80f7eb13f, kind: nexus, file_name: nexus.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 43a0588f-5b57-469b-a173-db6cb6105e4c, kind: external_dns, file_name: external_dns.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 8e3dd7a4-75a3-4917-a6f4-0991bbdef7ea, kind: crucible, file_name: crucible.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 97753dbd-5a0f-4273-b1be-db6bb2b69381, kind: internal_dns, file_name: internal_dns.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: d07a1fed-4235-4821-a1e5-f7eb2646ff33, kind: crucible_pantry, file_name: crucible_pantry.tar.gz, new_image_source: artifact: version 1.0.0 +WARN zone manifest inventory indicated install dataset artifact is invalid, not using artifact (this is abnormal), sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: e8fe709c-725f-4bb2-b714-ffcda13a9e54, kind: internal_ntp, file_name: ntp.tar.gz, message: reconfigurator-sim: simulated error validating zone image +INFO performed noop image source checks on sled, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, num_total: 2, num_already_artifact: 0, num_eligible: 0, num_ineligible: 2 +INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: b910534b-2a53-4335-a3d9-5311d2f3186a, kind: internal_ntp, file_name: ntp.tar.gz, expected_hash: d76e26198daed69cdae04490d7477f8c842e0dbe37d463eac0d0a8d3fb803095 +INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: ecbe0b3d-1acc-44b2-b6d4-f4d2770516e4, kind: crucible, file_name: crucible.tar.gz, expected_hash: 866f6a7c2e51c056fb722b5113e80181cc9cd8b712a0d3dbf1edc4ce29e5229e +INFO skipped noop image source check on sled (blueprint has get_remove_mupdate_override set for sled), sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, bp_remove_mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff +INFO skipped noop image source check (sled not present in latest inventory collection), sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e +INFO noop converting 6/6 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 0 +INFO noop converting 5/6 install-dataset zones to artifact store, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 0 INFO parent blueprint contains NTP zone, but it's not in inventory yet, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 @@ -511,15 +515,18 @@ set sled e96e226f-4ed9-4c01-91b9-69a9cd076c9e inventory visibility: hidden -> vi generated inventory collection 61f451b3-2121-4ed6-91c7-a550054f6c21 from configured sleds > blueprint-plan latest latest -WARN skipping noop image source check since sled-agent encountered error retrieving zone manifest (this is abnormal), sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, error: reconfigurator-sim simulated error: simulated error obtaining zone manifest -INFO noop converting 0/0 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -WARN zone manifest inventory indicated install dataset artifact is invalid, not using artifact (this is abnormal), sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: e8fe709c-725f-4bb2-b714-ffcda13a9e54, kind: internal_ntp, file_name: ntp.tar.gz, error: reconfigurator-sim: simulated error validating zone image -INFO noop converting 0/1 install-dataset zones to artifact store, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34 -INFO noop converting 0/2 install-dataset zones to artifact store, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7 -INFO skipping noop image source check on sled (blueprint has get_remove_mupdate_override set for sled), sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, bp_remove_mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, tuf_artifact_id: crucible-zone v1.0.0 (zone) -INFO install dataset artifact hash matches TUF repo, switching out the zone image source to Artifact, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, tuf_artifact_id: ntp v1.0.0 (zone) -INFO noop converting 2/2 install-dataset zones to artifact store, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e +WARN skipped noop image source check since sled-agent encountered error retrieving zone manifest (this is abnormal), sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, error: reconfigurator-sim simulated error: simulated error obtaining zone manifest +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 6, num_eligible: 0, num_ineligible: 0 +INFO performed noop image source checks on sled, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 5, num_eligible: 0, num_ineligible: 1 +WARN zone manifest inventory indicated install dataset artifact is invalid, not using artifact (this is abnormal), sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: e8fe709c-725f-4bb2-b714-ffcda13a9e54, kind: internal_ntp, file_name: ntp.tar.gz, message: reconfigurator-sim: simulated error validating zone image +INFO performed noop image source checks on sled, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, num_total: 2, num_already_artifact: 0, num_eligible: 0, num_ineligible: 2 +INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: b910534b-2a53-4335-a3d9-5311d2f3186a, kind: internal_ntp, file_name: ntp.tar.gz, expected_hash: d76e26198daed69cdae04490d7477f8c842e0dbe37d463eac0d0a8d3fb803095 +INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: ecbe0b3d-1acc-44b2-b6d4-f4d2770516e4, kind: crucible, file_name: crucible.tar.gz, expected_hash: 866f6a7c2e51c056fb722b5113e80181cc9cd8b712a0d3dbf1edc4ce29e5229e +INFO skipped noop image source check on sled (blueprint has get_remove_mupdate_override set for sled), sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, bp_remove_mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff +INFO performed noop image source checks on sled, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, num_total: 2, num_already_artifact: 0, num_eligible: 2, num_ineligible: 0 +INFO zone is eligible for noop image source conversion, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, zone_id: c1467d0e-b3de-4bd8-b36a-d8b36626badc, kind: crucible, file_name: crucible.tar.gz, new_image_source: artifact: version 1.0.0 +INFO zone is eligible for noop image source conversion, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, zone_id: c800ba17-240e-4b72-8ae6-afc30b6baa96, kind: internal_ntp, file_name: ntp.tar.gz, new_image_source: artifact: version 1.0.0 +INFO noop converting 2/2 install-dataset zones to artifact store, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, num_total: 2, num_already_artifact: 0 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout index 0f253e5c83a..fbe25e880a2 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout @@ -192,9 +192,9 @@ f45ba181-4b56-42cc-a762-874d90184a43 0 > # First step: upgrade one SP. > blueprint-plan dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 f45ba181-4b56-42cc-a762-874d90184a43 -INFO noop converting 0/9 install-dataset zones to artifact store, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 +INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 +INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 @@ -377,9 +377,9 @@ external DNS: > # If we generate another plan, there should be no change. > blueprint-plan 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 f45ba181-4b56-42cc-a762-874d90184a43 -INFO noop converting 0/9 install-dataset zones to artifact store, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 +INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 +INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 @@ -563,9 +563,9 @@ set sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 SP versions: active -> 1.0.0 generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configured sleds > blueprint-plan 58d5e830-0884-47d8-a7cd-b2b3751adeb4 eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 -INFO noop converting 0/9 install-dataset zones to artifact store, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 +INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 +INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 @@ -759,9 +759,9 @@ set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c SP versions: inactive -> 0.5.0 generated inventory collection 61f451b3-2121-4ed6-91c7-a550054f6c21 from configured sleds > blueprint-plan af934083-59b5-4bf6-8966-6fb5292c29e1 61f451b3-2121-4ed6-91c7-a550054f6c21 -INFO noop converting 0/9 install-dataset zones to artifact store, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 +INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 +INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 @@ -953,9 +953,9 @@ set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c SP versions: active -> 1.0.0 generated inventory collection b1bda47d-2c19-4fba-96e3-d9df28db7436 from configured sleds > blueprint-plan df06bb57-ad42-4431-9206-abff322896c7 b1bda47d-2c19-4fba-96e3-d9df28db7436 -INFO noop converting 0/9 install-dataset zones to artifact store, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 +INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 +INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 @@ -1149,9 +1149,9 @@ set sled d81c6a84-79b8-4958-ae41-ea46c9b19763 SP versions: active -> 1.0.0 generated inventory collection a71f7a73-35a6-45e8-acbe-f1c5925eed69 from configured sleds > blueprint-plan 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba a71f7a73-35a6-45e8-acbe-f1c5925eed69 -INFO noop converting 0/9 install-dataset zones to artifact store, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 -INFO noop converting 0/8 install-dataset zones to artifact store, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 +INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 +INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 +INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 8a05a9ca8c8..6d4725b6706 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -13,6 +13,7 @@ chrono.workspace = true debug-ignore.workspace = true daft.workspace = true gateway-client.workspace = true +iddqd.workspace = true id-map.workspace = true illumos-utils.workspace = true indexmap.workspace = true diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 89285f59650..804ee7b7a03 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -14,6 +14,10 @@ use crate::blueprint_builder::Operation; use crate::blueprint_editor::DisksEditError; use crate::blueprint_editor::SledEditError; use crate::mgs_updates::plan_mgs_updates; +use crate::planner::image_source::NoopConvertInfo; +use crate::planner::image_source::NoopConvertSledStatus; +use crate::planner::image_source::NoopConvertZoneCounts; +use crate::planner::image_source::NoopConvertZoneStatus; use crate::planner::omicron_zone_placement::PlacementError; use gateway_client::types::SpType; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; @@ -32,7 +36,6 @@ use nexus_types::deployment::DiskFilter; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledFilter; -use nexus_types::deployment::TargetReleaseDescription; use nexus_types::deployment::TufRepoContentsError; use nexus_types::deployment::ZpoolFilter; use nexus_types::external_api::views::PhysicalDiskPolicy; @@ -50,7 +53,6 @@ use slog::{Logger, info, warn}; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::collections::HashMap; use std::str::FromStr; pub(crate) use self::omicron_zone_placement::DiscretionaryOmicronZone; @@ -59,6 +61,7 @@ use self::omicron_zone_placement::OmicronZonePlacementSledState; pub use self::rng::PlannerRng; pub use self::rng::SledPlannerRng; +mod image_source; mod omicron_zone_placement; pub(crate) mod rng; @@ -157,7 +160,12 @@ impl<'a> Planner<'a> { fn do_plan(&mut self) -> Result<(), Error> { self.do_plan_expunge()?; self.do_plan_decommission()?; - self.do_plan_noop_image_source()?; + + let noop_info = + NoopConvertInfo::new(self.input, self.inventory, &self.blueprint)?; + noop_info.log_to(&self.log); + + self.do_plan_noop_image_source(noop_info)?; self.do_plan_add()?; if let UpdateStepResult::ContinueToNextStep = self.do_plan_mgs_updates() { @@ -504,180 +512,85 @@ impl<'a> Planner<'a> { Ok(()) } - fn do_plan_noop_image_source(&mut self) -> Result<(), Error> { - let TargetReleaseDescription::TufRepo(current_artifacts) = - self.input.tuf_repo().description() - else { - info!( - self.log, - "skipping noop image source check for all sleds \ - (no current TUF repo)", - ); - return Ok(()); + fn do_plan_noop_image_source( + &mut self, + noop_info: NoopConvertInfo, + ) -> Result<(), Error> { + let sleds = match noop_info { + NoopConvertInfo::GlobalEligible { sleds } => sleds, + NoopConvertInfo::GlobalIneligible { .. } => return Ok(()), }; - let artifacts_by_hash: HashMap<_, _> = current_artifacts - .artifacts - .iter() - .map(|artifact| (artifact.hash, artifact)) - .collect(); - - for sled_id in self.input.all_sled_ids(SledFilter::InService) { - let Some(inv_sled) = self.inventory.sled_agents.get(&sled_id) - else { - info!( - self.log, - "skipping noop image source check \ - (sled not present in latest inventory collection)"; - "sled_id" => %sled_id, - ); - continue; - }; + for sled in sleds { + let zones = match &sled.status { + NoopConvertSledStatus::Ineligible(_) => continue, + NoopConvertSledStatus::MaybeEligible { + mupdate_override_id, + zones, + } => { + // If the mupdate override ID is set, we can't do noop + // conversions. Log this fact. + if let Some(override_id) = mupdate_override_id { + info!( + self.log, + "sled is ineligible for noop conversions due to \ + mupdate override"; + "sled_id" => %sled.sled_id, + "mupdate_override_id" => %override_id, + ); + continue; + } - let zone_manifest = match &inv_sled - .zone_image_resolver - .zone_manifest - .boot_inventory - { - Ok(zm) => zm, - Err(message) => { - // This is a string so we don't use InlineErrorChain::new. - let message: &str = message; - warn!( - self.log, - "skipping noop image source check since \ - sled-agent encountered error retrieving zone manifest \ - (this is abnormal)"; - "sled_id" => %sled_id, - "error" => %message, - ); - continue; + zones } }; - // Does the blueprint have the remove_mupdate_override field set for - // this sled? If it does, we don't want to touch the zones on this - // sled (they should all be InstallDataset until the - // remove_mupdate_override field is cleared). - if let Some(id) = - self.blueprint.sled_get_remove_mupdate_override(sled_id)? - { - info!( + let zone_counts = NoopConvertZoneCounts::new(zones); + let num_install_dataset = + zone_counts.num_maybe_eligible + zone_counts.num_ineligible; + + if num_install_dataset == 0 { + debug!( self.log, - "skipping noop image source check on sled \ - (blueprint has get_remove_mupdate_override set for sled)"; - "sled_id" => %sled_id, - "bp_remove_mupdate_override_id" => %id, + "all zones are already Artifact, so \ + no noop image source action required"; + "num_total" => zone_counts.num_total, ); continue; } + if zone_counts.num_maybe_eligible > 0 { + info!( + self.log, + "noop converting {}/{} install-dataset zones to artifact store", + // If we're here, then the count is of actually eligible zones, + // not just maybe-eligible ones. + zone_counts.num_maybe_eligible, + num_install_dataset; + "sled_id" => %sled.sled_id, + "num_total" => zone_counts.num_total, + "num_already_artifact" => zone_counts.num_already_artifact, + ); + } - // Which zones have image sources set to InstallDataset? - let install_dataset_zones = self - .blueprint - .current_sled_zones( - sled_id, - BlueprintZoneDisposition::is_in_service, - ) - .filter(|z| { - z.image_source == BlueprintZoneImageSource::InstallDataset - }); - - // Out of these, which zones' hashes (as reported in the zone - // manifest) match the corresponding ones in the TUF repo? - let mut install_dataset_zone_count = 0; - let matching_zones: Vec<_> = install_dataset_zones - .inspect(|_| { - install_dataset_zone_count += 1; - }) - .filter_map(|z| { - let file_name = z.kind().artifact_in_install_dataset(); - let Some(artifact) = zone_manifest.artifacts.get(file_name) - else { - // The blueprint indicates that a zone should be present - // that isn't in the install dataset. This might be an old - // install dataset with a zone kind known to this version of - // Nexus that isn't present in it. Not normally a cause for - // concern. - debug!( - self.log, - "blueprint zone not found in zone manifest, \ - ignoring for noop checks"; - "sled_id" => %sled_id, - "zone_id" => %z.id, - "kind" => z.kind().report_str(), - "file_name" => file_name, - ); - return None; - }; - if let Err(message) = &artifact.status { - // The artifact is somehow invalid and corrupt -- definitely - // something to warn about and not proceed. - warn!( - self.log, - "zone manifest inventory indicated install dataset \ - artifact is invalid, not using artifact (this is \ - abnormal)"; - "sled_id" => %sled_id, - "zone_id" => %z.id, - "kind" => z.kind().report_str(), - "file_name" => file_name, - "error" => %message, - ); - return None; + for zone in zones { + match &zone.status { + NoopConvertZoneStatus::Eligible(new_image_source) => { + self.blueprint.sled_set_zone_source( + sled.sled_id, + zone.zone_id, + new_image_source.clone(), + )?; } + NoopConvertZoneStatus::AlreadyArtifact { .. } + | NoopConvertZoneStatus::Ineligible(_) => {} + } + } - // Does the hash match what's in the TUF repo? - let Some(tuf_artifact) = - artifacts_by_hash.get(&artifact.expected_hash) - else { - debug!( - self.log, - "install dataset artifact hash not found in TUF repo, \ - ignoring for noop checks"; - "sled_id" => %sled_id, - "zone_id" => %z.id, - "kind" => z.kind().report_str(), - "file_name" => file_name, - ); - return None; - }; - - info!( - self.log, - "install dataset artifact hash matches TUF repo, \ - switching out the zone image source to Artifact"; - "sled_id" => %sled_id, - "tuf_artifact_id" => %tuf_artifact.id, - ); - Some((z.id, tuf_artifact)) - }) - .collect(); - - info!( - self.log, - "noop converting {}/{} install-dataset zones to artifact store", - matching_zones.len(), - install_dataset_zone_count; - "sled_id" => %sled_id, + self.blueprint.record_operation( + Operation::SledNoopZoneImageSourcesUpdated { + sled_id: sled.sled_id, + count: zone_counts.num_maybe_eligible, + }, ); - - // Set all these zones' image sources to the corresponding - // blueprint. - for (zone_id, tuf_artifact) in &matching_zones { - self.blueprint.sled_set_zone_source( - sled_id, - *zone_id, - BlueprintZoneImageSource::from_available_artifact( - tuf_artifact, - ), - )?; - self.blueprint.record_operation( - Operation::SledNoopZoneImageSourcesUpdated { - sled_id, - count: matching_zones.len(), - }, - ); - } } Ok(()) diff --git a/nexus/reconfigurator/planning/src/planner/image_source.rs b/nexus/reconfigurator/planning/src/planner/image_source.rs new file mode 100644 index 00000000000..dee7924cdac --- /dev/null +++ b/nexus/reconfigurator/planning/src/planner/image_source.rs @@ -0,0 +1,483 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::{ + deployment::{ + BlueprintZoneDisposition, BlueprintZoneImageSource, + BlueprintZoneImageVersion, PlanningInput, SledFilter, + TargetReleaseDescription, + }, + inventory::Collection, +}; +use omicron_uuid_kinds::{MupdateOverrideUuid, OmicronZoneUuid, SledUuid}; +use slog::{debug, info, o, warn}; +use tufaceous_artifact::ArtifactHash; + +use crate::blueprint_builder::{BlueprintBuilder, Error}; + +/// Information about zones eligible for noop conversion from `InstallDataset` +/// to `Artifact`. +#[derive(Clone, Debug)] +pub(crate) enum NoopConvertInfo { + /// There's a global reason due to which no-op conversions cannot occur. + GlobalIneligible { reason: NoopConvertGlobalIneligibleReason }, + + /// Global checks have passed. + GlobalEligible { sleds: IdOrdMap }, +} + +impl NoopConvertInfo { + pub(crate) fn new( + input: &PlanningInput, + inventory: &Collection, + blueprint: &BlueprintBuilder<'_>, + ) -> Result { + let TargetReleaseDescription::TufRepo(current_artifacts) = + input.tuf_repo().description() + else { + return Ok(Self::GlobalIneligible { + reason: NoopConvertGlobalIneligibleReason::NoTufRepo, + }); + }; + + let mut sleds = IdOrdMap::new(); + + let artifacts_by_hash: HashMap<_, _> = current_artifacts + .artifacts + .iter() + .map(|artifact| (artifact.hash, artifact)) + .collect(); + + for sled_id in input.all_sled_ids(SledFilter::InService) { + let Some(inv_sled) = inventory.sled_agents.get(&sled_id) else { + sleds + .insert_unique(NoopConvertSledInfo { + sled_id, + status: NoopConvertSledStatus::Ineligible( + NoopConvertSledIneligibleReason::NotInInventory, + ), + }) + .expect("sled IDs are unique"); + continue; + }; + + let zone_manifest = match &inv_sled + .zone_image_resolver + .zone_manifest + .boot_inventory + { + Ok(zm) => zm, + Err(message) => { + sleds + .insert_unique(NoopConvertSledInfo { + sled_id, + status: NoopConvertSledStatus::Ineligible( + NoopConvertSledIneligibleReason::ManifestError { + message: message.to_owned(), + }, + ), + }) + .expect("sled IDs are unique"); + continue; + } + }; + + // Does the blueprint have the remove_mupdate_override field set for + // this sled? If it does, we don't want to touch the zones on this + // sled (they should all be InstallDataset until the + // remove_mupdate_override field is cleared). + if let Some(override_id) = + blueprint.sled_get_remove_mupdate_override(sled_id)? + { + sleds + .insert_unique(NoopConvertSledInfo { + sled_id, + status: NoopConvertSledStatus::Ineligible( + NoopConvertSledIneligibleReason::BpMupdateOverride { + override_id, + }, + ), + }) + .expect("sled IDs are unique"); + continue; + } + + // Out of these, which zones' hashes (as reported in the zone + // manifest) match the corresponding ones in the TUF repo? + let zones = blueprint + .current_sled_zones( + sled_id, + BlueprintZoneDisposition::is_in_service, + ) + .map(|z| { + let file_name = z.kind().artifact_in_install_dataset(); + + match &z.image_source { + BlueprintZoneImageSource::InstallDataset => {} + BlueprintZoneImageSource::Artifact { + version, + hash, + } => { + return NoopConvertZoneInfo { + zone_id: z.id, + kind: z.kind(), + status: + NoopConvertZoneStatus::AlreadyArtifact { + version: version.clone(), + hash: *hash, + }, + }; + } + } + + let Some(artifact) = zone_manifest.artifacts.get(file_name) + else { + // The blueprint indicates that a zone should be present + // that isn't in the install dataset. This might be an old + // install dataset with a zone kind known to this version of + // Nexus that isn't present in it. + return NoopConvertZoneInfo { + zone_id: z.id, + kind: z.kind(), + status: NoopConvertZoneStatus::Ineligible( + NoopConvertZoneIneligibleReason::NotInManifest, + ), + }; + }; + if let Err(message) = &artifact.status { + // The artifact is somehow invalid and corrupt. + return NoopConvertZoneInfo { + zone_id: z.id, + kind: z.kind(), + status: NoopConvertZoneStatus::Ineligible( + NoopConvertZoneIneligibleReason::ArtifactError { + message: message.to_owned(), + }, + ), + }; + } + + // Does the hash match what's in the TUF repo? + let Some(&tuf_artifact) = + artifacts_by_hash.get(&artifact.expected_hash) + else { + return NoopConvertZoneInfo { + zone_id: z.id, + kind: z.kind(), + status: NoopConvertZoneStatus::Ineligible( + NoopConvertZoneIneligibleReason::NotInTufRepo { + expected_hash: artifact.expected_hash, + }, + ), + }; + }; + + NoopConvertZoneInfo { + zone_id: z.id, + kind: z.kind(), + status: NoopConvertZoneStatus::Eligible( + BlueprintZoneImageSource::from_available_artifact( + tuf_artifact, + ), + ), + } + }) + .collect(); + + sleds + .insert_unique(NoopConvertSledInfo { + sled_id, + status: NoopConvertSledStatus::MaybeEligible { + mupdate_override_id: blueprint + .sled_get_remove_mupdate_override(sled_id)?, + zones, + }, + }) + .expect("sled IDs are unique"); + } + + Ok(Self::GlobalEligible { sleds }) + } + + pub(crate) fn log_to(&self, log: &slog::Logger) { + match self { + Self::GlobalIneligible { + reason: NoopConvertGlobalIneligibleReason::NoTufRepo, + } => { + info!( + log, + "skipping noop image source check for all sleds \ + (no current TUF repo)", + ); + } + Self::GlobalEligible { sleds } => { + for sled in sleds { + let log = + log.new(o!("sled_id" => sled.sled_id.to_string())); + sled.status.log_to(&log); + } + } + } + } +} + +#[derive(Clone, Debug)] +pub(crate) enum NoopConvertGlobalIneligibleReason { + /// No TUF repository is available. + NoTufRepo, +} + +#[derive(Clone, Debug)] +pub(crate) struct NoopConvertSledInfo { + pub(crate) sled_id: SledUuid, + pub(crate) status: NoopConvertSledStatus, +} + +impl IdOrdItem for NoopConvertSledInfo { + type Key<'a> = SledUuid; + + fn key(&self) -> Self::Key<'_> { + self.sled_id + } + + id_upcast!(); +} + +#[derive(Clone, Debug)] +pub(crate) enum NoopConvertSledStatus { + /// The sled is ineligible for conversion. + Ineligible(NoopConvertSledIneligibleReason), + + /// The sled might be eligible for conversion in case `mupdate_override_id` + /// is `None`. + MaybeEligible { + mupdate_override_id: Option, + zones: IdOrdMap, + }, +} + +impl NoopConvertSledStatus { + fn log_to(&self, log: &slog::Logger) { + match self { + Self::Ineligible( + NoopConvertSledIneligibleReason::NotInInventory, + ) => { + info!( + log, + "skipped noop image source check \ + (sled not present in latest inventory collection)" + ); + } + Self::Ineligible( + NoopConvertSledIneligibleReason::ManifestError { message }, + ) => { + // This is a string so we don't use InlineErrorChain::new. + let message: &str = message; + warn!( + log, + "skipped noop image source check since \ + sled-agent encountered error retrieving zone manifest \ + (this is abnormal)"; + "error" => %message, + ); + } + Self::Ineligible( + NoopConvertSledIneligibleReason::BpMupdateOverride { + override_id, + }, + ) => { + info!( + log, + "skipped noop image source check on sled \ + (blueprint has get_remove_mupdate_override set for sled)"; + "bp_remove_mupdate_override_id" => %override_id, + ); + } + Self::MaybeEligible { mupdate_override_id, zones } => { + let zone_counts = NoopConvertZoneCounts::new(zones); + + if let Some(override_id) = mupdate_override_id { + info!( + log, + "performed noop image source checks on sled \ + (maybe_eligible_zones will become eligible if \ + mupdate_override_id is cleared in a planning step)"; + "mupdate_override_id" => %override_id, + "num_total" => zone_counts.num_total, + "num_already_artifact" => zone_counts.num_already_artifact, + "num_maybe_eligible" => zone_counts.num_maybe_eligible, + "num_ineligible" => zone_counts.num_ineligible, + ); + } else { + info!( + log, + "performed noop image source checks on sled"; + "num_total" => zone_counts.num_total, + "num_already_artifact" => zone_counts.num_already_artifact, + // Since mupdate_override_id is None, eligible zones are + // truly eligible. + "num_eligible" => zone_counts.num_maybe_eligible, + "num_ineligible" => zone_counts.num_ineligible, + ); + } + + for zone in zones { + zone.log_to(log); + } + } + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct NoopConvertZoneCounts { + pub(crate) num_total: usize, + pub(crate) num_already_artifact: usize, + pub(crate) num_maybe_eligible: usize, + pub(crate) num_ineligible: usize, +} + +impl NoopConvertZoneCounts { + pub(crate) fn new(zones: &IdOrdMap) -> Self { + let mut num_already_artifact = 0; + let mut num_maybe_eligible = 0; + let mut num_ineligible = 0; + + for zone in zones { + match &zone.status { + NoopConvertZoneStatus::AlreadyArtifact { .. } => { + num_already_artifact += 1; + } + NoopConvertZoneStatus::Eligible(_) => { + num_maybe_eligible += 1; + } + NoopConvertZoneStatus::Ineligible(_) => { + num_ineligible += 1; + } + } + } + + Self { + num_total: zones.len(), + num_already_artifact, + num_maybe_eligible, + num_ineligible, + } + } +} + +#[derive(Clone, Debug)] +pub(crate) enum NoopConvertSledIneligibleReason { + /// This sled is missing from inventory. + NotInInventory, + + /// An error occurred retrieving the sled's install dataset zone manifest. + ManifestError { message: String }, + + /// The blueprint for this sled has remove_mupdate_override set. + BpMupdateOverride { override_id: MupdateOverrideUuid }, +} + +#[derive(Clone, Debug)] +pub(crate) struct NoopConvertZoneInfo { + pub(crate) zone_id: OmicronZoneUuid, + pub(crate) kind: ZoneKind, + pub(crate) status: NoopConvertZoneStatus, +} + +impl NoopConvertZoneInfo { + fn log_to(&self, log: &slog::Logger) { + let log = log.new(o!( + "zone_id" => self.zone_id.to_string(), + "kind" => self.kind.report_str(), + "file_name" => self.kind.artifact_in_install_dataset(), + )); + match &self.status { + NoopConvertZoneStatus::AlreadyArtifact { version, hash } => { + // Use debug to avoid spamming reconfigurator-cli output for + // this generally expected case. + debug!( + log, + "zone has its image source set to Artifact already"; + "version" => %version, + "hash" => %hash, + ); + } + NoopConvertZoneStatus::Eligible(new_image_source) => { + info!( + log, + "zone is eligible for noop image source conversion"; + "new_image_source" => %new_image_source, + ); + } + NoopConvertZoneStatus::Ineligible( + NoopConvertZoneIneligibleReason::NotInManifest, + ) => { + // This case shouldn't generally happen, but it can currently + // happen with rkadm, as well as in the reconfigurator-cli. Mark + // it as debug to avoid spamming reconfigurator-cli output. + debug!( + log, + "blueprint zone not found in zone manifest, \ + ignoring for noop checks (how is the zone set to \ + InstallDataset in the blueprint then?)", + ); + } + NoopConvertZoneStatus::Ineligible( + NoopConvertZoneIneligibleReason::ArtifactError { message }, + ) => { + warn!( + log, + "zone manifest inventory indicated install dataset \ + artifact is invalid, not using artifact (this is \ + abnormal)"; + "message" => %message, + ); + } + NoopConvertZoneStatus::Ineligible( + NoopConvertZoneIneligibleReason::NotInTufRepo { expected_hash }, + ) => { + // Sleds should all be MUPdated to the same version, so the TUF + // repo is expected to contain all the hashes. The only time + // that isn't the case is right after a MUPdate when the TUF + // repo hasn't been uploaded yet. This isn't quite a warning or + // error case, so log this at INFO level. + info!( + log, + "install dataset artifact hash not found in TUF repo, \ + ignoring for noop checks"; + "expected_hash" => %expected_hash, + ); + } + } + } +} + +impl IdOrdItem for NoopConvertZoneInfo { + type Key<'a> = OmicronZoneUuid; + + fn key(&self) -> Self::Key<'_> { + self.zone_id + } + + id_upcast!(); +} + +#[derive(Clone, Debug)] +pub(crate) enum NoopConvertZoneStatus { + AlreadyArtifact { version: BlueprintZoneImageVersion, hash: ArtifactHash }, + Ineligible(NoopConvertZoneIneligibleReason), + Eligible(BlueprintZoneImageSource), +} + +#[derive(Clone, Debug)] +pub(crate) enum NoopConvertZoneIneligibleReason { + NotInManifest, + ArtifactError { message: String }, + NotInTufRepo { expected_hash: ArtifactHash }, +} From 05791695b65f19dd5863466585b0456e29f8f583 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 15 Jul 2025 05:42:28 +0000 Subject: [PATCH 2/2] updates Created using spr 1.3.6-beta.1 --- .../output/cmds-add-sled-no-disks-stdout | 2 +- .../tests/output/cmds-example-stdout | 4 +- ...ds-expunge-newly-added-external-dns-stdout | 2 +- ...ds-expunge-newly-added-internal-dns-stdout | 2 +- .../output/cmds-noop-image-source-stdout | 23 +- nexus/reconfigurator/planning/src/planner.rs | 53 ++-- .../planning/src/planner/image_source.rs | 269 +++++++++--------- 7 files changed, 170 insertions(+), 185 deletions(-) diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout index 065cb018b31..f5619b87f09 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout @@ -36,7 +36,7 @@ generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configu > # Try to plan a new blueprint; this should be okay even though the sled > # we added has no disks. > blueprint-plan dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 -INFO skipping noop image source check for all sleds (no current TUF repo) +INFO skipping noop image source check for all sleds, reason: no target release is currently set INFO skipping sled (no zpools in service), sled_id: 00320471-945d-413c-85e7-03e091a70b3c INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index da7b3fe5acd..75c196741d4 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -493,7 +493,7 @@ T ENA ID PARENT * yes ade5749d-bdf3-4fab-a8ae-00bea01b3a5a 02697f74-b14a-4418-90f0-c28b2a3a6aa9 > blueprint-plan ade5749d-bdf3-4fab-a8ae-00bea01b3a5a -INFO skipping noop image source check for all sleds (no current TUF repo) +INFO skipping noop image source check for all sleds, reason: no target release is currently set INFO found sled missing NTP zone (will add one), sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 WARN failed to place all new desired Clickhouse zones, placed: 0, wanted_to_place: 1 @@ -937,7 +937,7 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9 > # Plan a blueprint run -- this will cause zones and disks on the expunged > # sled to be expunged. > blueprint-plan latest -INFO skipping noop image source check for all sleds (no current TUF repo) +INFO skipping noop image source check for all sleds, reason: no target release is currently set INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout index f683fff823f..a0ec34ae32b 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout @@ -969,7 +969,7 @@ parent: 3f00b694-1b16-4aaa-8f78-e6b3a527b434 > # blueprint-plan will place a new external DNS zone, diff DNS to see the new zone has `ns` and NS records. > blueprint-plan 366b0b68-d80e-4bc1-abd3-dc69837847e0 -INFO skipping noop image source check for all sleds (no current TUF repo) +INFO skipping noop image source check for all sleds, reason: no target release is currently set INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout index f96d88791bb..db4d2cf4d90 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout @@ -1002,7 +1002,7 @@ external DNS: > # Planning a new blueprint will now replace the expunged zone, with new records for its replacement. > blueprint-plan 58d5e830-0884-47d8-a7cd-b2b3751adeb4 -INFO skipping noop image source check for all sleds (no current TUF repo) +INFO skipping noop image source check for all sleds, reason: no target release is currently set INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout index a941d10012e..0f5f121bc3d 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout @@ -156,26 +156,15 @@ generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configu > blueprint-plan latest latest WARN skipping zones eligible for cleanup check (sled not present in latest inventory collection), sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e -WARN skipped noop image source check since sled-agent encountered error retrieving zone manifest (this is abnormal), sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, error: reconfigurator-sim simulated error: simulated error obtaining zone manifest +WARN skipped noop image source check on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, reason: error retrieving zone manifest: reconfigurator-sim simulated error: simulated error obtaining zone manifest INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 0, num_eligible: 6, num_ineligible: 0 -INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 0c71b3b2-6ceb-4e8f-b020-b08675e83038, kind: nexus, file_name: nexus.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 427ec88f-f467-42fa-9bbb-66a91a36103c, kind: internal_dns, file_name: internal_dns.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 5199c033-4cf9-4ab6-8ae7-566bd7606363, kind: crucible, file_name: crucible.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 6444f8a5-6465-4f0b-a549-1993c113569c, kind: internal_ntp, file_name: ntp.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: 803bfb63-c246-41db-b0da-d3b87ddfc63d, kind: external_dns, file_name: external_dns.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, zone_id: ba4994a8-23f9-4b1a-a84f-a08d74591389, kind: crucible_pantry, file_name: crucible_pantry.tar.gz, new_image_source: artifact: version 1.0.0 INFO performed noop image source checks on sled, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 0, num_eligible: 5, num_ineligible: 1 -INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 33862f97-2897-4d53-a9a6-78a80f7eb13f, kind: nexus, file_name: nexus.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 43a0588f-5b57-469b-a173-db6cb6105e4c, kind: external_dns, file_name: external_dns.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 8e3dd7a4-75a3-4917-a6f4-0991bbdef7ea, kind: crucible, file_name: crucible.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: 97753dbd-5a0f-4273-b1be-db6bb2b69381, kind: internal_dns, file_name: internal_dns.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: d07a1fed-4235-4821-a1e5-f7eb2646ff33, kind: crucible_pantry, file_name: crucible_pantry.tar.gz, new_image_source: artifact: version 1.0.0 WARN zone manifest inventory indicated install dataset artifact is invalid, not using artifact (this is abnormal), sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: e8fe709c-725f-4bb2-b714-ffcda13a9e54, kind: internal_ntp, file_name: ntp.tar.gz, message: reconfigurator-sim: simulated error validating zone image INFO performed noop image source checks on sled, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, num_total: 2, num_already_artifact: 0, num_eligible: 0, num_ineligible: 2 INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: b910534b-2a53-4335-a3d9-5311d2f3186a, kind: internal_ntp, file_name: ntp.tar.gz, expected_hash: d76e26198daed69cdae04490d7477f8c842e0dbe37d463eac0d0a8d3fb803095 INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: ecbe0b3d-1acc-44b2-b6d4-f4d2770516e4, kind: crucible, file_name: crucible.tar.gz, expected_hash: 866f6a7c2e51c056fb722b5113e80181cc9cd8b712a0d3dbf1edc4ce29e5229e -INFO skipped noop image source check on sled (blueprint has get_remove_mupdate_override set for sled), sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, bp_remove_mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff -INFO skipped noop image source check (sled not present in latest inventory collection), sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e +INFO performed noop image source checks on sled, but no conversions will occur because remove_mupdate_override is set in the blueprint, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff, num_total: 2, num_already_artifact: 0, num_would_be_eligible: 2, num_ineligible: 0 +INFO skipped noop image source check on sled, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, reason: sled not found in inventory INFO noop converting 6/6 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 0 INFO noop converting 5/6 install-dataset zones to artifact store, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 0 INFO parent blueprint contains NTP zone, but it's not in inventory yet, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e @@ -515,17 +504,15 @@ set sled e96e226f-4ed9-4c01-91b9-69a9cd076c9e inventory visibility: hidden -> vi generated inventory collection 61f451b3-2121-4ed6-91c7-a550054f6c21 from configured sleds > blueprint-plan latest latest -WARN skipped noop image source check since sled-agent encountered error retrieving zone manifest (this is abnormal), sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, error: reconfigurator-sim simulated error: simulated error obtaining zone manifest +WARN skipped noop image source check on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, reason: error retrieving zone manifest: reconfigurator-sim simulated error: simulated error obtaining zone manifest INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 6, num_eligible: 0, num_ineligible: 0 INFO performed noop image source checks on sled, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 5, num_eligible: 0, num_ineligible: 1 WARN zone manifest inventory indicated install dataset artifact is invalid, not using artifact (this is abnormal), sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, zone_id: e8fe709c-725f-4bb2-b714-ffcda13a9e54, kind: internal_ntp, file_name: ntp.tar.gz, message: reconfigurator-sim: simulated error validating zone image INFO performed noop image source checks on sled, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, num_total: 2, num_already_artifact: 0, num_eligible: 0, num_ineligible: 2 INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: b910534b-2a53-4335-a3d9-5311d2f3186a, kind: internal_ntp, file_name: ntp.tar.gz, expected_hash: d76e26198daed69cdae04490d7477f8c842e0dbe37d463eac0d0a8d3fb803095 INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: ecbe0b3d-1acc-44b2-b6d4-f4d2770516e4, kind: crucible, file_name: crucible.tar.gz, expected_hash: 866f6a7c2e51c056fb722b5113e80181cc9cd8b712a0d3dbf1edc4ce29e5229e -INFO skipped noop image source check on sled (blueprint has get_remove_mupdate_override set for sled), sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, bp_remove_mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff +INFO performed noop image source checks on sled, but no conversions will occur because remove_mupdate_override is set in the blueprint, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, mupdate_override_id: ffffffff-ffff-ffff-ffff-ffffffffffff, num_total: 2, num_already_artifact: 0, num_would_be_eligible: 2, num_ineligible: 0 INFO performed noop image source checks on sled, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, num_total: 2, num_already_artifact: 0, num_eligible: 2, num_ineligible: 0 -INFO zone is eligible for noop image source conversion, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, zone_id: c1467d0e-b3de-4bd8-b36a-d8b36626badc, kind: crucible, file_name: crucible.tar.gz, new_image_source: artifact: version 1.0.0 -INFO zone is eligible for noop image source conversion, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, zone_id: c800ba17-240e-4b72-8ae6-afc30b6baa96, kind: internal_ntp, file_name: ntp.tar.gz, new_image_source: artifact: version 1.0.0 INFO noop converting 2/2 install-dataset zones to artifact store, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, num_total: 2, num_already_artifact: 0 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 804ee7b7a03..43caf9b644c 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -16,7 +16,6 @@ use crate::blueprint_editor::SledEditError; use crate::mgs_updates::plan_mgs_updates; use crate::planner::image_source::NoopConvertInfo; use crate::planner::image_source::NoopConvertSledStatus; -use crate::planner::image_source::NoopConvertZoneCounts; use crate::planner::image_source::NoopConvertZoneStatus; use crate::planner::omicron_zone_placement::PlacementError; use gateway_client::types::SpType; @@ -521,34 +520,22 @@ impl<'a> Planner<'a> { NoopConvertInfo::GlobalIneligible { .. } => return Ok(()), }; for sled in sleds { - let zones = match &sled.status { + let eligible = match &sled.status { NoopConvertSledStatus::Ineligible(_) => continue, - NoopConvertSledStatus::MaybeEligible { - mupdate_override_id, - zones, - } => { + NoopConvertSledStatus::MaybeEligible(maybe_eligible) => { // If the mupdate override ID is set, we can't do noop - // conversions. Log this fact. - if let Some(override_id) = mupdate_override_id { - info!( - self.log, - "sled is ineligible for noop conversions due to \ - mupdate override"; - "sled_id" => %sled.sled_id, - "mupdate_override_id" => %override_id, - ); + // conversions. This information is already logged so we + // don't need to log it again. + if maybe_eligible.mupdate_override_id.is_some() { continue; } - zones + maybe_eligible } }; - let zone_counts = NoopConvertZoneCounts::new(zones); - let num_install_dataset = - zone_counts.num_maybe_eligible + zone_counts.num_ineligible; - - if num_install_dataset == 0 { + let zone_counts = eligible.zone_counts(); + if zone_counts.num_install_dataset() == 0 { debug!( self.log, "all zones are already Artifact, so \ @@ -561,17 +548,17 @@ impl<'a> Planner<'a> { info!( self.log, "noop converting {}/{} install-dataset zones to artifact store", - // If we're here, then the count is of actually eligible zones, - // not just maybe-eligible ones. + // If we're here, then num_maybe_eligible represents + // actually eligible zones. zone_counts.num_maybe_eligible, - num_install_dataset; + zone_counts.num_install_dataset(); "sled_id" => %sled.sled_id, "num_total" => zone_counts.num_total, "num_already_artifact" => zone_counts.num_already_artifact, ); } - for zone in zones { + for zone in &eligible.zones { match &zone.status { NoopConvertZoneStatus::Eligible(new_image_source) => { self.blueprint.sled_set_zone_source( @@ -585,12 +572,16 @@ impl<'a> Planner<'a> { } } - self.blueprint.record_operation( - Operation::SledNoopZoneImageSourcesUpdated { - sled_id: sled.sled_id, - count: zone_counts.num_maybe_eligible, - }, - ); + // Again, if we're here, then num_maybe_eligible represents actually + // eligible zones. + if zone_counts.num_maybe_eligible > 0 { + self.blueprint.record_operation( + Operation::SledNoopZoneImageSourcesUpdated { + sled_id: sled.sled_id, + count: zone_counts.num_maybe_eligible, + }, + ); + } } Ok(()) diff --git a/nexus/reconfigurator/planning/src/planner/image_source.rs b/nexus/reconfigurator/planning/src/planner/image_source.rs index dee7924cdac..d2b71ae09ac 100644 --- a/nexus/reconfigurator/planning/src/planner/image_source.rs +++ b/nexus/reconfigurator/planning/src/planner/image_source.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::collections::HashMap; +use std::{collections::HashMap, fmt}; use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; use nexus_sled_agent_shared::inventory::ZoneKind; @@ -25,7 +25,7 @@ use crate::blueprint_builder::{BlueprintBuilder, Error}; #[derive(Clone, Debug)] pub(crate) enum NoopConvertInfo { /// There's a global reason due to which no-op conversions cannot occur. - GlobalIneligible { reason: NoopConvertGlobalIneligibleReason }, + GlobalIneligible(NoopConvertGlobalIneligibleReason), /// Global checks have passed. GlobalEligible { sleds: IdOrdMap }, @@ -40,9 +40,9 @@ impl NoopConvertInfo { let TargetReleaseDescription::TufRepo(current_artifacts) = input.tuf_repo().description() else { - return Ok(Self::GlobalIneligible { - reason: NoopConvertGlobalIneligibleReason::NoTufRepo, - }); + return Ok(Self::GlobalIneligible( + NoopConvertGlobalIneligibleReason::NoTargetRelease, + )); }; let mut sleds = IdOrdMap::new(); @@ -87,26 +87,6 @@ impl NoopConvertInfo { } }; - // Does the blueprint have the remove_mupdate_override field set for - // this sled? If it does, we don't want to touch the zones on this - // sled (they should all be InstallDataset until the - // remove_mupdate_override field is cleared). - if let Some(override_id) = - blueprint.sled_get_remove_mupdate_override(sled_id)? - { - sleds - .insert_unique(NoopConvertSledInfo { - sled_id, - status: NoopConvertSledStatus::Ineligible( - NoopConvertSledIneligibleReason::BpMupdateOverride { - override_id, - }, - ), - }) - .expect("sled IDs are unique"); - continue; - } - // Out of these, which zones' hashes (as reported in the zone // manifest) match the corresponding ones in the TUF repo? let zones = blueprint @@ -137,10 +117,6 @@ impl NoopConvertInfo { let Some(artifact) = zone_manifest.artifacts.get(file_name) else { - // The blueprint indicates that a zone should be present - // that isn't in the install dataset. This might be an old - // install dataset with a zone kind known to this version of - // Nexus that isn't present in it. return NoopConvertZoneInfo { zone_id: z.id, kind: z.kind(), @@ -192,11 +168,13 @@ impl NoopConvertInfo { sleds .insert_unique(NoopConvertSledInfo { sled_id, - status: NoopConvertSledStatus::MaybeEligible { - mupdate_override_id: blueprint - .sled_get_remove_mupdate_override(sled_id)?, - zones, - }, + status: NoopConvertSledStatus::MaybeEligible( + NoopConvertSledMaybeEligible { + mupdate_override_id: blueprint + .sled_get_remove_mupdate_override(sled_id)?, + zones, + }, + ), }) .expect("sled IDs are unique"); } @@ -206,13 +184,11 @@ impl NoopConvertInfo { pub(crate) fn log_to(&self, log: &slog::Logger) { match self { - Self::GlobalIneligible { - reason: NoopConvertGlobalIneligibleReason::NoTufRepo, - } => { + Self::GlobalIneligible(reason) => { info!( log, - "skipping noop image source check for all sleds \ - (no current TUF repo)", + "skipping noop image source check for all sleds"; + "reason" => %reason, ); } Self::GlobalEligible { sleds } => { @@ -228,8 +204,18 @@ impl NoopConvertInfo { #[derive(Clone, Debug)] pub(crate) enum NoopConvertGlobalIneligibleReason { - /// No TUF repository is available. - NoTufRepo, + /// No target release was set. + NoTargetRelease, +} + +impl fmt::Display for NoopConvertGlobalIneligibleReason { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NoTargetRelease => { + write!(f, "no target release is currently set") + } + } + } } #[derive(Clone, Debug)] @@ -255,100 +241,61 @@ pub(crate) enum NoopConvertSledStatus { /// The sled might be eligible for conversion in case `mupdate_override_id` /// is `None`. - MaybeEligible { - mupdate_override_id: Option, - zones: IdOrdMap, - }, + MaybeEligible(NoopConvertSledMaybeEligible), } impl NoopConvertSledStatus { fn log_to(&self, log: &slog::Logger) { match self { - Self::Ineligible( - NoopConvertSledIneligibleReason::NotInInventory, - ) => { - info!( - log, - "skipped noop image source check \ - (sled not present in latest inventory collection)" - ); - } - Self::Ineligible( - NoopConvertSledIneligibleReason::ManifestError { message }, - ) => { - // This is a string so we don't use InlineErrorChain::new. - let message: &str = message; - warn!( - log, - "skipped noop image source check since \ - sled-agent encountered error retrieving zone manifest \ - (this is abnormal)"; - "error" => %message, - ); - } - Self::Ineligible( - NoopConvertSledIneligibleReason::BpMupdateOverride { - override_id, - }, - ) => { - info!( - log, - "skipped noop image source check on sled \ - (blueprint has get_remove_mupdate_override set for sled)"; - "bp_remove_mupdate_override_id" => %override_id, - ); - } - Self::MaybeEligible { mupdate_override_id, zones } => { - let zone_counts = NoopConvertZoneCounts::new(zones); - - if let Some(override_id) = mupdate_override_id { - info!( - log, - "performed noop image source checks on sled \ - (maybe_eligible_zones will become eligible if \ - mupdate_override_id is cleared in a planning step)"; - "mupdate_override_id" => %override_id, - "num_total" => zone_counts.num_total, - "num_already_artifact" => zone_counts.num_already_artifact, - "num_maybe_eligible" => zone_counts.num_maybe_eligible, - "num_ineligible" => zone_counts.num_ineligible, - ); - } else { - info!( - log, - "performed noop image source checks on sled"; - "num_total" => zone_counts.num_total, - "num_already_artifact" => zone_counts.num_already_artifact, - // Since mupdate_override_id is None, eligible zones are - // truly eligible. - "num_eligible" => zone_counts.num_maybe_eligible, - "num_ineligible" => zone_counts.num_ineligible, - ); - } - - for zone in zones { - zone.log_to(log); + Self::Ineligible(reason) => { + // The slog macros require that the log level is determined at + // compile time, but we want the different enum variants here to + // be logged at different levels. Hence this mess. + match reason { + NoopConvertSledIneligibleReason::NotInInventory => { + info!( + log, + "skipped noop image source check on sled"; + "reason" => %reason, + ) + } + NoopConvertSledIneligibleReason::ManifestError { + .. + } => { + warn!( + log, + "skipped noop image source check on sled"; + "reason" => %reason, + ) + } } } + Self::MaybeEligible(sled) => { + sled.log_to(log); + } } } } #[derive(Clone, Debug)] -pub(crate) struct NoopConvertZoneCounts { - pub(crate) num_total: usize, - pub(crate) num_already_artifact: usize, - pub(crate) num_maybe_eligible: usize, - pub(crate) num_ineligible: usize, +pub(crate) struct NoopConvertSledMaybeEligible { + // A sled is eligible for conversion if the mupdate_override_id is None + // + // The code is structured in this manner because the planner's mupdate + // override step requires access to the sled's zones, and is responsible for + // clearing this field if it also clears the mupdate override in the + // blueprint. + pub(crate) mupdate_override_id: Option, + pub(crate) zones: IdOrdMap, } -impl NoopConvertZoneCounts { - pub(crate) fn new(zones: &IdOrdMap) -> Self { +impl NoopConvertSledMaybeEligible { + pub(crate) fn zone_counts(&self) -> NoopConvertZoneCounts { let mut num_already_artifact = 0; let mut num_maybe_eligible = 0; let mut num_ineligible = 0; - for zone in zones { + for zone in &self.zones { match &zone.status { NoopConvertZoneStatus::AlreadyArtifact { .. } => { num_already_artifact += 1; @@ -362,13 +309,62 @@ impl NoopConvertZoneCounts { } } - Self { - num_total: zones.len(), + NoopConvertZoneCounts { + num_total: self.zones.len(), num_already_artifact, num_maybe_eligible, num_ineligible, } } + + fn log_to(&self, log: &slog::Logger) { + let zone_counts = self.zone_counts(); + + if let Some(override_id) = self.mupdate_override_id { + info!( + log, + "performed noop image source checks on sled, \ + but no conversions will occur because \ + remove_mupdate_override is set in the blueprint"; + "mupdate_override_id" => %override_id, + "num_total" => zone_counts.num_total, + "num_already_artifact" => zone_counts.num_already_artifact, + // This counts the number of zones that would be eligible if + // remove_mupdate_override were not set. + "num_would_be_eligible" => zone_counts.num_maybe_eligible, + "num_ineligible" => zone_counts.num_ineligible, + ); + } else { + info!( + log, + "performed noop image source checks on sled"; + "num_total" => zone_counts.num_total, + "num_already_artifact" => zone_counts.num_already_artifact, + // Since mupdate_override_id is None, maybe-eligible zones are + // truly eligible. + "num_eligible" => zone_counts.num_maybe_eligible, + "num_ineligible" => zone_counts.num_ineligible, + ); + } + + for zone in &self.zones { + zone.log_to(log); + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct NoopConvertZoneCounts { + pub(crate) num_total: usize, + pub(crate) num_already_artifact: usize, + pub(crate) num_maybe_eligible: usize, + pub(crate) num_ineligible: usize, +} + +impl NoopConvertZoneCounts { + pub(crate) fn num_install_dataset(&self) -> usize { + self.num_maybe_eligible + self.num_ineligible + } } #[derive(Clone, Debug)] @@ -378,9 +374,17 @@ pub(crate) enum NoopConvertSledIneligibleReason { /// An error occurred retrieving the sled's install dataset zone manifest. ManifestError { message: String }, +} - /// The blueprint for this sled has remove_mupdate_override set. - BpMupdateOverride { override_id: MupdateOverrideUuid }, +impl fmt::Display for NoopConvertSledIneligibleReason { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NotInInventory => write!(f, "sled not found in inventory"), + Self::ManifestError { message } => { + write!(f, "error retrieving zone manifest: {}", message) + } + } + } } #[derive(Clone, Debug)] @@ -409,18 +413,20 @@ impl NoopConvertZoneInfo { ); } NoopConvertZoneStatus::Eligible(new_image_source) => { - info!( + debug!( log, - "zone is eligible for noop image source conversion"; + "zone may be eligible for noop image source conversion"; "new_image_source" => %new_image_source, ); } NoopConvertZoneStatus::Ineligible( NoopConvertZoneIneligibleReason::NotInManifest, ) => { - // This case shouldn't generally happen, but it can currently - // happen with rkadm, as well as in the reconfigurator-cli. Mark - // it as debug to avoid spamming reconfigurator-cli output. + // This case shouldn't generally happen in production, but it + // can currently occur in the reconfigurator-cli since our + // simulated systems don't have a zone manifest without them + // being initialized. Log this at the DEBUG level to avoid + // spamming reconfigurator-cli output. debug!( log, "blueprint zone not found in zone manifest, \ @@ -442,11 +448,12 @@ impl NoopConvertZoneInfo { NoopConvertZoneStatus::Ineligible( NoopConvertZoneIneligibleReason::NotInTufRepo { expected_hash }, ) => { - // Sleds should all be MUPdated to the same version, so the TUF - // repo is expected to contain all the hashes. The only time - // that isn't the case is right after a MUPdate when the TUF - // repo hasn't been uploaded yet. This isn't quite a warning or - // error case, so log this at INFO level. + // If a MUPdate happens, sleds should all be MUPdated to the + // same version, so the TUF repo is expected to contain all the + // hashes. The only time that isn't the case is right after a + // MUPdate when the TUF repo hasn't been uploaded yet. This + // isn't quite a warning or error case, so log this at the INFO + // level. info!( log, "install dataset artifact hash not found in TUF repo, \