From dc659ce3c11af4866d02cc2af7d6eb4065356275 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 9 Jul 2025 11:57:50 -0700 Subject: [PATCH 1/2] Support Bundles: Multi-part upload --- .../tasks/support_bundle_collector.rs | 76 ++- openapi/sled-agent.json | 162 +++++- sled-agent/api/src/lib.rs | 50 +- .../config-reconciler/src/dump_setup.rs | 12 +- sled-agent/src/http_entrypoints.rs | 48 +- sled-agent/src/sim/http_entrypoints.rs | 53 +- sled-agent/src/sim/sled_agent.rs | 39 +- sled-agent/src/support_bundle/storage.rs | 483 +++++++++++------- sled-agent/types/src/support_bundle.rs | 2 +- 9 files changed, 656 insertions(+), 269 deletions(-) diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 40138cd2ad9..78fc121cf96 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -494,8 +494,12 @@ impl BundleCollection { // Create the zipfile as a temporary file let mut zipfile = tokio::fs::File::from_std(bundle_to_zipfile(&dir)?); + let total_len = zipfile.metadata().await?.len(); - // Verify the hash locally before we send it over the network + // Collect the hash locally before we send it over the network + // + // We'll use this later during finalization to confirm the bundle + // has been stored successfully. zipfile.seek(SeekFrom::Start(0)).await?; let hash = sha2_hash(&mut zipfile).await?; @@ -515,23 +519,65 @@ impl BundleCollection { ) .await?; - // Stream the zipfile to the sled where it should be kept - zipfile.seek(SeekFrom::Start(0)).await?; - let file_access = hyper_staticfile::vfs::TokioFileAccess::new(zipfile); - let file_stream = - hyper_staticfile::util::FileBytesStream::new(file_access); - let body = - reqwest::Body::wrap(hyper_staticfile::Body::Full(file_stream)); + let zpool = ZpoolUuid::from(self.bundle.zpool_id); + let dataset = DatasetUuid::from(self.bundle.dataset_id); + let support_bundle = SupportBundleUuid::from(self.bundle.id); + + // Tell this sled to create the bundle. + let creation_result = sled_client + .support_bundle_start_creation(&zpool, &dataset, &support_bundle) + .await + .with_context(|| "Support bundle failed to start creation")?; + + if matches!( + creation_result.state, + sled_agent_client::types::SupportBundleState::Complete + ) { + // Early exit case: the bundle was already created -- we must have either + // crashed or failed between "finalizing" and "writing to the database that we + // finished". 
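+            //
+            // In this case, the bundle contents on the sled are already
+            // complete and immutable, so there is no transfer work left to
+            // do; we only need to record that collection succeeded.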
+ info!(&self.log, "Support bundle was already collected"; "bundle" => %self.bundle.id); + return Ok(report); + } + info!(&self.log, "Support bundle creation started"; "bundle" => %self.bundle.id); + + const CHUNK_SIZE: u64 = 1024 * 1024 * 1024; + + let mut offset = 0; + while offset < total_len { + // Stream the zipfile to the sled where it should be kept + let mut file = zipfile + .try_clone() + .await + .with_context(|| "Failed to clone zipfile")?; + file.seek(SeekFrom::Start(offset)).await.with_context(|| { + format!("Failed to seek to offset {offset} / {total_len} within zipfile") + })?; + + // Only stream at most CHUNK_SIZE bytes at once + let remaining = std::cmp::min(CHUNK_SIZE, total_len - offset); + let limited_file = file.take(remaining); + let stream = tokio_util::io::ReaderStream::new(limited_file); + let body = reqwest::Body::wrap_stream(stream); + + sled_client.support_bundle_transfer( + &zpool, &dataset, &support_bundle, offset, body + ).await.with_context(|| { + format!("Failed to transfer bundle: {remaining}@{offset} of {total_len} to sled") + })?; + + offset += CHUNK_SIZE; + } sled_client - .support_bundle_create( - &ZpoolUuid::from(self.bundle.zpool_id), - &DatasetUuid::from(self.bundle.dataset_id), - &SupportBundleUuid::from(self.bundle.id), + .support_bundle_finalize( + &zpool, + &dataset, + &support_bundle, &hash.to_string(), - body, ) - .await?; + .await + .with_context(|| "Failed to finalize bundle")?; // Returning from this method should drop all temporary storage // allocated locally for this support bundle. @@ -795,7 +841,7 @@ impl BackgroundTask for SupportBundleCollector { Ok(report) => collection_report = Some(report), Err(err) => { collection_err = - Some(json!({ "collect_error": err.to_string() })) + Some(json!({ "collect_error": InlineErrorChain::new(err.as_ref()).to_string() })) } }; diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index f5b2ae0cdb0..cb566ac7928 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -875,8 +875,9 @@ }, "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}": { "post": { - "summary": "Create a support bundle within a particular dataset", - "operationId": "support_bundle_create", + "summary": "Starts creation of a support bundle within a particular dataset", + "description": "Callers should transfer chunks of the bundle with \"support_bundle_transfer\", and then call \"support_bundle_finalize\" once the bundle has finished transferring.\n\nIf a support bundle was previously created without being finalized successfully, this endpoint will reset the state.\n\nIf a support bundle was previously created and finalized successfully, this endpoint will return metadata indicating that it already exists.", + "operationId": "support_bundle_start_creation", "parameters": [ { "in": "path", @@ -904,28 +905,8 @@ "schema": { "$ref": "#/components/schemas/TypedUuidForZpoolKind" } - }, - { - "in": "query", - "name": "hash", - "required": true, - "schema": { - "type": "string", - "format": "hex string (32 bytes)" - } } ], - "requestBody": { - "content": { - "application/octet-stream": { - "schema": { - "type": "string", - "format": "binary" - } - } - }, - "required": true - }, "responses": { "201": { "description": "successful creation", @@ -1216,6 +1197,69 @@ } } }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/finalize": { + "post": { + "summary": "Finalizes the creation of a support bundle", + "description": "If the requested hash matched the bundle, the bundle is created. 
Otherwise, an error is returned.", + "operationId": "support_bundle_finalize", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + }, + { + "in": "query", + "name": "hash", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } + } + ], + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index": { "get": { "summary": "Fetch the index (list of files within a support bundle)", @@ -1320,6 +1364,80 @@ } } }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/transfer": { + "put": { + "summary": "Transfers a chunk of a support bundle within a particular dataset", + "operationId": "support_bundle_transfer", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + }, + { + "in": "query", + "name": "offset", + "required": true, + "schema": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/switch-ports": { "post": { "operationId": "uplink_ensure", diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index ba0f0eacb2d..1dffb0da107 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -166,19 +166,53 @@ pub trait SledAgentApi { path_params: Path, ) -> Result>, HttpError>; - /// Create a support bundle within a particular dataset + /// Starts creation of a support bundle within a particular dataset + /// + /// Callers should transfer chunks of the bundle with + /// "support_bundle_transfer", and then call "support_bundle_finalize" + /// once the bundle has finished transferring. 
+ /// + /// If a support bundle was previously created without being finalized + /// successfully, this endpoint will reset the state. + /// + /// If a support bundle was previously created and finalized successfully, + /// this endpoint will return metadata indicating that it already exists. #[endpoint { method = POST, - path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}", + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}" + }] + async fn support_bundle_start_creation( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Transfers a chunk of a support bundle within a particular dataset + #[endpoint { + method = PUT, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/transfer", request_body_max_bytes = SUPPORT_BUNDLE_MAX_BYTES, }] - async fn support_bundle_create( + async fn support_bundle_transfer( rqctx: RequestContext, path_params: Path, - query_params: Query, + query_params: Query, body: StreamingBody, ) -> Result, HttpError>; + /// Finalizes the creation of a support bundle + /// + /// If the requested hash matched the bundle, the bundle is created. + /// Otherwise, an error is returned. + #[endpoint { + method = POST, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/finalize" + }] + async fn support_bundle_finalize( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + /// Fetch a support bundle from a particular dataset #[endpoint { method = GET, @@ -760,9 +794,15 @@ pub struct SupportBundleFilePathParam { pub file: String, } +/// Metadata about a support bundle transfer +#[derive(Deserialize, Serialize, JsonSchema)] +pub struct SupportBundleTransferQueryParams { + pub offset: u64, +} + /// Metadata about a support bundle #[derive(Deserialize, Serialize, JsonSchema)] -pub struct SupportBundleCreateQueryParams { +pub struct SupportBundleFinalizeQueryParams { pub hash: ArtifactHash, } diff --git a/sled-agent/config-reconciler/src/dump_setup.rs b/sled-agent/config-reconciler/src/dump_setup.rs index 6e3bd8fc26c..ca0e085c915 100644 --- a/sled-agent/config-reconciler/src/dump_setup.rs +++ b/sled-agent/config-reconciler/src/dump_setup.rs @@ -97,7 +97,7 @@ use illumos_utils::zone::ZONE_PREFIX; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskVariant; use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; -use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; +use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME; use sled_storage::config::MountConfig; use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; use sled_storage::disk::Disk; @@ -527,11 +527,7 @@ fn safe_to_delete(path: &Utf8Path, meta: &std::fs::Metadata) -> bool { return false; }; // Ignore support bundles - if file_name == BUNDLE_FILE_NAME { - return false; - } - // Ignore support bundle "temp files" as they're being created. 
- if file_name.ends_with(BUNDLE_TMP_FILE_NAME_SUFFIX) { + if file_name == BUNDLE_FILE_NAME || file_name == BUNDLE_TMP_FILE_NAME { return false; } return true; @@ -2127,9 +2123,7 @@ mod tests { .set_size(100) .make_much_older(); files - .add_file( - "c4640fac-c67c-4480-b736-5d9a7fe336ba/abcd-bundle.zip.tmp", - ) + .add_file("c4640fac-c67c-4480-b736-5d9a7fe336ba/bundle.zip.tmp") .set_size(100) .make_much_older(); diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 1d0c4593991..e196ebbe574 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -219,25 +219,43 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseOk(bundles)) } - async fn support_bundle_create( + async fn support_bundle_start_creation( rqctx: RequestContext, path_params: Path, - query_params: Query, + ) -> Result, HttpError> { + let sa = rqctx.context(); + + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + let metadata = sa + .as_support_bundle_storage() + .start_creation(zpool_id, dataset_id, support_bundle_id) + .await?; + + Ok(HttpResponseCreated(metadata)) + } + + async fn support_bundle_transfer( + rqctx: RequestContext, + path_params: Path, + query_params: Query, body: StreamingBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); - let SupportBundleCreateQueryParams { hash } = query_params.into_inner(); + let SupportBundleTransferQueryParams { offset } = + query_params.into_inner(); let metadata = sa .as_support_bundle_storage() - .create( + .transfer( zpool_id, dataset_id, support_bundle_id, - hash, + offset, body.into_stream(), ) .await?; @@ -245,6 +263,26 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseCreated(metadata)) } + async fn support_bundle_finalize( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let sa = rqctx.context(); + + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + let SupportBundleFinalizeQueryParams { hash } = + query_params.into_inner(); + + let metadata = sa + .as_support_bundle_storage() + .finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await?; + + Ok(HttpResponseCreated(metadata)) + } + async fn support_bundle_download( rqctx: RequestContext, headers: Header, diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index f206e374edd..0f198d18ac0 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -379,30 +379,73 @@ impl SledAgentApi for SledAgentSimImpl { Ok(HttpResponseOk(bundles)) } - async fn support_bundle_create( + async fn support_bundle_start_creation( rqctx: RequestContext, path_params: Path, - query_params: Query, + ) -> Result, HttpError> { + let sa = rqctx.context(); + + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + Ok(HttpResponseCreated( + sa.support_bundle_start_creation( + zpool_id, + dataset_id, + support_bundle_id, + ) + .await?, + )) + } + + async fn support_bundle_transfer( + rqctx: RequestContext, + path_params: Path, + query_params: Query, body: StreamingBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); - let SupportBundleCreateQueryParams { hash } = 
query_params.into_inner(); + let SupportBundleTransferQueryParams { offset } = + query_params.into_inner(); Ok(HttpResponseCreated( - sa.support_bundle_create( + sa.support_bundle_transfer( zpool_id, dataset_id, support_bundle_id, - hash, + offset, body.into_stream(), ) .await?, )) } + async fn support_bundle_finalize( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let sa = rqctx.context(); + + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + let SupportBundleFinalizeQueryParams { hash } = + query_params.into_inner(); + + Ok(HttpResponseCreated( + sa.support_bundle_finalize( + zpool_id, + dataset_id, + support_bundle_id, + hash, + ) + .await?, + )) + } + async fn support_bundle_download( rqctx: RequestContext, headers: Header, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index fdfce7eb2da..43010c18014 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -825,23 +825,44 @@ impl SledAgent { .map_err(|err| err.into()) } - pub async fn support_bundle_create( + pub async fn support_bundle_start_creation( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, support_bundle_id: SupportBundleUuid, - expected_hash: ArtifactHash, + ) -> Result { + self.storage + .as_support_bundle_storage(&self.log) + .start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .map_err(|err| err.into()) + } + + pub async fn support_bundle_transfer( + &self, + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + support_bundle_id: SupportBundleUuid, + offset: u64, stream: impl Stream>, ) -> Result { self.storage .as_support_bundle_storage(&self.log) - .create( - zpool_id, - dataset_id, - support_bundle_id, - expected_hash, - stream, - ) + .transfer(zpool_id, dataset_id, support_bundle_id, offset, stream) + .await + .map_err(|err| err.into()) + } + + pub async fn support_bundle_finalize( + &self, + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + support_bundle_id: SupportBundleUuid, + expected_hash: ArtifactHash, + ) -> Result { + self.storage + .as_support_bundle_storage(&self.log) + .finalize(zpool_id, dataset_id, support_bundle_id, expected_hash) .await .map_err(|err| err.into()) } diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 34e6bf37480..67fdd35437d 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -22,11 +22,9 @@ use omicron_common::disk::SharedDatasetConfig; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::SupportBundleUuid; use omicron_uuid_kinds::ZpoolUuid; -use rand::distributions::Alphanumeric; -use rand::{Rng, thread_rng}; use range_requests::PotentialRange; use range_requests::SingleRange; -use sha2::{Digest, Sha256}; +use sha2::Digest; use sled_agent_api::*; use sled_agent_config_reconciler::ConfigReconcilerHandle; use sled_agent_config_reconciler::DatasetTaskError; @@ -36,7 +34,7 @@ use sled_agent_config_reconciler::NestedDatasetEnsureError; use sled_agent_config_reconciler::NestedDatasetListError; use sled_agent_config_reconciler::NestedDatasetMountError; use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; -use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; +use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; @@ -80,6 +78,12 
@@ pub enum Error { )] DatasetExistsOnWrongZpool { wanted: ZpoolUuid, actual: ZpoolUuid }, + #[error("Bundle not found")] + BundleNotFound, + + #[error(transparent)] + TryFromInt(#[from] std::num::TryFromIntError), + #[error(transparent)] Storage(#[from] sled_storage::error::Error), @@ -588,46 +592,29 @@ impl<'a> SupportBundleManager<'a> { } // A helper function which streams the contents of a bundle to a file. - // - // If at any point this function fails, the temporary file still exists, - // and should be removed. - async fn write_and_finalize_bundle( + async fn stream_bundle( mut tmp_file: tokio::fs::File, - from: &Utf8Path, - to: &Utf8Path, - expected_hash: ArtifactHash, stream: impl Stream>, ) -> Result<(), Error> { futures::pin_mut!(stream); // Write the body to the file - let mut hasher = Sha256::new(); while let Some(chunk) = stream.next().await { let chunk = chunk?; - hasher.update(&chunk); tmp_file.write_all(&chunk).await?; } - let digest = hasher.finalize(); - if digest.as_slice() != expected_hash.as_ref() { - return Err(Error::HashMismatch); - } - - // Rename the file to indicate it's ready - tokio::fs::rename(from, to).await?; Ok(()) } - /// Creates a new support bundle on a dataset. - pub async fn create( + /// Start creating a new support bundle on a dataset. + pub async fn start_creation( &self, zpool_id: ZpoolUuid, dataset_id: DatasetUuid, support_bundle_id: SupportBundleUuid, - expected_hash: ArtifactHash, - stream: impl Stream>, ) -> Result { let log = self.log.new(o!( - "operation" => "support_bundle_create", + "operation" => "support_bundle_start_creation", "zpool_id" => zpool_id.to_string(), "dataset_id" => dataset_id.to_string(), "bundle_id" => support_bundle_id.to_string(), @@ -659,27 +646,11 @@ impl<'a> SupportBundleManager<'a> { let support_bundle_dir = self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let support_bundle_path = support_bundle_dir.join(BUNDLE_FILE_NAME); - let support_bundle_path_tmp = support_bundle_dir.join(format!( - "{}-{BUNDLE_TMP_FILE_NAME_SUFFIX}", - thread_rng() - .sample_iter(Alphanumeric) - .take(6) - .map(char::from) - .collect::() - )); + let support_bundle_path_tmp = + support_bundle_dir.join(BUNDLE_TMP_FILE_NAME); // Exit early if the support bundle already exists if tokio::fs::try_exists(&support_bundle_path).await? { - if !Self::sha2_checksum_matches( - &support_bundle_path, - &expected_hash, - ) - .await? - { - warn!(log, "Support bundle exists, but the hash doesn't match"); - return Err(Error::HashMismatch); - } - info!(log, "Support bundle already exists"); let metadata = SupportBundleMetadata { support_bundle_id, @@ -688,6 +659,55 @@ impl<'a> SupportBundleManager<'a> { return Ok(metadata); } + // Create the temporary file for access by subsequent transfer calls. + // + // Note that this truncates the tempfile if it already existed, for any + // reason (e.g., incomplete transfer). 
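+        //
+        // This reset is what makes "start_creation" safe to call again after
+        // a failed or interrupted transfer: the next sequence of transfers
+        // starts over against an empty tempfile.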
+        info!(
+            log,
+            "Creating temp storage for support bundle";
+            "path" => ?support_bundle_path_tmp,
+        );
+        let _ = tokio::fs::File::create(&support_bundle_path_tmp).await?;
+
+        let metadata = SupportBundleMetadata {
+            support_bundle_id,
+            state: SupportBundleState::Incomplete,
+        };
+        Ok(metadata)
+    }
+
+    /// Transfer a new support bundle to a dataset
+    pub async fn transfer(
+        &self,
+        zpool_id: ZpoolUuid,
+        dataset_id: DatasetUuid,
+        support_bundle_id: SupportBundleUuid,
+        offset: u64,
+        stream: impl Stream<Item = Result<Bytes, HttpError>>,
+    ) -> Result<SupportBundleMetadata, Error> {
+        let log = self.log.new(o!(
+            "operation" => "support_bundle_transfer",
+            "zpool_id" => zpool_id.to_string(),
+            "dataset_id" => dataset_id.to_string(),
+            "bundle_id" => support_bundle_id.to_string(),
+            "offset" => offset,
+        ));
+        info!(log, "transferring support bundle");
+
+        // Access the parent dataset (presumably "crypt/debug")
+        // where the support bundle will be mounted.
+        let root =
+            self.get_mounted_dataset_config(zpool_id, dataset_id).await?.name;
+        let dataset =
+            NestedDatasetLocation { path: support_bundle_id.to_string(), root };
+
+        // The mounted root of the support bundle dataset
+        let support_bundle_dir =
+            self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?;
+        let support_bundle_path_tmp =
+            support_bundle_dir.join(BUNDLE_TMP_FILE_NAME);
+
         // Stream the file into the dataset, first as a temporary file,
         // and then renaming to the final location.
         info!(
@@ -695,18 +715,22 @@ impl<'a> SupportBundleManager<'a> {
             "Streaming bundle to storage";
             "path" => ?support_bundle_path_tmp,
         );
-        let tmp_file =
-            tokio::fs::File::create(&support_bundle_path_tmp).await?;
 
-        if let Err(err) = Self::write_and_finalize_bundle(
-            tmp_file,
-            &support_bundle_path_tmp,
-            &support_bundle_path,
-            expected_hash,
-            stream,
-        )
-        .await
-        {
+        // Open the file which should have been created for us during "start
+        // creation".
+        let mut tmp_file = tokio::fs::OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(false)
+            .truncate(false)
+            .open(&support_bundle_path_tmp)
+            .await?;
+
+        tmp_file
+            .seek(tokio::io::SeekFrom::Current(i64::try_from(offset)?))
+            .await?;
+
+        if let Err(err) = Self::stream_bundle(tmp_file, stream).await {
             warn!(log, "Failed to write bundle to storage"; "error" => ?err);
             if let Err(unlink_err) =
                 tokio::fs::remove_file(support_bundle_path_tmp).await
@@ -724,6 +748,83 @@ impl<'a> SupportBundleManager<'a> {
         Ok(metadata)
     }
 
+    /// Finishes transferring a new support bundle to a dataset
+    pub async fn finalize(
+        &self,
+        zpool_id: ZpoolUuid,
+        dataset_id: DatasetUuid,
+        support_bundle_id: SupportBundleUuid,
+        expected_hash: ArtifactHash,
+    ) -> Result<SupportBundleMetadata, Error> {
+        let log = self.log.new(o!(
+            "operation" => "support_bundle_finalize",
+            "zpool_id" => zpool_id.to_string(),
+            "dataset_id" => dataset_id.to_string(),
+            "bundle_id" => support_bundle_id.to_string(),
+        ));
+        info!(log, "finalizing support bundle");
+
+        // Access the parent dataset (presumably "crypt/debug")
+        // where the support bundle will be mounted.
+ let root = + self.get_mounted_dataset_config(zpool_id, dataset_id).await?.name; + let dataset = + NestedDatasetLocation { path: support_bundle_id.to_string(), root }; + + // The mounted root of the support bundle dataset + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; + let support_bundle_path = support_bundle_dir.join(BUNDLE_FILE_NAME); + let support_bundle_path_tmp = + support_bundle_dir.join(BUNDLE_TMP_FILE_NAME); + + let metadata = SupportBundleMetadata { + support_bundle_id, + state: SupportBundleState::Complete, + }; + + // Deal with idempotency if the bundle has already been finalized. + if tokio::fs::try_exists(&support_bundle_path).await? { + if !Self::sha2_checksum_matches( + &support_bundle_path_tmp, + &expected_hash, + ) + .await? + { + warn!( + log, + "Finalized support bundle exists, but the hash doesn't match" + ); + return Err(Error::HashMismatch); + } + info!(log, "Support bundle already finalized"); + return Ok(metadata); + } + + // Otherwise, finalize the "temporary" -> "permanent" bundle transfer. + // + // (This is the normal case) + if !tokio::fs::try_exists(&support_bundle_path_tmp).await? { + return Err(Error::BundleNotFound); + } + if !Self::sha2_checksum_matches( + &support_bundle_path_tmp, + &expected_hash, + ) + .await? + { + warn!( + log, + "In-progress support bundle exists, but the hash doesn't match" + ); + return Err(Error::HashMismatch); + } + + // Finalize the transfer of the bundle + tokio::fs::rename(support_bundle_path_tmp, support_bundle_path).await?; + return Ok(metadata); + } + /// Destroys a support bundle that exists on a dataset. pub async fn delete( &self, @@ -965,6 +1066,7 @@ mod tests { use omicron_common::disk::DatasetsConfig; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; + use sha2::Sha256; use sled_storage::manager::StorageHandle; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::collections::BTreeMap; @@ -1155,6 +1257,44 @@ mod tests { data } + async fn start_transfer_and_finalize( + mgr: &SupportBundleManager<'_>, + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + support_bundle_id: SupportBundleUuid, + hash: ArtifactHash, + stream: impl Stream>, + ) -> SupportBundleMetadata { + mgr.start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have started creation"); + mgr.transfer(zpool_id, dataset_id, support_bundle_id, 0, stream) + .await + .expect("Should have transferred bundle"); + mgr.finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect("Should have finalized bundle") + } + + async fn start_transfer_and_finalize_expect_finalize_err( + mgr: &SupportBundleManager<'_>, + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + support_bundle_id: SupportBundleUuid, + hash: ArtifactHash, + stream: impl Stream>, + ) -> Error { + mgr.start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have started creation"); + mgr.transfer(zpool_id, dataset_id, support_bundle_id, 0, stream) + .await + .expect("Should have transferred bundle"); + mgr.finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect_err("Should have failed to finalize bundle") + } + #[tokio::test] async fn basic_crud() { let logctx = test_setup_log("basic_crud"); @@ -1184,18 +1324,17 @@ mod tests { ); // Create a new bundle - let bundle = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - 
Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) - .await - .expect("Should have created support bundle"); + let bundle = start_transfer_and_finalize( + &mgr, + harness.zpool_id, + dataset_id, + support_bundle_id, + hash, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await; assert_eq!(bundle.support_bundle_id, support_bundle_id); assert_eq!(bundle.state, SupportBundleState::Complete); @@ -1404,15 +1543,7 @@ mod tests { // Storing a bundle without a dataset should throw an error. let dataset_id = DatasetUuid::new_v4(); let err = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) + .start_creation(harness.zpool_id, dataset_id, support_bundle_id) .await .expect_err("Bundle creation should fail without dataset"); assert!(matches!(err, Error::Storage(_)), "Unexpected error: {err:?}"); @@ -1421,7 +1552,8 @@ mod tests { // Configure the dataset now, so it'll exist for future requests. harness.configure_dataset(dataset_id, DatasetKind::Debug).await; - mgr.create( + start_transfer_and_finalize( + &mgr, harness.zpool_id, dataset_id, support_bundle_id, @@ -1430,8 +1562,7 @@ mod tests { Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) }), ) - .await - .expect("Should have created support bundle"); + .await; harness.cleanup().await; logctx.cleanup_successful(); @@ -1473,18 +1604,17 @@ mod tests { ); // Creating the bundle with a bad hash should fail. - let err = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - bad_hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) - .await - .expect_err("Bundle creation should fail with bad hash"); + let err = start_transfer_and_finalize_expect_finalize_err( + &mgr, + harness.zpool_id, + dataset_id, + support_bundle_id, + bad_hash, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await; assert!( matches!(err, Error::HashMismatch), "Unexpected error: {err:?}" @@ -1499,18 +1629,15 @@ mod tests { assert_eq!(bundles[0].state, SupportBundleState::Incomplete); // Creating the bundle with bad data should fail - let err = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - Ok(Bytes::from_static(b"Not a zipfile")) - }), - ) - .await - .expect_err("Bundle creation should fail with bad hash"); + let err = start_transfer_and_finalize_expect_finalize_err( + &mgr, + harness.zpool_id, + dataset_id, + support_bundle_id, + hash, + stream::once(async { Ok(Bytes::from_static(b"Not a zipfile")) }), + ) + .await; assert!( matches!(err, Error::HashMismatch), "Unexpected error: {err:?}" @@ -1523,7 +1650,8 @@ mod tests { assert_eq!(bundles[0].state, SupportBundleState::Incomplete); // Good hash + Good data -> creation should succeed - mgr.create( + start_transfer_and_finalize( + &mgr, harness.zpool_id, dataset_id, support_bundle_id, @@ -1532,8 +1660,7 @@ mod tests { Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) }), ) - .await - .expect("Should have created support bundle"); + .await; // The bundle should now appear "Complete" let bundles = mgr.list(harness.zpool_id, dataset_id).await.unwrap(); @@ -1582,18 +1709,17 @@ mod tests { ); // Creating the bundle with a bad hash should fail. 
- let err = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - bad_hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) - .await - .expect_err("Bundle creation should fail with bad hash"); + let err = start_transfer_and_finalize_expect_finalize_err( + &mgr, + harness.zpool_id, + dataset_id, + support_bundle_id, + bad_hash, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await; assert!( matches!(err, Error::HashMismatch), "Unexpected error: {err:?}" @@ -1655,13 +1781,6 @@ mod tests { harness.storage_test_harness.handle(), ); let support_bundle_id = SupportBundleUuid::new_v4(); - let zipfile_data = example_zipfile(); - let hash = ArtifactHash( - Sha256::digest(zipfile_data.as_slice()) - .as_slice() - .try_into() - .unwrap(), - ); // Before we actually create the bundle: // @@ -1680,15 +1799,7 @@ mod tests { // Create a new bundle let err = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) + .start_creation(harness.zpool_id, dataset_id, support_bundle_id) .await .expect_err("Should not have been able to create support bundle"); let Error::DatasetNotMounted { dataset } = err else { @@ -1730,18 +1841,17 @@ mod tests { ); // Create a new bundle - let _ = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) - .await - .expect("Should have created support bundle"); + let _ = start_transfer_and_finalize( + &mgr, + harness.zpool_id, + dataset_id, + support_bundle_id, + hash, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await; // Peek under the hood: We should be able to observe the support // bundle as a nested dataset. @@ -1799,18 +1909,17 @@ mod tests { ); // Create a new bundle - let _ = mgr - .create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) - .await - .expect("Should have created support bundle"); + let _ = start_transfer_and_finalize( + &mgr, + harness.zpool_id, + dataset_id, + support_bundle_id, + hash, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await; // Peek under the hood: We should be able to observe the support // bundle as a nested dataset. @@ -1876,7 +1985,8 @@ mod tests { harness.configure_dataset(dataset_id, DatasetKind::Debug).await; // Create the bundle - mgr.create( + start_transfer_and_finalize( + &mgr, harness.zpool_id, dataset_id, support_bundle_id, @@ -1885,37 +1995,15 @@ mod tests { Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) }), ) - .await - .expect("Should have created support bundle"); + .await; // Creating the dataset again should work. - mgr.create( - harness.zpool_id, - dataset_id, - support_bundle_id, - hash, - stream::once(async { - Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) - }), - ) - .await - .expect("Support bundle should already exist"); + let bundle = mgr + .start_creation(harness.zpool_id, dataset_id, support_bundle_id) + .await + .expect("Support bundle should already exist"); - // This is an edge-case, but just to make sure the behavior - // is codified: If we are creating a bundle that already exists, - // we'll skip reading the body. 
-        mgr.create(
-            harness.zpool_id,
-            dataset_id,
-            support_bundle_id,
-            hash,
-            stream::once(async {
-                // NOTE: This is different from the call above.
-                Ok(Bytes::from_static(b"Ignored"))
-            }),
-        )
-        .await
-        .expect("Support bundle should already exist");
+        assert_eq!(bundle.state, SupportBundleState::Complete);
 
         harness.cleanup().await;
         logctx.cleanup_successful();
@@ -1950,18 +2038,17 @@ mod tests {
         );
 
         // Create a new bundle
-        let bundle = mgr
-            .create(
-                harness.zpool_id,
-                dataset_id,
-                support_bundle_id,
-                hash,
-                stream::once(async {
-                    Ok(Bytes::copy_from_slice(zipfile_data.as_slice()))
-                }),
-            )
-            .await
-            .expect("Should have created support bundle");
+        let bundle = start_transfer_and_finalize(
+            &mgr,
+            harness.zpool_id,
+            dataset_id,
+            support_bundle_id,
+            hash,
+            stream::once(async {
+                Ok(Bytes::copy_from_slice(zipfile_data.as_slice()))
+            }),
+        )
+        .await;
         assert_eq!(bundle.support_bundle_id, support_bundle_id);
         assert_eq!(bundle.state, SupportBundleState::Complete);
 
diff --git a/sled-agent/types/src/support_bundle.rs b/sled-agent/types/src/support_bundle.rs
index 2cf6916834a..ab8c66bfc1d 100644
--- a/sled-agent/types/src/support_bundle.rs
+++ b/sled-agent/types/src/support_bundle.rs
@@ -17,4 +17,4 @@
 // not removed. If the files used here change in the future, DumpSetupWorker should also be
 // updated.
 pub const BUNDLE_FILE_NAME: &str = "bundle.zip";
-pub const BUNDLE_TMP_FILE_NAME_SUFFIX: &str = "bundle.zip.tmp";
+pub const BUNDLE_TMP_FILE_NAME: &str = "bundle.zip.tmp";

From 84be721af6efe102cb1dc09469b46e09ffd40371 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Thu, 10 Jul 2025 11:14:34 -0700
Subject: [PATCH 2/2] expanding test support

---
 .../tasks/support_bundle_collector.rs         | 163 ++++++++-
 sled-agent/src/support_bundle/storage.rs      | 319 +++++++++++++++++-
 2 files changed, 461 insertions(+), 21 deletions(-)

diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs
index 78fc121cf96..988e170239d 100644
--- a/nexus/src/app/background/tasks/support_bundle_collector.rs
+++ b/nexus/src/app/background/tasks/support_bundle_collector.rs
@@ -46,6 +46,7 @@ use sha2::{Digest, Sha256};
 use slog_error_chain::InlineErrorChain;
 use std::future::Future;
 use std::io::Write;
+use std::num::NonZeroU64;
 use std::sync::Arc;
 use tokio::io::AsyncReadExt;
 use tokio::io::AsyncSeekExt;
@@ -59,6 +60,10 @@ use zip::write::FullFileOptions;
 // rather than "/tmp", which would keep this collected data in-memory.
 const TEMPDIR: &str = "/var/tmp";
 
+// The size of each piece of a support bundle to transfer to the sled agent
+// within a single streaming request.
+const CHUNK_SIZE: NonZeroU64 = NonZeroU64::new(1024 * 1024 * 1024).unwrap();
+
 fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle {
     authz::SupportBundle::new(
         authz::FLEET,
@@ -68,10 +73,22 @@ fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle {
 }
 
 // Specifies the data to be collected within the Support Bundle.
-#[derive(Clone, Default)]
+#[derive(Clone)]
 struct BundleRequest {
     // If "false": Skip collecting host-specific info from each sled.
     skip_sled_info: bool,
+
+    // The size of chunks to use when transferring a bundle from Nexus
+    // to a sled agent.
+    //
+    // Typically, this is CHUNK_SIZE, but can be modified for testing.
+ transfer_chunk_size: NonZeroU64, +} + +impl Default for BundleRequest { + fn default() -> Self { + Self { skip_sled_info: false, transfer_chunk_size: CHUNK_SIZE } + } } // Result of asking a sled agent to clean up a bundle @@ -390,6 +407,7 @@ impl SupportBundleCollector { opctx: opctx.child(std::collections::BTreeMap::new()), request: request.clone(), bundle: bundle.clone(), + transfer_chunk_size: request.transfer_chunk_size, }); let authz_bundle = authz_support_bundle_from_id(bundle.id.into()); @@ -434,6 +452,7 @@ struct BundleCollection { opctx: OpContext, request: BundleRequest, bundle: SupportBundle, + transfer_chunk_size: NonZeroU64, } impl BundleCollection { @@ -445,6 +464,20 @@ impl BundleCollection { // as it's being collected. let dir = tempdir_in(TEMPDIR)?; + let report = self.collect_bundle_locally(&dir).await?; + self.store_bundle_on_sled(dir).await?; + Ok(report) + } + + // Create the support bundle, placing the contents into a user-specified + // directory. + // + // Does not attempt to convert the contents into a zipfile, nor send them + // to any durable storage. + async fn collect_bundle_locally( + self: &Arc, + dir: &Utf8TempDir, + ) -> anyhow::Result { let mut collection = Box::pin(self.collect_bundle_as_file(&dir)); // We periodically check the state of the support bundle - if a user @@ -456,7 +489,7 @@ impl BundleCollection { work_duration, ); - let report = loop { + loop { tokio::select! { // Timer fired mid-collection - let's check if we should stop. _ = yield_interval.tick() => { @@ -487,11 +520,16 @@ impl BundleCollection { "Bundle Collection completed"; "bundle" => %self.bundle.id ); - break report?; + return report; }, } - }; + } + } + async fn store_bundle_on_sled( + &self, + dir: Utf8TempDir, + ) -> anyhow::Result<()> { // Create the zipfile as a temporary file let mut zipfile = tokio::fs::File::from_std(bundle_to_zipfile(&dir)?); let total_len = zipfile.metadata().await?.len(); @@ -537,12 +575,10 @@ impl BundleCollection { // crashed or failed between "finalizing" and "writing to the database that we // finished". 
info!(&self.log, "Support bundle was already collected"; "bundle" => %self.bundle.id); - return Ok(report); + return Ok(()); } info!(&self.log, "Support bundle creation started"; "bundle" => %self.bundle.id); - const CHUNK_SIZE: u64 = 1024 * 1024 * 1024; - let mut offset = 0; while offset < total_len { // Stream the zipfile to the sled where it should be kept @@ -554,19 +590,30 @@ impl BundleCollection { format!("Failed to seek to offset {offset} / {total_len} within zipfile") })?; - // Only stream at most CHUNK_SIZE bytes at once - let remaining = std::cmp::min(CHUNK_SIZE, total_len - offset); + // Only stream at most "transfer_chunk_size" bytes at once + let remaining = std::cmp::min( + self.transfer_chunk_size.get(), + total_len - offset, + ); let limited_file = file.take(remaining); let stream = tokio_util::io::ReaderStream::new(limited_file); let body = reqwest::Body::wrap_stream(stream); + info!( + &self.log, + "Streaming bundle chunk"; + "bundle" => %self.bundle.id, + "offset" => offset, + "length" => remaining, + ); + sled_client.support_bundle_transfer( &zpool, &dataset, &support_bundle, offset, body ).await.with_context(|| { format!("Failed to transfer bundle: {remaining}@{offset} of {total_len} to sled") })?; - offset += CHUNK_SIZE; + offset += self.transfer_chunk_size.get(); } sled_client @@ -581,7 +628,7 @@ impl BundleCollection { // Returning from this method should drop all temporary storage // allocated locally for this support bundle. - Ok(report) + Ok(()) } // Perform the work of collecting the support bundle into a temporary directory @@ -1122,7 +1169,9 @@ async fn save_sp_dumps( mod test { use super::*; + use crate::app::support_bundles::SupportBundleQueryType; use camino_tempfile::tempdir; + use http_body_util::BodyExt; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::RendezvousDebugDataset; @@ -1402,6 +1451,7 @@ mod test { // NOTE: The support bundle querying interface isn't supported on // the simulated sled agent (yet?) so we're skipping this step. skip_sled_info: true, + ..Default::default() }; let report = collector .collect_bundle(&opctx, &request) @@ -1428,6 +1478,85 @@ mod test { assert!(report.is_none()); } + #[nexus_test(server = crate::Server)] + async fn test_collect_chunked(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let resolver = nexus.resolver(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Before we can create any bundles, we need to create the + // space for them to be provisioned. + let _datasets = + TestDataset::setup(cptestctx, &datastore, &opctx, 1).await; + + let bundle = datastore + .support_bundle_create(&opctx, "For collection testing", nexus.id()) + .await + .expect("Couldn't allocate a support bundle"); + assert_eq!(bundle.state, SupportBundleState::Collecting); + + let collector = SupportBundleCollector::new( + datastore.clone(), + resolver.clone(), + false, + nexus.id(), + ); + + // The bundle collection should complete successfully. + // + // We're going to use a really small chunk size here to force the bundle + // to get split up. 
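+        //
+        // The collected bundle is far larger than 16 bytes, so this exercises
+        // many sequential "support_bundle_transfer" calls before the final
+        // "support_bundle_finalize".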
+        let request = BundleRequest {
+            skip_sled_info: true,
+            transfer_chunk_size: NonZeroU64::new(16).unwrap(),
+        };
+
+        let report = collector
+            .collect_bundle(&opctx, &request)
+            .await
+            .expect("Collection should have succeeded under test")
+            .expect("Collecting the bundle should have generated a report");
+        assert_eq!(report.bundle, bundle.id.into());
+        assert!(report.listed_in_service_sleds);
+        assert!(report.listed_sps);
+        assert!(report.activated_in_db_ok);
+
+        let observed_bundle = datastore
+            .support_bundle_get(&opctx, bundle.id.into())
+            .await
+            .expect("Bundle should definitely be in db by this point");
+        assert_eq!(observed_bundle.state, SupportBundleState::Active);
+
+        // Download a file from the bundle, to verify that it was transferred
+        // successfully.
+        let head = false;
+        let range = None;
+        let response = nexus
+            .support_bundle_download(
+                &opctx,
+                observed_bundle.id.into(),
+                SupportBundleQueryType::Path {
+                    file_path: "bundle_id.txt".to_string(),
+                },
+                head,
+                range,
+            )
+            .await
+            .unwrap();
+
+        // Read the body to bytes, then convert to string
+        let body_bytes =
+            response.into_body().collect().await.unwrap().to_bytes();
+        let body_string = String::from_utf8(body_bytes.to_vec()).unwrap();
+
+        // Verify the content matches the bundle ID
+        assert_eq!(body_string, observed_bundle.id.to_string());
+    }
+
     #[nexus_test(server = crate::Server)]
     async fn test_collect_many(cptestctx: &ControlPlaneTestContext) {
         let nexus = &cptestctx.server.server_context().nexus;
@@ -1461,7 +1590,8 @@ mod test {
         );
 
         // Each time we call "collect_bundle", we collect a SINGLE bundle.
-        let request = BundleRequest { skip_sled_info: true };
+        let request =
+            BundleRequest { skip_sled_info: true, ..Default::default() };
         let report = collector
             .collect_bundle(&opctx, &request)
             .await
@@ -1599,7 +1729,8 @@ mod test {
             false,
             nexus.id(),
         );
-        let request = BundleRequest { skip_sled_info: true };
+        let request =
+            BundleRequest { skip_sled_info: true, ..Default::default() };
         let report = collector
             .collect_bundle(&opctx, &request)
             .await
@@ -1735,7 +1866,8 @@ mod test {
             false,
             nexus.id(),
         );
-        let request = BundleRequest { skip_sled_info: true };
+        let request =
+            BundleRequest { skip_sled_info: true, ..Default::default() };
         let report = collector
             .collect_bundle(&opctx, &request)
             .await
@@ -1814,7 +1946,8 @@ mod test {
             false,
             nexus.id(),
         );
-        let request = BundleRequest { skip_sled_info: true };
+        let request =
+            BundleRequest { skip_sled_info: true, ..Default::default() };
         let report = collector
             .collect_bundle(&opctx, &request)
             .await
diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs
index 67fdd35437d..27f44f59ab4 100644
--- a/sled-agent/src/support_bundle/storage.rs
+++ b/sled-agent/src/support_bundle/storage.rs
@@ -1507,10 +1507,317 @@ mod tests {
             .expect_err("Should not be able to HEAD directory");
         assert!(matches!(err, Error::NotAFile), "Unexpected error: {err:?}");
 
-        // DELETE the bundle on the dataset
+        // delete the bundle on the dataset
         mgr.delete(harness.zpool_id, dataset_id, support_bundle_id)
             .await
-            .expect("Should have been able to DELETE bundle");
+            .expect("Should have been able to delete bundle");
+
+        harness.cleanup().await;
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn chunked_file_transfer() {
+        let logctx = test_setup_log("chunked_file_transfer");
+        let log = &logctx.log;
+
+        // Set up storage
+        let harness = SingleU2StorageHarness::new(log).await;
+
+        // For this test, we'll add a dataset that can contain our
bundles. + let dataset_id = DatasetUuid::new_v4(); + harness.configure_dataset(dataset_id, DatasetKind::Debug).await; + + // Access the Support Bundle API + let mgr = SupportBundleManager::new( + log, + harness.storage_test_harness.handle(), + ); + + // Create a fake support bundle -- really, just a zipfile. + let support_bundle_id = SupportBundleUuid::new_v4(); + let zipfile_data = example_zipfile(); + let hash = ArtifactHash( + Sha256::digest(zipfile_data.as_slice()) + .as_slice() + .try_into() + .unwrap(), + ); + + let zpool_id = harness.zpool_id; + + mgr.start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have started creation"); + + // Split the zipfile into halves, so we can transfer it in two chunks + let len1 = zipfile_data.len() / 2; + let stream1 = stream::once(async { + Ok(Bytes::copy_from_slice(&zipfile_data.as_slice()[..len1])) + }); + let stream2 = stream::once(async { + Ok(Bytes::copy_from_slice(&zipfile_data.as_slice()[len1..])) + }); + + mgr.transfer(zpool_id, dataset_id, support_bundle_id, 0, stream1) + .await + .expect("Should have transferred bundle (part1)"); + mgr.transfer( + zpool_id, + dataset_id, + support_bundle_id, + len1 as u64, + stream2, + ) + .await + .expect("Should have transferred bundle (part2)"); + let bundle = mgr + .finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect("Should have finalized bundle"); + assert_eq!(bundle.support_bundle_id, support_bundle_id); + assert_eq!(bundle.state, SupportBundleState::Complete); + + // GET the bundle we created, and observe the contents of the bundle + let mut response = mgr + .get( + harness.zpool_id, + dataset_id, + support_bundle_id, + None, + SupportBundleQueryType::Whole, + ) + .await + .expect("Should have been able to GET bundle"); + assert_eq!(read_body(&mut response).await, zipfile_data); + assert_eq!(response.headers().len(), 3); + assert_eq!( + response.headers()[CONTENT_LENGTH], + zipfile_data.len().to_string() + ); + assert_eq!(response.headers()[CONTENT_TYPE], "application/zip"); + assert_eq!(response.headers()[ACCEPT_RANGES], "bytes"); + + // delete the bundle on the dataset + mgr.delete(harness.zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have been able to delete bundle"); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn chunked_file_transfer_restart() { + let logctx = test_setup_log("chunked_file_transfer_restart"); + let log = &logctx.log; + + // Set up storage + let harness = SingleU2StorageHarness::new(log).await; + + // For this test, we'll add a dataset that can contain our bundles. + let dataset_id = DatasetUuid::new_v4(); + harness.configure_dataset(dataset_id, DatasetKind::Debug).await; + + // Access the Support Bundle API + let mgr = SupportBundleManager::new( + log, + harness.storage_test_harness.handle(), + ); + + // Create a fake support bundle -- really, just a zipfile. + let support_bundle_id = SupportBundleUuid::new_v4(); + let zipfile_data = example_zipfile(); + let hash = ArtifactHash( + Sha256::digest(zipfile_data.as_slice()) + .as_slice() + .try_into() + .unwrap(), + ); + + let zpool_id = harness.zpool_id; + + mgr.start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have started creation"); + + let bad_stream = + stream::once(async { Ok(Bytes::copy_from_slice(&[0, 0, 0, 0])) }); + + // Write some "bad bytes" and finalize it. 
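+        //
+        // The hash passed to "finalize" below was computed over the real
+        // zipfile, so finalizing the bad bytes must fail with a hash
+        // mismatch.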
+ mgr.transfer(zpool_id, dataset_id, support_bundle_id, 0, bad_stream) + .await + .expect("Should have transferred bad bytes"); + let err = mgr + .finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect_err("Should have failed to finalize"); + assert!(matches!(err, Error::HashMismatch), "Unexpected err: {err:?}"); + + // We can restart the stream if we invoke "start_creation" again. + mgr.start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have started creation"); + + // Now transfer valid data + mgr.transfer( + zpool_id, + dataset_id, + support_bundle_id, + 0, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await + .expect("Should have transferred bundle"); + let bundle = mgr + .finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect("Should have finalized bundle"); + assert_eq!(bundle.support_bundle_id, support_bundle_id); + assert_eq!(bundle.state, SupportBundleState::Complete); + + // GET the bundle we created, and observe the contents of the bundle + let mut response = mgr + .get( + harness.zpool_id, + dataset_id, + support_bundle_id, + None, + SupportBundleQueryType::Whole, + ) + .await + .expect("Should have been able to GET bundle"); + assert_eq!(read_body(&mut response).await, zipfile_data); + assert_eq!(response.headers().len(), 3); + assert_eq!( + response.headers()[CONTENT_LENGTH], + zipfile_data.len().to_string() + ); + assert_eq!(response.headers()[CONTENT_TYPE], "application/zip"); + assert_eq!(response.headers()[ACCEPT_RANGES], "bytes"); + + // Delete the bundle on the dataset + mgr.delete(harness.zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have been able to delete bundle"); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn try_to_write_after_finalize() { + let logctx = test_setup_log("try_to_write_after_finalize"); + let log = &logctx.log; + + // Set up storage + let harness = SingleU2StorageHarness::new(log).await; + + // For this test, we'll add a dataset that can contain our bundles. + let dataset_id = DatasetUuid::new_v4(); + harness.configure_dataset(dataset_id, DatasetKind::Debug).await; + + // Access the Support Bundle API + let mgr = SupportBundleManager::new( + log, + harness.storage_test_harness.handle(), + ); + + // Create a fake support bundle -- really, just a zipfile. + let support_bundle_id = SupportBundleUuid::new_v4(); + let zipfile_data = example_zipfile(); + let hash = ArtifactHash( + Sha256::digest(zipfile_data.as_slice()) + .as_slice() + .try_into() + .unwrap(), + ); + + let zpool_id = harness.zpool_id; + + mgr.start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have started creation"); + mgr.transfer( + zpool_id, + dataset_id, + support_bundle_id, + 0, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await + .expect("Should have transferred bundle"); + let bundle = mgr + .finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect("Should have finalized bundle"); + assert_eq!(bundle.support_bundle_id, support_bundle_id); + assert_eq!(bundle.state, SupportBundleState::Complete); + + // If we try to transfer again, we'll see a failure. 
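+        //
+        // Finalizing renamed the tempfile to "bundle.zip", and "transfer"
+        // opens the tempfile with `create(false)`, so the open fails with
+        // "NotFound".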
+ let err = mgr + .transfer( + zpool_id, + dataset_id, + support_bundle_id, + 0, + stream::once(async { + Ok(Bytes::copy_from_slice(zipfile_data.as_slice())) + }), + ) + .await + .expect_err("Should have failed to transfer bundle"); + assert!( + matches!(err, Error::Io(ref io) if io.kind() == std::io::ErrorKind::NotFound), + "Unexpected err: {err:?}" + ); + + // If we try to finalize again, we'll see a failure. + let err = mgr + .finalize(zpool_id, dataset_id, support_bundle_id, hash) + .await + .expect_err("Should have failed to finalize bundle"); + assert!( + matches!(err, Error::Io(ref io) if io.kind() == std::io::ErrorKind::NotFound), + "Unexpected err: {err:?}" + ); + + // If we try to create again, we'll see "OK" - but the + // bundle should already exist, so we can immediately "GET" it afterwards. + let metadata = mgr + .start_creation(zpool_id, dataset_id, support_bundle_id) + .await + .expect("Creation should have succeeded"); + assert_eq!(metadata.state, SupportBundleState::Complete); + + // GET the bundle we created, and observe the contents of the bundle + let mut response = mgr + .get( + harness.zpool_id, + dataset_id, + support_bundle_id, + None, + SupportBundleQueryType::Whole, + ) + .await + .expect("Should have been able to GET bundle"); + assert_eq!(read_body(&mut response).await, zipfile_data); + assert_eq!(response.headers().len(), 3); + assert_eq!( + response.headers()[CONTENT_LENGTH], + zipfile_data.len().to_string() + ); + assert_eq!(response.headers()[CONTENT_TYPE], "application/zip"); + assert_eq!(response.headers()[ACCEPT_RANGES], "bytes"); + + // Delete the bundle on the dataset + mgr.delete(harness.zpool_id, dataset_id, support_bundle_id) + .await + .expect("Should have been able to delete bundle"); harness.cleanup().await; logctx.cleanup_successful(); @@ -1671,7 +1978,7 @@ mod tests { // We can delete the bundle, and it should no longer appear. mgr.delete(harness.zpool_id, dataset_id, support_bundle_id) .await - .expect("Should have been able to DELETE bundle"); + .expect("Should have been able to delete bundle"); let bundles = mgr.list(harness.zpool_id, dataset_id).await.unwrap(); assert_eq!(bundles.len(), 0); @@ -1736,7 +2043,7 @@ mod tests { // We can delete the bundle, and it should no longer appear. mgr.delete(harness.zpool_id, dataset_id, support_bundle_id) .await - .expect("Should have been able to DELETE bundle"); + .expect("Should have been able to delete bundle"); let bundles = mgr.list(harness.zpool_id, dataset_id).await.unwrap(); assert_eq!(bundles.len(), 0); @@ -2197,10 +2504,10 @@ mod tests { .expect_err("Should not be able to HEAD directory"); assert!(matches!(err, Error::NotAFile), "Unexpected error: {err:?}"); - // DELETE the bundle on the dataset + // delete the bundle on the dataset mgr.delete(harness.zpool_id, dataset_id, support_bundle_id) .await - .expect("Should have been able to DELETE bundle"); + .expect("Should have been able to delete bundle"); harness.cleanup().await; logctx.cleanup_successful();
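
A note on the protocol these two patches introduce: storing a bundle on a
sled is now a three-step exchange rather than a single POST. The sketch below
is illustrative only. It assumes a `sled_client` generated from the updated
OpenAPI document, plus the `zpool`/`dataset`/`support_bundle` IDs, the
`zipfile` handle, the SHA-256 `hash`, and the `CHUNK_SIZE` constant from the
collector code above; it is not part of the patch itself.

    // 1. Create (or reset) the sled-side tempfile, "bundle.zip.tmp".
    let state = sled_client
        .support_bundle_start_creation(&zpool, &dataset, &support_bundle)
        .await?;

    // A previous attempt may have already finalized this bundle; if so,
    // its contents are immutable and there is nothing left to upload.
    if !matches!(state.state, SupportBundleState::Complete) {
        // 2. Upload the zipfile in fixed-size pieces. Each PUT carries the
        // byte offset at which the sled should write the chunk, so a chunk
        // can be retried without restarting the whole upload.
        let total_len = zipfile.metadata().await?.len();
        let mut offset = 0;
        while offset < total_len {
            let mut file = zipfile.try_clone().await?;
            file.seek(SeekFrom::Start(offset)).await?;
            let remaining = std::cmp::min(CHUNK_SIZE.get(), total_len - offset);
            let stream =
                tokio_util::io::ReaderStream::new(file.take(remaining));
            sled_client
                .support_bundle_transfer(
                    &zpool,
                    &dataset,
                    &support_bundle,
                    offset,
                    reqwest::Body::wrap_stream(stream),
                )
                .await?;
            offset += remaining;
        }

        // 3. Ask the sled to verify the SHA-256 hash of the tempfile and
        // atomically rename "bundle.zip.tmp" to "bundle.zip".
        sled_client
            .support_bundle_finalize(
                &zpool,
                &dataset,
                &support_bundle,
                &hash.to_string(),
            )
            .await?;
    }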