Skip to content

Commit 84be721

Browse files
committed
expanding test support
1 parent dc659ce commit 84be721

File tree

2 files changed

+461
-21
lines changed

2 files changed

+461
-21
lines changed

nexus/src/app/background/tasks/support_bundle_collector.rs

Lines changed: 148 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use sha2::{Digest, Sha256};
4646
use slog_error_chain::InlineErrorChain;
4747
use std::future::Future;
4848
use std::io::Write;
49+
use std::num::NonZeroU64;
4950
use std::sync::Arc;
5051
use tokio::io::AsyncReadExt;
5152
use tokio::io::AsyncSeekExt;
@@ -59,6 +60,10 @@ use zip::write::FullFileOptions;
5960
// rather than "/tmp", which would keep this collected data in-memory.
6061
const TEMPDIR: &str = "/var/tmp";
6162

63+
// The size of piece of a support bundle to transfer to the sled agent
64+
// within a single streaming request.
65+
const CHUNK_SIZE: NonZeroU64 = NonZeroU64::new(1024 * 1024 * 1024).unwrap();
66+
6267
fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle {
6368
authz::SupportBundle::new(
6469
authz::FLEET,
@@ -68,10 +73,22 @@ fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle {
6873
}
6974

7075
// Specifies the data to be collected within the Support Bundle.
71-
#[derive(Clone, Default)]
76+
#[derive(Clone)]
7277
struct BundleRequest {
7378
// If "false": Skip collecting host-specific info from each sled.
7479
skip_sled_info: bool,
80+
81+
// The size of chunks to use when transferring a bundle from Nexus
82+
// to a sled agent.
83+
//
84+
// Typically, this is CHUNK_SIZE, but can be modified for testing.
85+
transfer_chunk_size: NonZeroU64,
86+
}
87+
88+
impl Default for BundleRequest {
89+
fn default() -> Self {
90+
Self { skip_sled_info: false, transfer_chunk_size: CHUNK_SIZE }
91+
}
7592
}
7693

7794
// Result of asking a sled agent to clean up a bundle
@@ -390,6 +407,7 @@ impl SupportBundleCollector {
390407
opctx: opctx.child(std::collections::BTreeMap::new()),
391408
request: request.clone(),
392409
bundle: bundle.clone(),
410+
transfer_chunk_size: request.transfer_chunk_size,
393411
});
394412

395413
let authz_bundle = authz_support_bundle_from_id(bundle.id.into());
@@ -434,6 +452,7 @@ struct BundleCollection {
434452
opctx: OpContext,
435453
request: BundleRequest,
436454
bundle: SupportBundle,
455+
transfer_chunk_size: NonZeroU64,
437456
}
438457

439458
impl BundleCollection {
@@ -445,6 +464,20 @@ impl BundleCollection {
445464
// as it's being collected.
446465
let dir = tempdir_in(TEMPDIR)?;
447466

467+
let report = self.collect_bundle_locally(&dir).await?;
468+
self.store_bundle_on_sled(dir).await?;
469+
Ok(report)
470+
}
471+
472+
// Create the support bundle, placing the contents into a user-specified
473+
// directory.
474+
//
475+
// Does not attempt to convert the contents into a zipfile, nor send them
476+
// to any durable storage.
477+
async fn collect_bundle_locally(
478+
self: &Arc<Self>,
479+
dir: &Utf8TempDir,
480+
) -> anyhow::Result<SupportBundleCollectionReport> {
448481
let mut collection = Box::pin(self.collect_bundle_as_file(&dir));
449482

450483
// We periodically check the state of the support bundle - if a user
@@ -456,7 +489,7 @@ impl BundleCollection {
456489
work_duration,
457490
);
458491

459-
let report = loop {
492+
loop {
460493
tokio::select! {
461494
// Timer fired mid-collection - let's check if we should stop.
462495
_ = yield_interval.tick() => {
@@ -487,11 +520,16 @@ impl BundleCollection {
487520
"Bundle Collection completed";
488521
"bundle" => %self.bundle.id
489522
);
490-
break report?;
523+
return report;
491524
},
492525
}
493-
};
526+
}
527+
}
494528

529+
async fn store_bundle_on_sled(
530+
&self,
531+
dir: Utf8TempDir,
532+
) -> anyhow::Result<()> {
495533
// Create the zipfile as a temporary file
496534
let mut zipfile = tokio::fs::File::from_std(bundle_to_zipfile(&dir)?);
497535
let total_len = zipfile.metadata().await?.len();
@@ -537,12 +575,10 @@ impl BundleCollection {
537575
// crashed or failed between "finalizing" and "writing to the database that we
538576
// finished".
539577
info!(&self.log, "Support bundle was already collected"; "bundle" => %self.bundle.id);
540-
return Ok(report);
578+
return Ok(());
541579
}
542580
info!(&self.log, "Support bundle creation started"; "bundle" => %self.bundle.id);
543581

544-
const CHUNK_SIZE: u64 = 1024 * 1024 * 1024;
545-
546582
let mut offset = 0;
547583
while offset < total_len {
548584
// Stream the zipfile to the sled where it should be kept
@@ -554,19 +590,30 @@ impl BundleCollection {
554590
format!("Failed to seek to offset {offset} / {total_len} within zipfile")
555591
})?;
556592

557-
// Only stream at most CHUNK_SIZE bytes at once
558-
let remaining = std::cmp::min(CHUNK_SIZE, total_len - offset);
593+
// Only stream at most "transfer_chunk_size" bytes at once
594+
let remaining = std::cmp::min(
595+
self.transfer_chunk_size.get(),
596+
total_len - offset,
597+
);
559598
let limited_file = file.take(remaining);
560599
let stream = tokio_util::io::ReaderStream::new(limited_file);
561600
let body = reqwest::Body::wrap_stream(stream);
562601

602+
info!(
603+
&self.log,
604+
"Streaming bundle chunk";
605+
"bundle" => %self.bundle.id,
606+
"offset" => offset,
607+
"length" => remaining,
608+
);
609+
563610
sled_client.support_bundle_transfer(
564611
&zpool, &dataset, &support_bundle, offset, body
565612
).await.with_context(|| {
566613
format!("Failed to transfer bundle: {remaining}@{offset} of {total_len} to sled")
567614
})?;
568615

569-
offset += CHUNK_SIZE;
616+
offset += self.transfer_chunk_size.get();
570617
}
571618

572619
sled_client
@@ -581,7 +628,7 @@ impl BundleCollection {
581628

582629
// Returning from this method should drop all temporary storage
583630
// allocated locally for this support bundle.
584-
Ok(report)
631+
Ok(())
585632
}
586633

587634
// Perform the work of collecting the support bundle into a temporary directory
@@ -1122,7 +1169,9 @@ async fn save_sp_dumps(
11221169
mod test {
11231170
use super::*;
11241171

1172+
use crate::app::support_bundles::SupportBundleQueryType;
11251173
use camino_tempfile::tempdir;
1174+
use http_body_util::BodyExt;
11261175
use nexus_db_model::PhysicalDisk;
11271176
use nexus_db_model::PhysicalDiskKind;
11281177
use nexus_db_model::RendezvousDebugDataset;
@@ -1402,6 +1451,7 @@ mod test {
14021451
// NOTE: The support bundle querying interface isn't supported on
14031452
// the simulated sled agent (yet?) so we're skipping this step.
14041453
skip_sled_info: true,
1454+
..Default::default()
14051455
};
14061456
let report = collector
14071457
.collect_bundle(&opctx, &request)
@@ -1428,6 +1478,85 @@ mod test {
14281478
assert!(report.is_none());
14291479
}
14301480

1481+
#[nexus_test(server = crate::Server)]
1482+
async fn test_collect_chunked(cptestctx: &ControlPlaneTestContext) {
1483+
let nexus = &cptestctx.server.server_context().nexus;
1484+
let datastore = nexus.datastore();
1485+
let resolver = nexus.resolver();
1486+
let opctx = OpContext::for_tests(
1487+
cptestctx.logctx.log.clone(),
1488+
datastore.clone(),
1489+
);
1490+
1491+
// Before we can create any bundles, we need to create the
1492+
// space for them to be provisioned.
1493+
let _datasets =
1494+
TestDataset::setup(cptestctx, &datastore, &opctx, 1).await;
1495+
1496+
let bundle = datastore
1497+
.support_bundle_create(&opctx, "For collection testing", nexus.id())
1498+
.await
1499+
.expect("Couldn't allocate a support bundle");
1500+
assert_eq!(bundle.state, SupportBundleState::Collecting);
1501+
1502+
let collector = SupportBundleCollector::new(
1503+
datastore.clone(),
1504+
resolver.clone(),
1505+
false,
1506+
nexus.id(),
1507+
);
1508+
1509+
// The bundle collection should complete successfully.
1510+
//
1511+
// We're going to use a really small chunk size here to force the bundle
1512+
// to get split up.
1513+
let request = BundleRequest {
1514+
skip_sled_info: true,
1515+
transfer_chunk_size: NonZeroU64::new(16).unwrap(),
1516+
};
1517+
1518+
let report = collector
1519+
.collect_bundle(&opctx, &request)
1520+
.await
1521+
.expect("Collection should have succeeded under test")
1522+
.expect("Collecting the bundle should have generated a report");
1523+
assert_eq!(report.bundle, bundle.id.into());
1524+
assert!(report.listed_in_service_sleds);
1525+
assert!(report.listed_sps);
1526+
assert!(report.activated_in_db_ok);
1527+
1528+
let observed_bundle = datastore
1529+
.support_bundle_get(&opctx, bundle.id.into())
1530+
.await
1531+
.expect("Bundle should definitely be in db by this point");
1532+
assert_eq!(observed_bundle.state, SupportBundleState::Active);
1533+
1534+
// Download a file from the bundle, to verify that it was trasnferred
1535+
// successfully.
1536+
let head = false;
1537+
let range = None;
1538+
let response = nexus
1539+
.support_bundle_download(
1540+
&opctx,
1541+
observed_bundle.id.into(),
1542+
SupportBundleQueryType::Path {
1543+
file_path: "bundle_id.txt".to_string(),
1544+
},
1545+
head,
1546+
range,
1547+
)
1548+
.await
1549+
.unwrap();
1550+
1551+
// Read the body to bytes, then convert to string
1552+
let body_bytes =
1553+
response.into_body().collect().await.unwrap().to_bytes();
1554+
let body_string = String::from_utf8(body_bytes.to_vec()).unwrap();
1555+
1556+
// Verify the content matches the bundle ID
1557+
assert_eq!(body_string, observed_bundle.id.to_string());
1558+
}
1559+
14311560
#[nexus_test(server = crate::Server)]
14321561
async fn test_collect_many(cptestctx: &ControlPlaneTestContext) {
14331562
let nexus = &cptestctx.server.server_context().nexus;
@@ -1461,7 +1590,8 @@ mod test {
14611590
);
14621591

14631592
// Each time we call "collect_bundle", we collect a SINGLE bundle.
1464-
let request = BundleRequest { skip_sled_info: true };
1593+
let request =
1594+
BundleRequest { skip_sled_info: true, ..Default::default() };
14651595
let report = collector
14661596
.collect_bundle(&opctx, &request)
14671597
.await
@@ -1599,7 +1729,8 @@ mod test {
15991729
false,
16001730
nexus.id(),
16011731
);
1602-
let request = BundleRequest { skip_sled_info: true };
1732+
let request =
1733+
BundleRequest { skip_sled_info: true, ..Default::default() };
16031734
let report = collector
16041735
.collect_bundle(&opctx, &request)
16051736
.await
@@ -1735,7 +1866,8 @@ mod test {
17351866
false,
17361867
nexus.id(),
17371868
);
1738-
let request = BundleRequest { skip_sled_info: true };
1869+
let request =
1870+
BundleRequest { skip_sled_info: true, ..Default::default() };
17391871
let report = collector
17401872
.collect_bundle(&opctx, &request)
17411873
.await
@@ -1814,7 +1946,8 @@ mod test {
18141946
false,
18151947
nexus.id(),
18161948
);
1817-
let request = BundleRequest { skip_sled_info: true };
1949+
let request =
1950+
BundleRequest { skip_sled_info: true, ..Default::default() };
18181951
let report = collector
18191952
.collect_bundle(&opctx, &request)
18201953
.await

0 commit comments

Comments
 (0)