@@ -46,6 +46,7 @@ use sha2::{Digest, Sha256};
46
46
use slog_error_chain:: InlineErrorChain ;
47
47
use std:: future:: Future ;
48
48
use std:: io:: Write ;
49
+ use std:: num:: NonZeroU64 ;
49
50
use std:: sync:: Arc ;
50
51
use tokio:: io:: AsyncReadExt ;
51
52
use tokio:: io:: AsyncSeekExt ;
@@ -59,6 +60,10 @@ use zip::write::FullFileOptions;
59
60
// rather than "/tmp", which would keep this collected data in-memory.
60
61
const TEMPDIR : & str = "/var/tmp" ;
61
62
63
+ // The size of piece of a support bundle to transfer to the sled agent
64
+ // within a single streaming request.
65
+ const CHUNK_SIZE : NonZeroU64 = NonZeroU64 :: new ( 1024 * 1024 * 1024 ) . unwrap ( ) ;
66
+
62
67
fn authz_support_bundle_from_id ( id : SupportBundleUuid ) -> authz:: SupportBundle {
63
68
authz:: SupportBundle :: new (
64
69
authz:: FLEET ,
@@ -68,10 +73,22 @@ fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle {
68
73
}
69
74
70
75
// Specifies the data to be collected within the Support Bundle.
71
- #[ derive( Clone , Default ) ]
76
+ #[ derive( Clone ) ]
72
77
struct BundleRequest {
73
78
// If "false": Skip collecting host-specific info from each sled.
74
79
skip_sled_info : bool ,
80
+
81
+ // The size of chunks to use when transferring a bundle from Nexus
82
+ // to a sled agent.
83
+ //
84
+ // Typically, this is CHUNK_SIZE, but can be modified for testing.
85
+ transfer_chunk_size : NonZeroU64 ,
86
+ }
87
+
88
+ impl Default for BundleRequest {
89
+ fn default ( ) -> Self {
90
+ Self { skip_sled_info : false , transfer_chunk_size : CHUNK_SIZE }
91
+ }
75
92
}
76
93
77
94
// Result of asking a sled agent to clean up a bundle
@@ -390,6 +407,7 @@ impl SupportBundleCollector {
390
407
opctx : opctx. child ( std:: collections:: BTreeMap :: new ( ) ) ,
391
408
request : request. clone ( ) ,
392
409
bundle : bundle. clone ( ) ,
410
+ transfer_chunk_size : request. transfer_chunk_size ,
393
411
} ) ;
394
412
395
413
let authz_bundle = authz_support_bundle_from_id ( bundle. id . into ( ) ) ;
@@ -434,6 +452,7 @@ struct BundleCollection {
434
452
opctx : OpContext ,
435
453
request : BundleRequest ,
436
454
bundle : SupportBundle ,
455
+ transfer_chunk_size : NonZeroU64 ,
437
456
}
438
457
439
458
impl BundleCollection {
@@ -445,6 +464,20 @@ impl BundleCollection {
445
464
// as it's being collected.
446
465
let dir = tempdir_in ( TEMPDIR ) ?;
447
466
467
+ let report = self . collect_bundle_locally ( & dir) . await ?;
468
+ self . store_bundle_on_sled ( dir) . await ?;
469
+ Ok ( report)
470
+ }
471
+
472
+ // Create the support bundle, placing the contents into a user-specified
473
+ // directory.
474
+ //
475
+ // Does not attempt to convert the contents into a zipfile, nor send them
476
+ // to any durable storage.
477
+ async fn collect_bundle_locally (
478
+ self : & Arc < Self > ,
479
+ dir : & Utf8TempDir ,
480
+ ) -> anyhow:: Result < SupportBundleCollectionReport > {
448
481
let mut collection = Box :: pin ( self . collect_bundle_as_file ( & dir) ) ;
449
482
450
483
// We periodically check the state of the support bundle - if a user
@@ -456,7 +489,7 @@ impl BundleCollection {
456
489
work_duration,
457
490
) ;
458
491
459
- let report = loop {
492
+ loop {
460
493
tokio:: select! {
461
494
// Timer fired mid-collection - let's check if we should stop.
462
495
_ = yield_interval. tick( ) => {
@@ -487,11 +520,16 @@ impl BundleCollection {
487
520
"Bundle Collection completed" ;
488
521
"bundle" => %self . bundle. id
489
522
) ;
490
- break report? ;
523
+ return report;
491
524
} ,
492
525
}
493
- } ;
526
+ }
527
+ }
494
528
529
+ async fn store_bundle_on_sled (
530
+ & self ,
531
+ dir : Utf8TempDir ,
532
+ ) -> anyhow:: Result < ( ) > {
495
533
// Create the zipfile as a temporary file
496
534
let mut zipfile = tokio:: fs:: File :: from_std ( bundle_to_zipfile ( & dir) ?) ;
497
535
let total_len = zipfile. metadata ( ) . await ?. len ( ) ;
@@ -537,12 +575,10 @@ impl BundleCollection {
537
575
// crashed or failed between "finalizing" and "writing to the database that we
538
576
// finished".
539
577
info ! ( & self . log, "Support bundle was already collected" ; "bundle" => %self . bundle. id) ;
540
- return Ok ( report ) ;
578
+ return Ok ( ( ) ) ;
541
579
}
542
580
info ! ( & self . log, "Support bundle creation started" ; "bundle" => %self . bundle. id) ;
543
581
544
- const CHUNK_SIZE : u64 = 1024 * 1024 * 1024 ;
545
-
546
582
let mut offset = 0 ;
547
583
while offset < total_len {
548
584
// Stream the zipfile to the sled where it should be kept
@@ -554,19 +590,30 @@ impl BundleCollection {
554
590
format ! ( "Failed to seek to offset {offset} / {total_len} within zipfile" )
555
591
} ) ?;
556
592
557
- // Only stream at most CHUNK_SIZE bytes at once
558
- let remaining = std:: cmp:: min ( CHUNK_SIZE , total_len - offset) ;
593
+ // Only stream at most "transfer_chunk_size" bytes at once
594
+ let remaining = std:: cmp:: min (
595
+ self . transfer_chunk_size . get ( ) ,
596
+ total_len - offset,
597
+ ) ;
559
598
let limited_file = file. take ( remaining) ;
560
599
let stream = tokio_util:: io:: ReaderStream :: new ( limited_file) ;
561
600
let body = reqwest:: Body :: wrap_stream ( stream) ;
562
601
602
+ info ! (
603
+ & self . log,
604
+ "Streaming bundle chunk" ;
605
+ "bundle" => %self . bundle. id,
606
+ "offset" => offset,
607
+ "length" => remaining,
608
+ ) ;
609
+
563
610
sled_client. support_bundle_transfer (
564
611
& zpool, & dataset, & support_bundle, offset, body
565
612
) . await . with_context ( || {
566
613
format ! ( "Failed to transfer bundle: {remaining}@{offset} of {total_len} to sled" )
567
614
} ) ?;
568
615
569
- offset += CHUNK_SIZE ;
616
+ offset += self . transfer_chunk_size . get ( ) ;
570
617
}
571
618
572
619
sled_client
@@ -581,7 +628,7 @@ impl BundleCollection {
581
628
582
629
// Returning from this method should drop all temporary storage
583
630
// allocated locally for this support bundle.
584
- Ok ( report )
631
+ Ok ( ( ) )
585
632
}
586
633
587
634
// Perform the work of collecting the support bundle into a temporary directory
@@ -1122,7 +1169,9 @@ async fn save_sp_dumps(
1122
1169
mod test {
1123
1170
use super :: * ;
1124
1171
1172
+ use crate :: app:: support_bundles:: SupportBundleQueryType ;
1125
1173
use camino_tempfile:: tempdir;
1174
+ use http_body_util:: BodyExt ;
1126
1175
use nexus_db_model:: PhysicalDisk ;
1127
1176
use nexus_db_model:: PhysicalDiskKind ;
1128
1177
use nexus_db_model:: RendezvousDebugDataset ;
@@ -1402,6 +1451,7 @@ mod test {
1402
1451
// NOTE: The support bundle querying interface isn't supported on
1403
1452
// the simulated sled agent (yet?) so we're skipping this step.
1404
1453
skip_sled_info : true ,
1454
+ ..Default :: default ( )
1405
1455
} ;
1406
1456
let report = collector
1407
1457
. collect_bundle ( & opctx, & request)
@@ -1428,6 +1478,85 @@ mod test {
1428
1478
assert ! ( report. is_none( ) ) ;
1429
1479
}
1430
1480
1481
+ #[ nexus_test( server = crate :: Server ) ]
1482
+ async fn test_collect_chunked ( cptestctx : & ControlPlaneTestContext ) {
1483
+ let nexus = & cptestctx. server . server_context ( ) . nexus ;
1484
+ let datastore = nexus. datastore ( ) ;
1485
+ let resolver = nexus. resolver ( ) ;
1486
+ let opctx = OpContext :: for_tests (
1487
+ cptestctx. logctx . log . clone ( ) ,
1488
+ datastore. clone ( ) ,
1489
+ ) ;
1490
+
1491
+ // Before we can create any bundles, we need to create the
1492
+ // space for them to be provisioned.
1493
+ let _datasets =
1494
+ TestDataset :: setup ( cptestctx, & datastore, & opctx, 1 ) . await ;
1495
+
1496
+ let bundle = datastore
1497
+ . support_bundle_create ( & opctx, "For collection testing" , nexus. id ( ) )
1498
+ . await
1499
+ . expect ( "Couldn't allocate a support bundle" ) ;
1500
+ assert_eq ! ( bundle. state, SupportBundleState :: Collecting ) ;
1501
+
1502
+ let collector = SupportBundleCollector :: new (
1503
+ datastore. clone ( ) ,
1504
+ resolver. clone ( ) ,
1505
+ false ,
1506
+ nexus. id ( ) ,
1507
+ ) ;
1508
+
1509
+ // The bundle collection should complete successfully.
1510
+ //
1511
+ // We're going to use a really small chunk size here to force the bundle
1512
+ // to get split up.
1513
+ let request = BundleRequest {
1514
+ skip_sled_info : true ,
1515
+ transfer_chunk_size : NonZeroU64 :: new ( 16 ) . unwrap ( ) ,
1516
+ } ;
1517
+
1518
+ let report = collector
1519
+ . collect_bundle ( & opctx, & request)
1520
+ . await
1521
+ . expect ( "Collection should have succeeded under test" )
1522
+ . expect ( "Collecting the bundle should have generated a report" ) ;
1523
+ assert_eq ! ( report. bundle, bundle. id. into( ) ) ;
1524
+ assert ! ( report. listed_in_service_sleds) ;
1525
+ assert ! ( report. listed_sps) ;
1526
+ assert ! ( report. activated_in_db_ok) ;
1527
+
1528
+ let observed_bundle = datastore
1529
+ . support_bundle_get ( & opctx, bundle. id . into ( ) )
1530
+ . await
1531
+ . expect ( "Bundle should definitely be in db by this point" ) ;
1532
+ assert_eq ! ( observed_bundle. state, SupportBundleState :: Active ) ;
1533
+
1534
+ // Download a file from the bundle, to verify that it was trasnferred
1535
+ // successfully.
1536
+ let head = false ;
1537
+ let range = None ;
1538
+ let response = nexus
1539
+ . support_bundle_download (
1540
+ & opctx,
1541
+ observed_bundle. id . into ( ) ,
1542
+ SupportBundleQueryType :: Path {
1543
+ file_path : "bundle_id.txt" . to_string ( ) ,
1544
+ } ,
1545
+ head,
1546
+ range,
1547
+ )
1548
+ . await
1549
+ . unwrap ( ) ;
1550
+
1551
+ // Read the body to bytes, then convert to string
1552
+ let body_bytes =
1553
+ response. into_body ( ) . collect ( ) . await . unwrap ( ) . to_bytes ( ) ;
1554
+ let body_string = String :: from_utf8 ( body_bytes. to_vec ( ) ) . unwrap ( ) ;
1555
+
1556
+ // Verify the content matches the bundle ID
1557
+ assert_eq ! ( body_string, observed_bundle. id. to_string( ) ) ;
1558
+ }
1559
+
1431
1560
#[ nexus_test( server = crate :: Server ) ]
1432
1561
async fn test_collect_many ( cptestctx : & ControlPlaneTestContext ) {
1433
1562
let nexus = & cptestctx. server . server_context ( ) . nexus ;
@@ -1461,7 +1590,8 @@ mod test {
1461
1590
) ;
1462
1591
1463
1592
// Each time we call "collect_bundle", we collect a SINGLE bundle.
1464
- let request = BundleRequest { skip_sled_info : true } ;
1593
+ let request =
1594
+ BundleRequest { skip_sled_info : true , ..Default :: default ( ) } ;
1465
1595
let report = collector
1466
1596
. collect_bundle ( & opctx, & request)
1467
1597
. await
@@ -1599,7 +1729,8 @@ mod test {
1599
1729
false ,
1600
1730
nexus. id ( ) ,
1601
1731
) ;
1602
- let request = BundleRequest { skip_sled_info : true } ;
1732
+ let request =
1733
+ BundleRequest { skip_sled_info : true , ..Default :: default ( ) } ;
1603
1734
let report = collector
1604
1735
. collect_bundle ( & opctx, & request)
1605
1736
. await
@@ -1735,7 +1866,8 @@ mod test {
1735
1866
false ,
1736
1867
nexus. id ( ) ,
1737
1868
) ;
1738
- let request = BundleRequest { skip_sled_info : true } ;
1869
+ let request =
1870
+ BundleRequest { skip_sled_info : true , ..Default :: default ( ) } ;
1739
1871
let report = collector
1740
1872
. collect_bundle ( & opctx, & request)
1741
1873
. await
@@ -1814,7 +1946,8 @@ mod test {
1814
1946
false ,
1815
1947
nexus. id ( ) ,
1816
1948
) ;
1817
- let request = BundleRequest { skip_sled_info : true } ;
1949
+ let request =
1950
+ BundleRequest { skip_sled_info : true , ..Default :: default ( ) } ;
1818
1951
let report = collector
1819
1952
. collect_bundle ( & opctx, & request)
1820
1953
. await
0 commit comments