@@ -49,14 +49,19 @@ use tracing::Instrument;
49
49
use tracing:: Span ;
50
50
use uuid:: Uuid ;
51
51
52
- type BoxedFuture =
53
- Pin < Box < dyn Future < Output = Result < CompactionResponse , Box < dyn ChromaError > > > + Send > > ;
52
+ type CompactionOutput = Result < CompactionResponse , Box < dyn ChromaError > > ;
53
+ type BoxedFuture = Pin < Box < dyn Future < Output = CompactionOutput > + Send > > ;
54
54
55
55
struct CompactionTask {
56
56
collection_id : CollectionUuid ,
57
57
future : BoxedFuture ,
58
58
}
59
59
60
+ struct CompactionTaskCompletion {
61
+ collection_id : CollectionUuid ,
62
+ result : CompactionOutput ,
63
+ }
64
+
60
65
#[ derive( Clone ) ]
61
66
pub ( crate ) struct CompactionManagerContext {
62
67
system : System ,
@@ -85,7 +90,7 @@ pub(crate) struct CompactionManager {
85
90
scheduler : Scheduler ,
86
91
context : CompactionManagerContext ,
87
92
compact_awaiter_channel : mpsc:: Sender < CompactionTask > ,
88
- compact_awaiter_completion_channel : mpsc:: UnboundedReceiver < CompactionResponse > ,
93
+ compact_awaiter_completion_channel : mpsc:: UnboundedReceiver < CompactionTaskCompletion > ,
89
94
compact_awaiter : tokio:: task:: JoinHandle < ( ) > ,
90
95
on_next_memberlist_signal : Option < oneshot:: Sender < ( ) > > ,
91
96
}
@@ -129,7 +134,7 @@ impl CompactionManager {
129
134
// Using unbounded channel for the completion channel as its size
130
135
// is bounded by max_concurrent_jobs. It's far more important for the
131
136
// completion channel to not block or drop messages.
132
- let ( completion_tx, completion_rx) = mpsc:: unbounded_channel :: < CompactionResponse > ( ) ;
137
+ let ( completion_tx, completion_rx) = mpsc:: unbounded_channel :: < CompactionTaskCompletion > ( ) ;
133
138
let compact_awaiter = tokio:: spawn ( async {
134
139
compact_awaiter_loop ( compact_awaiter_rx, completion_tx) . await ;
135
140
} ) ;
@@ -237,11 +242,18 @@ impl CompactionManager {
237
242
self . context . dispatcher = Some ( dispatcher) ;
238
243
}
239
244
240
- fn process_completions ( & mut self ) -> Vec < CompactionResponse > {
245
+ fn process_completions ( & mut self ) -> Vec < CompactionTaskCompletion > {
241
246
let compact_awaiter_completion_channel = & mut self . compact_awaiter_completion_channel ;
242
247
let mut completed_collections = Vec :: new ( ) ;
243
248
while let Ok ( resp) = compact_awaiter_completion_channel. try_recv ( ) {
244
- self . scheduler . complete_collection ( resp. collection_id ) ;
249
+ match resp. result {
250
+ Ok ( _) => {
251
+ self . scheduler . succeed_collection ( resp. collection_id ) ;
252
+ }
253
+ Err ( _) => {
254
+ self . scheduler . fail_collection ( resp. collection_id ) ;
255
+ }
256
+ }
245
257
completed_collections. push ( resp) ;
246
258
}
247
259
completed_collections
@@ -349,6 +361,7 @@ impl Configurable<(CompactionServiceConfig, System)> for CompactionManager {
349
361
Box :: < dyn AssignmentPolicy > :: try_from_config ( assignment_policy_config, registry)
350
362
. await ?;
351
363
let job_expiry_seconds = config. compactor . job_expiry_seconds ;
364
+ let max_failure_count = config. compactor . max_failure_count ;
352
365
let scheduler = Scheduler :: new (
353
366
my_ip,
354
367
log. clone ( ) ,
@@ -359,6 +372,7 @@ impl Configurable<(CompactionServiceConfig, System)> for CompactionManager {
359
372
assignment_policy,
360
373
disabled_collections,
361
374
job_expiry_seconds,
375
+ max_failure_count,
362
376
) ;
363
377
364
378
let blockfile_provider = BlockfileProvider :: try_from_config (
@@ -405,25 +419,31 @@ impl Configurable<(CompactionServiceConfig, System)> for CompactionManager {
405
419
406
420
async fn compact_awaiter_loop (
407
421
mut job_rx : mpsc:: Receiver < CompactionTask > ,
408
- completion_tx : mpsc:: UnboundedSender < CompactionResponse > ,
422
+ completion_tx : mpsc:: UnboundedSender < CompactionTaskCompletion > ,
409
423
) {
410
424
let mut futures = FuturesUnordered :: new ( ) ;
411
425
loop {
412
426
select ! {
413
427
Some ( job) = job_rx. recv( ) => {
414
428
futures. push( async move {
415
- let _ = AssertUnwindSafe ( job. future) . catch_unwind( ) . await ;
416
- CompactionResponse {
417
- collection_id: job. collection_id,
429
+ let result = AssertUnwindSafe ( job. future) . catch_unwind( ) . await ;
430
+ match result {
431
+ Ok ( response) => CompactionTaskCompletion {
432
+ collection_id: job. collection_id,
433
+ result: response,
434
+ } ,
435
+ Err ( _) => CompactionTaskCompletion {
436
+ collection_id: job. collection_id,
437
+ result: Err ( Box :: new( CompactionError :: FailedToCompact ) ) ,
438
+ } ,
418
439
}
419
440
} ) ;
420
441
}
421
- Some ( compaction_response) = futures. next( ) => {
422
- match completion_tx. send( compaction_response) {
442
+ Some ( completed_job) = futures. next( ) => {
443
+ let collection_id = completed_job. collection_id;
444
+ match completion_tx. send( completed_job) {
423
445
Ok ( _) => { } ,
424
- Err ( _) => {
425
- tracing:: error!( "Failed to send compaction response" ) ;
426
- }
446
+ Err ( _) => tracing:: error!( "Failed to record compaction result for collection {}" , collection_id) ,
427
447
}
428
448
}
429
449
else => {
@@ -771,6 +791,7 @@ mod tests {
771
791
let fetch_log_batch_size = 100 ;
772
792
let purge_dirty_log_timeout_seconds = 60 ;
773
793
let job_expiry_seconds = 3600 ;
794
+ let max_failure_count = 3 ;
774
795
775
796
// Set assignment policy
776
797
let mut assignment_policy = Box :: new ( RendezvousHashingAssignmentPolicy :: default ( ) ) ;
@@ -786,6 +807,7 @@ mod tests {
786
807
assignment_policy,
787
808
HashSet :: new ( ) ,
788
809
job_expiry_seconds,
810
+ max_failure_count,
789
811
) ;
790
812
// Set memberlist
791
813
scheduler. set_memberlist ( vec ! [ my_member. clone( ) ] ) ;
@@ -863,6 +885,7 @@ mod tests {
863
885
completed_compactions. extend (
864
886
completed
865
887
. iter ( )
888
+ . filter ( |c| c. result . is_ok ( ) )
866
889
. map ( |c| c. collection_id )
867
890
. collect :: < Vec < CollectionUuid > > ( ) ,
868
891
) ;
0 commit comments