@@ -253,13 +253,19 @@ namespace NKikimr::NBsController {
253
253
// Tag type: selects the TIntrusiveListItem base of TGroupRecord that links a group into GroupsWithFaultyDisks.
struct TWithFaultyDisks {};
254
254
// Tag type: selects the TIntrusiveListItem base of TGroupRecord that links a group into GroupsWithInvalidLayout.
struct TWithInvalidLayout {};
255
255
256
+ enum class EReassignStatus : ui8 {
257
+ NotNeeded = 0 ,
258
+ Enqueued,
259
+ Active,
260
+ };
261
+
256
262
struct TGroupRecord
257
263
: TIntrusiveListItem<TGroupRecord, TWithFaultyDisks>
258
264
, TIntrusiveListItem<TGroupRecord, TWithInvalidLayout>
259
265
{
260
266
const TGroupId GroupId;
261
267
TEvControllerUpdateSelfHealInfo::TGroupContent Content;
262
- TActorId ReassignerActorId; // reassigner in flight
268
+ EReassignStatus ReassignStatus = EReassignStatus::NotNeeded;
263
269
TDuration RetryTimeout = MinRetryTimeout;
264
270
TMonotonic NextRetryTimestamp = TMonotonic::Zero();
265
271
std::shared_ptr<TBlobStorageGroupInfo::TTopology> Topology;
@@ -278,7 +284,8 @@ namespace NKikimr::NBsController {
278
284
THashMap<TGroupId, TGroupRecord> Groups;
279
285
TIntrusiveList<TGroupRecord, TWithFaultyDisks> GroupsWithFaultyDisks;
280
286
TIntrusiveList<TGroupRecord, TWithInvalidLayout> GroupsWithInvalidLayout;
281
- std::shared_ptr<std::atomic_uint64_t > UnreassignableGroups;
287
+ std::unordered_set<TGroupId> UnreassignableGroups;
288
+ std::shared_ptr<std::atomic_uint64_t > UnreassignableGroupsCount;
282
289
bool GroupLayoutSanitizerEnabled;
283
290
bool AllowMultipleRealmsOccupation;
284
291
bool DonorMode;
@@ -294,13 +301,17 @@ namespace NKikimr::NBsController {
294
301
static constexpr uint32_t GroupLayoutSanitizerOperationLogSize = 128 ;
295
302
TOperationLog<GroupLayoutSanitizerOperationLogSize> GroupLayoutSanitizerOperationLog;
296
303
304
+ std::deque<TGroupId> SelfHealReassignQueue;
305
+ std::deque<TGroupId> GroupLayoutSanitizerReassignQueue;
306
+ std::optional<TActorId> ActiveReassignerActorId = std::nullopt;
307
+
297
308
public:
298
309
TSelfHealActor (ui64 tabletId, std::shared_ptr<std::atomic_uint64_t > unreassignableGroups, THostRecordMap hostRecords,
299
310
bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode,
300
311
std::shared_ptr<TControlWrapper> enableSelfHealWithDegraded,
301
312
std::shared_ptr<std::atomic_uint64_t > groupsWithInvalidLayoutCounter)
302
313
: TabletId(tabletId)
303
- , UnreassignableGroups (std::move(unreassignableGroups))
314
+ , UnreassignableGroupsCount (std::move(unreassignableGroups))
304
315
, GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled)
305
316
, AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation)
306
317
, DonorMode(donorMode)
@@ -385,8 +396,11 @@ namespace NKikimr::NBsController {
385
396
TGroupRecord& group = it->second ;
386
397
387
398
// kill reassigner, if it is working
388
- if (group.ReassignerActorId ) {
389
- Send (group.ReassignerActorId , new TEvents::TEvPoison);
399
+ if (group.ReassignStatus == EReassignStatus::Active) {
400
+ Y_DEBUG_ABORT_UNLESS (ActiveReassignerActorId);
401
+ if (ActiveReassignerActorId) {
402
+ Send (*ActiveReassignerActorId, new TEvents::TEvPoison);
403
+ }
390
404
}
391
405
392
406
// remove the group
@@ -422,49 +436,16 @@ namespace NKikimr::NBsController {
422
436
void CheckGroups () {
423
437
const TMonotonic now = TActivationContext::Monotonic ();
424
438
425
- ui64 counter = 0 ;
426
-
427
439
for (TGroupRecord& group : GroupsWithFaultyDisks) {
428
- if (group.ReassignerActorId || now < group.NextRetryTimestamp ) {
429
- continue ; // we are already running reassigner for this group
440
+ if (group.ReassignStatus != EReassignStatus::NotNeeded || now < group.NextRetryTimestamp ) {
441
+ continue ; // reassign is already enqueued
430
442
}
431
443
432
444
if (group.UpdateConfigTxSeqNo < group.ResponseConfigTxSeqNo ) {
433
445
continue ; // response from bsc was received before selfheal info update
434
446
}
435
-
436
- // check if it is possible to move anything out
437
- bool isSelfHealReasonDecommit;
438
- bool ignoreDegradedGroupsChecks;
439
- if (const auto v = FindVDiskToReplace (group.Content , now, group.Topology .get (), &isSelfHealReasonDecommit,
440
- &ignoreDegradedGroupsChecks)) {
441
- group.ReassignerActorId = Register (new TReassignerActor (ControllerId, group.GroupId , group.Content ,
442
- *v, group.Topology , isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
443
- } else {
444
- ++counter; // this group can't be reassigned right now
445
-
446
- auto log = [&]() {
447
- TStringStream ss;
448
- ss << " [" ;
449
- bool first = true ;
450
- for (const auto & [vdiskId, vdisk] : group.Content .VDisks ) {
451
- if (!std::exchange (first, false )) {
452
- ss << " ," ;
453
- }
454
- ss << " {" ;
455
- ss << vdiskId;
456
- ss << (IsReady (vdisk, now) ? " Ready" : " NotReady" );
457
- ss << (vdisk.Faulty ? " Faulty" : " " );
458
- ss << (vdisk.Bad ? " IsBad" : " " );
459
- ss << (vdisk.Decommitted ? " Decommitted" : " " );
460
- ss << " }" ;
461
- }
462
- ss << " ]" ;
463
- return ss.Str ();
464
- };
465
-
466
- STLOG (PRI_INFO, BS_SELFHEAL, BSSH11, " group can't be reassigned right now " << log (), (GroupId, group.GroupId ));
467
- }
447
+
448
+ EnqueueReassign (group, EGroupRepairOperation::SelfHeal);
468
449
}
469
450
470
451
if (GroupLayoutSanitizerEnabled) {
@@ -488,20 +469,19 @@ namespace NKikimr::NBsController {
488
469
}
489
470
490
471
Y_ABORT_UNLESS (!group.LayoutValid );
491
- if (group.ReassignerActorId || now < group.NextRetryTimestamp ) {
472
+ if (group.ReassignStatus != EReassignStatus::NotNeeded || now < group.NextRetryTimestamp ) {
492
473
// nothing to do
493
474
} else {
494
475
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG (GroupLayoutSanitizerOperationLog,
495
476
" Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content .Generation );
496
- group.ReassignerActorId = Register (new TReassignerActor (ControllerId, group.GroupId , group.Content ,
497
- std::nullopt, group.Topology , false /* isSelfHealReasonDecommit*/ ,
498
- false /* ignoreDegradedGroupsChecks*/ , DonorMode));
477
+ EnqueueReassign (group, EGroupRepairOperation::GroupLayoutSanitizer);
499
478
}
500
479
}
501
480
}
502
481
482
+ ProcessReassignQueues ();
503
483
GroupsWithInvalidLayoutCounter->store (GroupsWithInvalidLayout.Size ());
504
- UnreassignableGroups ->store (counter );
484
+ UnreassignableGroupsCount ->store (UnreassignableGroups. size () );
505
485
}
506
486
507
487
void UpdateGroupLayoutInformation (TGroupRecord& group) {
@@ -602,9 +582,13 @@ namespace NKikimr::NBsController {
602
582
}
603
583
604
584
void Handle (TEvReassignerDone::TPtr& ev) {
605
- if (const auto it = Groups.find (ev->Get ()->GroupId ); it != Groups.end () && it->second .ReassignerActorId == ev->Sender ) {
585
+ Y_ABORT_UNLESS (ActiveReassignerActorId);
586
+ TActorId reassigner = *std::exchange (ActiveReassignerActorId, std::nullopt);
587
+ Y_ABORT_UNLESS (reassigner == ev->Sender );
588
+
589
+ if (const auto it = Groups.find (ev->Get ()->GroupId ); it != Groups.end ()) {
606
590
auto & group = it->second ;
607
- group.ReassignerActorId = {} ;
591
+ group.ReassignStatus = EReassignStatus::NotNeeded ;
608
592
609
593
const TMonotonic now = TActivationContext::Monotonic ();
610
594
if (ev->Get ()->Success ) {
@@ -623,9 +607,9 @@ namespace NKikimr::NBsController {
623
607
" Sanitizing failed GroupId# " << group.GroupId << " ErrorReason# " << ev->Get ()->ErrorReason );
624
608
}
625
609
}
626
-
627
610
CheckGroups ();
628
611
}
612
+ ProcessReassignQueues ();
629
613
}
630
614
631
615
using TVDiskInfo = TEvControllerUpdateSelfHealInfo::TGroupContent::TVDiskInfo;
@@ -654,6 +638,101 @@ namespace NKikimr::NBsController {
654
638
Send (ev->Sender , new NMon::TEvRemoteHttpInfoRes (str.Str ()));
655
639
}
656
640
641
+ void ProcessReassignQueues () {
642
+ while (!ActiveReassignerActorId && !SelfHealReassignQueue.empty ()) {
643
+ TGroupId groupId = SelfHealReassignQueue.front ();
644
+ SelfHealReassignQueue.pop_front ();
645
+ CreateReassignerActorIfNeededForSelfHeal (groupId);
646
+ }
647
+
648
+ while (!ActiveReassignerActorId && !GroupLayoutSanitizerReassignQueue.empty ()) {
649
+ TGroupId groupId = GroupLayoutSanitizerReassignQueue.front ();
650
+ GroupLayoutSanitizerReassignQueue.pop_front ();
651
+ auto it = Groups.find (groupId);
652
+ if (it != Groups.end ()) {
653
+ TGroupRecord& group = it->second ;
654
+ CreateReassignerActor (group, std::nullopt, false , false );
655
+ }
656
+ }
657
+ }
658
+
659
+ bool CreateReassignerActorIfNeededForSelfHeal (TGroupId groupId) {
660
+ auto it = Groups.find (groupId);
661
+ if (it == Groups.end ()) {
662
+ // group is deleted
663
+ return false ;
664
+ }
665
+
666
+ TGroupRecord& group = it->second ;
667
+ if (group.ReassignStatus == EReassignStatus::NotNeeded) {
668
+ // Group is already fully healed
669
+ return false ;
670
+ }
671
+
672
+ // check if it is possible to move anything out
673
+ bool isSelfHealReasonDecommit;
674
+ bool ignoreDegradedGroupsChecks;
675
+ if (const std::optional<TVDiskID> vdiskId = FindVDiskToReplace (group.Content , TActivationContext::Monotonic (),
676
+ group.Topology .get (), &isSelfHealReasonDecommit, &ignoreDegradedGroupsChecks)) {
677
+ if (auto it = UnreassignableGroups.find (groupId); it != UnreassignableGroups.end ()) {
678
+ UnreassignableGroups.erase (it);
679
+ }
680
+ CreateReassignerActor (group, vdiskId, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks);
681
+ return true ;
682
+ } else {
683
+ // unable to reassign VDisk
684
+ UnreassignableGroups.insert (groupId);
685
+ group.ReassignStatus = EReassignStatus::NotNeeded;
686
+
687
+ TMonotonic now = TActivationContext::Monotonic ();
688
+ auto log = [&]() {
689
+ TStringStream ss;
690
+ ss << " [" ;
691
+ bool first = true ;
692
+ for (const auto & [vdiskId, vdisk] : group.Content .VDisks ) {
693
+ if (!std::exchange (first, false )) {
694
+ ss << " ," ;
695
+ }
696
+ ss << " {" ;
697
+ ss << vdiskId;
698
+ ss << (IsReady (vdisk, now) ? " Ready" : " NotReady" );
699
+ ss << (vdisk.Faulty ? " Faulty" : " " );
700
+ ss << (vdisk.Bad ? " IsBad" : " " );
701
+ ss << (vdisk.Decommitted ? " Decommitted" : " " );
702
+ ss << " }" ;
703
+ }
704
+ ss << " ]" ;
705
+ return ss.Str ();
706
+ };
707
+
708
+ STLOG (PRI_INFO, BS_SELFHEAL, BSSH11, " group can't be reassigned right now " << log (), (GroupId, groupId));
709
+ }
710
+ return false ;
711
+ }
712
+
713
+
714
+ void CreateReassignerActor (TGroupRecord& group, std::optional<TVDiskID> vdiskId, bool isSelfHealReasonDecommit,
715
+ bool ignoreDegradedGroupsChecks) {
716
+ group.ReassignStatus = EReassignStatus::Active;
717
+ Y_ABORT_UNLESS (!ActiveReassignerActorId);
718
+ ActiveReassignerActorId = Register (new TReassignerActor (ControllerId, group.GroupId , group.Content ,
719
+ vdiskId, group.Topology , isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
720
+ }
721
+
722
+ void EnqueueReassign (TGroupRecord& group, EGroupRepairOperation operation) {
723
+ group.ReassignStatus = EReassignStatus::Enqueued;
724
+ switch (operation) {
725
+ case EGroupRepairOperation::SelfHeal:
726
+ SelfHealReassignQueue.push_back (group.GroupId );
727
+ break ;
728
+ case EGroupRepairOperation::GroupLayoutSanitizer:
729
+ GroupLayoutSanitizerReassignQueue.push_back (group.GroupId );
730
+ break ;
731
+ default :
732
+ Y_ABORT (" Unknown operation" );
733
+ }
734
+ }
735
+
657
736
void RenderMonPage (IOutputStream& out, bool selfHealEnabled) {
658
737
HTML (out) {
659
738
TAG (TH2) {
0 commit comments