Skip to content

Commit a05a0b9

Browse files
authored
Limit SelfHeal reassign requests in-flight (#17618) (#18298)
2 parents a3e836e + bb7c9d4 commit a05a0b9

File tree

3 files changed

+197
-50
lines changed

3 files changed

+197
-50
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h>
2+
3+
Y_UNIT_TEST_SUITE(SelfHeal) {
    // Marks one PDisk as FAULTY in a mirror-3-dc environment with many groups and
    // checks, by intercepting controller config traffic, that the number of senders
    // with an unanswered ReassignGroupDisk request never exceeds one.
    void TestReassignThrottling() {
        const TBlobStorageGroupType erasure = TBlobStorageGroupType::ErasureMirror3dc;
        const ui32 groupsCount = 32;

        TEnvironmentSetup env({
            .NodeCount = erasure.BlobSubgroupSize(),
            .Erasure = erasure,
        });

        // two PDisks per node leave room for self-heal to move VDisks;
        // the pool is populated with groupsCount groups
        env.CreateBoxAndPool(2, groupsCount);
        env.Sim(TDuration::Minutes(1));

        auto base = env.FetchBaseConfig();
        UNIT_ASSERT_VALUES_EQUAL(base.GroupSize(), groupsCount);

        // high-water mark of concurrently outstanding reassign requests
        ui32 maxReassignsInFlight = 0;

        // senders of ReassignGroupDisk requests that have not yet received a response
        std::set<TActorId> reassignersInFlight;

        using TConfigRequestEvent = TEvBlobStorage::TEvControllerConfigRequest;
        using TConfigResponseEvent = TEvBlobStorage::TEvControllerConfigResponse;

        // the filter only observes traffic: it always returns true
        env.Runtime->FilterFunction = [&](ui32 /*nodeId*/, std::unique_ptr<IEventHandle>& ev) {
            const auto eventType = ev->GetTypeRewrite();

            if (eventType == TConfigResponseEvent::EventType) {
                // a response to a tracked sender closes its in-flight request
                reassignersInFlight.erase(ev->Recipient);
                return true;
            }

            if (eventType != TConfigRequestEvent::EventType) {
                return true;
            }

            const auto& request = ev->Get<TConfigRequestEvent>()->Record.GetRequest();
            for (const auto& item : request.GetCommand()) {
                if (item.GetCommandCase() != NKikimrBlobStorage::TConfigRequest::TCommand::kReassignGroupDisk) {
                    continue;
                }
                UNIT_ASSERT(!reassignersInFlight.contains(ev->Sender));
                reassignersInFlight.insert(ev->Sender);
                maxReassignsInFlight = std::max(maxReassignsInFlight, static_cast<ui32>(reassignersInFlight.size()));
            }
            return true;
        };

        // set FAULTY status on the first PDisk of the base config
        auto targetPDisk = base.GetPDisk(0);
        {
            NKikimrBlobStorage::TConfigRequest statusRequest;
            auto* update = statusRequest.AddCommand()->MutableUpdateDriveStatus();
            update->MutableHostKey()->SetNodeId(targetPDisk.GetNodeId());
            update->SetPDiskId(targetPDisk.GetPDiskId());
            update->SetStatus(NKikimrBlobStorage::FAULTY);
            auto res = env.Invoke(statusRequest);
            UNIT_ASSERT_C(res.GetSuccess(), res.GetErrorDescription());
            UNIT_ASSERT_C(res.GetStatus(0).GetSuccess(), res.GetStatus(0).GetErrorDescription());
        }

        env.Sim(TDuration::Minutes(15));

        UNIT_ASSERT_C(maxReassignsInFlight == 1, "maxReassignsInFlight# " << maxReassignsInFlight);
    }

    Y_UNIT_TEST(ReassignThrottling) {
        TestReassignThrottling();
    }
}

ydb/core/blobstorage/ut_blobstorage/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ SRCS(
4141
recovery.cpp
4242
sanitize_groups.cpp
4343
scrub_fast.cpp
44+
self_heal.cpp
4445
shred.cpp
4546
snapshots.cpp
4647
space_check.cpp

ydb/core/mind/bscontroller/self_heal.cpp

Lines changed: 129 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,19 @@ namespace NKikimr::NBsController {
253253
struct TWithFaultyDisks {};
254254
struct TWithInvalidLayout {};
255255

256+
// Per-group state of the reassign pipeline driven by the self-heal actor.
enum class EReassignStatus : ui8 {
    // no reassign is queued or running for the group; this is the initial state and
    // the state restored when a reassigner reports completion or no VDisk can be picked
    NotNeeded = 0,
    // the group id has been pushed to one of the reassign queues
    Enqueued = 1,
    // a reassigner actor has been registered for the group
    Active = 2,
};
261+
256262
struct TGroupRecord
257263
: TIntrusiveListItem<TGroupRecord, TWithFaultyDisks>
258264
, TIntrusiveListItem<TGroupRecord, TWithInvalidLayout>
259265
{
260266
const TGroupId GroupId;
261267
TEvControllerUpdateSelfHealInfo::TGroupContent Content;
262-
TActorId ReassignerActorId; // reassigner in flight
268+
EReassignStatus ReassignStatus = EReassignStatus::NotNeeded;
263269
TDuration RetryTimeout = MinRetryTimeout;
264270
TMonotonic NextRetryTimestamp = TMonotonic::Zero();
265271
std::shared_ptr<TBlobStorageGroupInfo::TTopology> Topology;
@@ -278,7 +284,8 @@ namespace NKikimr::NBsController {
278284
THashMap<TGroupId, TGroupRecord> Groups;
279285
TIntrusiveList<TGroupRecord, TWithFaultyDisks> GroupsWithFaultyDisks;
280286
TIntrusiveList<TGroupRecord, TWithInvalidLayout> GroupsWithInvalidLayout;
281-
std::shared_ptr<std::atomic_uint64_t> UnreassignableGroups;
287+
std::unordered_set<TGroupId> UnreassignableGroups;
288+
std::shared_ptr<std::atomic_uint64_t> UnreassignableGroupsCount;
282289
bool GroupLayoutSanitizerEnabled;
283290
bool AllowMultipleRealmsOccupation;
284291
bool DonorMode;
@@ -294,13 +301,17 @@ namespace NKikimr::NBsController {
294301
static constexpr uint32_t GroupLayoutSanitizerOperationLogSize = 128;
295302
TOperationLog<GroupLayoutSanitizerOperationLogSize> GroupLayoutSanitizerOperationLog;
296303

304+
std::deque<TGroupId> SelfHealReassignQueue;
305+
std::deque<TGroupId> GroupLayoutSanitizerReassignQueue;
306+
std::optional<TActorId> ActiveReassignerActorId = std::nullopt;
307+
297308
public:
298309
TSelfHealActor(ui64 tabletId, std::shared_ptr<std::atomic_uint64_t> unreassignableGroups, THostRecordMap hostRecords,
299310
bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode,
300311
std::shared_ptr<TControlWrapper> enableSelfHealWithDegraded,
301312
std::shared_ptr<std::atomic_uint64_t> groupsWithInvalidLayoutCounter)
302313
: TabletId(tabletId)
303-
, UnreassignableGroups(std::move(unreassignableGroups))
314+
, UnreassignableGroupsCount(std::move(unreassignableGroups))
304315
, GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled)
305316
, AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation)
306317
, DonorMode(donorMode)
@@ -385,8 +396,11 @@ namespace NKikimr::NBsController {
385396
TGroupRecord& group = it->second;
386397

387398
// kill reassigner, if it is working
388-
if (group.ReassignerActorId) {
389-
Send(group.ReassignerActorId, new TEvents::TEvPoison);
399+
if (group.ReassignStatus == EReassignStatus::Active) {
400+
Y_DEBUG_ABORT_UNLESS(ActiveReassignerActorId);
401+
if (ActiveReassignerActorId) {
402+
Send(*ActiveReassignerActorId, new TEvents::TEvPoison);
403+
}
390404
}
391405

392406
// remove the group
@@ -422,49 +436,16 @@ namespace NKikimr::NBsController {
422436
void CheckGroups() {
423437
const TMonotonic now = TActivationContext::Monotonic();
424438

425-
ui64 counter = 0;
426-
427439
for (TGroupRecord& group : GroupsWithFaultyDisks) {
428-
if (group.ReassignerActorId || now < group.NextRetryTimestamp) {
429-
continue; // we are already running reassigner for this group
440+
if (group.ReassignStatus != EReassignStatus::NotNeeded || now < group.NextRetryTimestamp) {
441+
continue; // reassign is already enqueued
430442
}
431443

432444
if (group.UpdateConfigTxSeqNo < group.ResponseConfigTxSeqNo) {
433445
continue; // response from bsc was received before selfheal info update
434446
}
435-
436-
// check if it is possible to move anything out
437-
bool isSelfHealReasonDecommit;
438-
bool ignoreDegradedGroupsChecks;
439-
if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit,
440-
&ignoreDegradedGroupsChecks)) {
441-
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
442-
*v, group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
443-
} else {
444-
++counter; // this group can't be reassigned right now
445-
446-
auto log = [&]() {
447-
TStringStream ss;
448-
ss << "[";
449-
bool first = true;
450-
for (const auto& [vdiskId, vdisk] : group.Content.VDisks) {
451-
if (!std::exchange(first, false)) {
452-
ss << ",";
453-
}
454-
ss << "{";
455-
ss << vdiskId;
456-
ss << (IsReady(vdisk, now) ? " Ready" : " NotReady");
457-
ss << (vdisk.Faulty ? " Faulty" : "");
458-
ss << (vdisk.Bad ? " IsBad" : "");
459-
ss << (vdisk.Decommitted ? " Decommitted" : "");
460-
ss << "}";
461-
}
462-
ss << "]";
463-
return ss.Str();
464-
};
465-
466-
STLOG(PRI_INFO, BS_SELFHEAL, BSSH11, "group can't be reassigned right now " << log(), (GroupId, group.GroupId));
467-
}
447+
448+
EnqueueReassign(group, EGroupRepairOperation::SelfHeal);
468449
}
469450

470451
if (GroupLayoutSanitizerEnabled) {
@@ -488,20 +469,19 @@ namespace NKikimr::NBsController {
488469
}
489470

490471
Y_ABORT_UNLESS(!group.LayoutValid);
491-
if (group.ReassignerActorId || now < group.NextRetryTimestamp) {
472+
if (group.ReassignStatus != EReassignStatus::NotNeeded || now < group.NextRetryTimestamp) {
492473
// nothing to do
493474
} else {
494475
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(GroupLayoutSanitizerOperationLog,
495476
"Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content.Generation);
496-
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
497-
std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/,
498-
false /*ignoreDegradedGroupsChecks*/, DonorMode));
477+
EnqueueReassign(group, EGroupRepairOperation::GroupLayoutSanitizer);
499478
}
500479
}
501480
}
502481

482+
ProcessReassignQueues();
503483
GroupsWithInvalidLayoutCounter->store(GroupsWithInvalidLayout.Size());
504-
UnreassignableGroups->store(counter);
484+
UnreassignableGroupsCount->store(UnreassignableGroups.size());
505485
}
506486

507487
void UpdateGroupLayoutInformation(TGroupRecord& group) {
@@ -602,9 +582,13 @@ namespace NKikimr::NBsController {
602582
}
603583

604584
void Handle(TEvReassignerDone::TPtr& ev) {
605-
if (const auto it = Groups.find(ev->Get()->GroupId); it != Groups.end() && it->second.ReassignerActorId == ev->Sender) {
585+
Y_ABORT_UNLESS(ActiveReassignerActorId);
586+
TActorId reassigner = *std::exchange(ActiveReassignerActorId, std::nullopt);
587+
Y_ABORT_UNLESS(reassigner == ev->Sender);
588+
589+
if (const auto it = Groups.find(ev->Get()->GroupId); it != Groups.end()) {
606590
auto& group = it->second;
607-
group.ReassignerActorId = {};
591+
group.ReassignStatus = EReassignStatus::NotNeeded;
608592

609593
const TMonotonic now = TActivationContext::Monotonic();
610594
if (ev->Get()->Success) {
@@ -623,9 +607,9 @@ namespace NKikimr::NBsController {
623607
"Sanitizing failed GroupId# " << group.GroupId << " ErrorReason# " << ev->Get()->ErrorReason);
624608
}
625609
}
626-
627610
CheckGroups();
628611
}
612+
ProcessReassignQueues();
629613
}
630614

631615
using TVDiskInfo = TEvControllerUpdateSelfHealInfo::TGroupContent::TVDiskInfo;
@@ -654,6 +638,101 @@ namespace NKikimr::NBsController {
654638
Send(ev->Sender, new NMon::TEvRemoteHttpInfoRes(str.Str()));
655639
}
656640

641+
void ProcessReassignQueues() {
642+
while (!ActiveReassignerActorId && !SelfHealReassignQueue.empty()) {
643+
TGroupId groupId = SelfHealReassignQueue.front();
644+
SelfHealReassignQueue.pop_front();
645+
CreateReassignerActorIfNeededForSelfHeal(groupId);
646+
}
647+
648+
while (!ActiveReassignerActorId && !GroupLayoutSanitizerReassignQueue.empty()) {
649+
TGroupId groupId = GroupLayoutSanitizerReassignQueue.front();
650+
GroupLayoutSanitizerReassignQueue.pop_front();
651+
auto it = Groups.find(groupId);
652+
if (it != Groups.end()) {
653+
TGroupRecord& group = it->second;
654+
CreateReassignerActor(group, std::nullopt, false, false);
655+
}
656+
}
657+
}
658+
659+
// Tries to start a self-heal reassigner for a group taken from the self-heal queue.
//
// Returns true when a reassigner actor has been created. Returns false when the
// group no longer exists, when its status has been reset to NotNeeded, or when
// FindVDiskToReplace yields no VDisk; in the last case the group is recorded in
// UnreassignableGroups and its status is reset to NotNeeded.
bool CreateReassignerActorIfNeededForSelfHeal(TGroupId groupId) {
    const auto groupIt = Groups.find(groupId);
    if (groupIt == Groups.end()) {
        // group is deleted; make sure it is not counted as unreassignable anymore
        UnreassignableGroups.erase(groupId);
        return false;
    }

    TGroupRecord& group = groupIt->second;
    if (group.ReassignStatus == EReassignStatus::NotNeeded) {
        // Group is already fully healed
        return false;
    }

    // single timestamp shared by the replacement search and the diagnostics below
    const TMonotonic now = TActivationContext::Monotonic();

    // check if it is possible to move anything out
    bool isSelfHealReasonDecommit = false;
    bool ignoreDegradedGroupsChecks = false;
    const std::optional<TVDiskID> vdiskToReplace = FindVDiskToReplace(group.Content, now,
        group.Topology.get(), &isSelfHealReasonDecommit, &ignoreDegradedGroupsChecks);

    if (vdiskToReplace) {
        UnreassignableGroups.erase(groupId);
        CreateReassignerActor(group, vdiskToReplace, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks);
        return true;
    }

    // unable to reassign VDisk
    UnreassignableGroups.insert(groupId);
    group.ReassignStatus = EReassignStatus::NotNeeded;

    // renders per-VDisk state of the group for the log record
    auto log = [&]() {
        TStringStream ss;
        ss << "[";
        bool first = true;
        for (const auto& [id, vdisk] : group.Content.VDisks) {
            if (!std::exchange(first, false)) {
                ss << ",";
            }
            ss << "{";
            ss << id;
            ss << (IsReady(vdisk, now) ? " Ready" : " NotReady");
            ss << (vdisk.Faulty ? " Faulty" : "");
            ss << (vdisk.Bad ? " IsBad" : "");
            ss << (vdisk.Decommitted ? " Decommitted" : "");
            ss << "}";
        }
        ss << "]";
        return ss.Str();
    };

    STLOG(PRI_INFO, BS_SELFHEAL, BSSH11, "group can't be reassigned right now " << log(), (GroupId, groupId));
    return false;
}
712+
713+
714+
// Registers a reassigner actor for the given group and remembers it as the active one.
// Must only be called while no other reassigner is active.
void CreateReassignerActor(TGroupRecord& group, std::optional<TVDiskID> vdiskId, bool isSelfHealReasonDecommit,
        bool ignoreDegradedGroupsChecks) {
    // only one reassigner may be in flight at any moment
    Y_ABORT_UNLESS(!ActiveReassignerActorId);

    group.ReassignStatus = EReassignStatus::Active;

    auto* reassigner = new TReassignerActor(ControllerId, group.GroupId, group.Content, vdiskId,
        group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode);
    ActiveReassignerActorId = Register(reassigner);
}
721+
722+
// Marks the group as waiting for a reassign and appends its id to the queue that
// corresponds to the requested repair operation. Aborts on an unknown operation.
void EnqueueReassign(TGroupRecord& group, EGroupRepairOperation operation) {
    std::deque<TGroupId>* targetQueue = nullptr;

    switch (operation) {
        case EGroupRepairOperation::SelfHeal:
            targetQueue = &SelfHealReassignQueue;
            break;
        case EGroupRepairOperation::GroupLayoutSanitizer:
            targetQueue = &GroupLayoutSanitizerReassignQueue;
            break;
        default:
            Y_ABORT("Unknown operation");
    }

    group.ReassignStatus = EReassignStatus::Enqueued;
    targetQueue->push_back(group.GroupId);
}
735+
657736
void RenderMonPage(IOutputStream& out, bool selfHealEnabled) {
658737
HTML(out) {
659738
TAG(TH2) {

0 commit comments

Comments
 (0)