Skip to content

Commit 207f337

Browse files
authored
25-1: handle follower dying while being promoted to leader (#15441)
1 parent 1524617 commit 207f337

File tree

2 files changed

+77
-31
lines changed

2 files changed

+77
-31
lines changed

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3352,7 +3352,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33523352
}
33533353
}
33543354

3355-
Y_UNIT_TEST(TestFollowerPromotion) {
3355+
void TestFollowerPromotion(bool killDuringPromotion) {
33563356
constexpr int NODES = 3;
33573357
TTestBasicRuntime runtime(NODES, false);
33583358
Setup(runtime, true);
@@ -3383,30 +3383,42 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33833383
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesBefore[i]);
33843384
}
33853385
int leaders = std::accumulate(tabletRolesBefore.begin(), tabletRolesBefore.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3386-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33873386
int leaderNode = std::find(tabletRolesBefore.begin(), tabletRolesBefore.end(), true) - tabletRolesBefore.begin();
3388-
// killing leader
3389-
SendKillLocal(runtime, leaderNode);
3387+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33903388
{
3391-
TDispatchOptions options;
3392-
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus);
3393-
runtime.DispatchEvents(options);
3394-
}
3395-
std::array<bool, NODES> tabletRolesIntermediate = {};
3396-
for (int i = 0; i < NODES; ++i) {
3397-
if (i != leaderNode) {
3398-
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesIntermediate[i]);
3399-
} else {
3400-
tabletRolesIntermediate[i] = false;
3389+
TBlockEvents<TEvTablet::TEvPromoteToLeader> blockPromote(runtime);
3390+
// killing leader
3391+
SendKillLocal(runtime, leaderNode);
3392+
3393+
while (blockPromote.empty()) {
3394+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3395+
}
3396+
3397+
if (killDuringPromotion) {
3398+
for (int i = 0; i < NODES; ++i) {
3399+
if (i == leaderNode) {
3400+
continue;
3401+
}
3402+
TActorId sender = runtime.AllocateEdgeActor(i);
3403+
runtime.SendToPipe(tabletId, sender, new TEvents::TEvPoisonPill, i, pipeConfig);
3404+
}
34013405
}
3406+
3407+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3408+
3409+
blockPromote.Stop().Unblock();
3410+
}
3411+
{
3412+
TDispatchOptions options;
3413+
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, killDuringPromotion ? 3 : 1);
3414+
runtime.DispatchEvents(options, TDuration::MilliSeconds(100));
34023415
}
3403-
leaders = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3404-
int followers = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a : a + 1; });
3405-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3406-
UNIT_ASSERT_VALUES_EQUAL(followers, 2);
34073416
std::unordered_set<std::pair<TTabletId, TFollowerId>> activeTablets;
34083417
TActorId senderA = runtime.AllocateEdgeActor();
34093418
for (int i = 0; i < NODES; ++i) {
3419+
if (i == leaderNode) {
3420+
continue;
3421+
}
34103422
TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(i));
34113423
runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest()));
34123424
TAutoPtr<IEventHandle> handle;
@@ -3421,6 +3433,16 @@ Y_UNIT_TEST_SUITE(THiveTest) {
34213433
}
34223434
}
34233435
UNIT_ASSERT_VALUES_EQUAL(activeTablets.size(), 3);
3436+
leaders = std::count_if(activeTablets.begin(), activeTablets.end(), [](auto&& p) { return p.second == 0; });
3437+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3438+
}
3439+
3440+
// Baseline scenario: the leader is killed and a follower is promoted; no follower is poisoned while the promotion is pending.
Y_UNIT_TEST(TestFollowerPromotion) {
3441+
TestFollowerPromotion(false);
3442+
}
3443+
3444+
// Same scenario, but a TEvPoisonPill is sent to the tablet on every non-leader node while TEvPromoteToLeader is still blocked.
Y_UNIT_TEST(TestFollowerPromotionFollowerDies) {
3445+
TestFollowerPromotion(true);
34243446
}
34253447

34263448
Y_UNIT_TEST(TestManyFollowersOnOneNode) {

ydb/core/mind/local.cpp

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
5252
ui32 Generation;
5353
TTabletTypes::EType TabletType;
5454
NKikimrLocal::EBootMode BootMode;
55-
ui32 FollowerId;
5655

5756
TTablet()
5857
: Tablet()
5958
, Generation(0)
6059
, TabletType()
6160
, BootMode(NKikimrLocal::EBootMode::BOOT_MODE_LEADER)
62-
, FollowerId(0)
6361
{}
6462
};
6563

6664
struct TTabletEntry : TTablet {
6765
TInstant From;
66+
bool IsPromoting = false;
67+
ui32 PromotingFromFollower = 0;
6868

6969
TTabletEntry()
7070
: From(TInstant::MicroSeconds(0))
@@ -141,6 +141,10 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
141141
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelDemotedByBS;
142142
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelUnknownReason;
143143

144+
// Returns the id of the leader instance of the same tablet: the first component is kept,
// the second component (the follower id) is set to 0.
static TTabletId LeaderId(TTabletId tabletId) {
145+
return {tabletId.first, 0};
146+
}
147+
144148
void Die(const TActorContext &ctx) override {
145149
if (HivePipeClient) {
146150
if (Connected) {
@@ -385,6 +389,24 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
385389
ScheduleSendTabletMetrics(ctx);
386390
}
387391

392+
// Begins promoting an online follower to leader.
//   tabletId      - id of the follower being promoted (its second component is the follower id)
//   followerEntry - the follower's entry; it is flagged as promoting
//   suggestedGen  - generation recorded on the new leader entry
//   now           - timestamp stored as the leader entry's From
// Creates (or overwrites) the InbootTablets entry keyed by the leader id as a copy of the follower entry,
// then stamps it with leader boot mode, the suggested generation and the originating follower id.
void StartPromotion(TTabletId tabletId, TOnlineTabletEntry& followerEntry, ui32 suggestedGen, TInstant now) {
393+
TTabletId leaderId = LeaderId(tabletId);
394+
TTabletEntry& leaderEntry = InbootTablets[leaderId];
395+
followerEntry.IsPromoting = true; // set before the copy below, so the in-boot leader entry is assigned from an already-flagged entry
396+
leaderEntry = followerEntry;
397+
leaderEntry.From = now;
398+
leaderEntry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
399+
leaderEntry.Generation = suggestedGen;
400+
leaderEntry.PromotingFromFollower = tabletId.second; // remembered so FinishPromotion can find the follower's online entry
401+
}
402+
403+
// Completes a promotion: removes the OnlineTablets entry of the follower the promotion started from
// (keyed by tabletId.first and entry.PromotingFromFollower) and clears the promotion state on entry.
void FinishPromotion(TTabletId tabletId, TTabletEntry& entry) {
404+
TTabletId promotedTablet{tabletId.first, entry.PromotingFromFollower};
405+
OnlineTablets.erase(promotedTablet); // erase by key
406+
entry.IsPromoting = false;
407+
entry.PromotingFromFollower = 0;
408+
}
409+
388410
void Handle(TEvLocal::TEvBootTablet::TPtr &ev, const TActorContext &ctx) {
389411
NKikimrLocal::TEvBootTablet &record = ev->Get()->Record;
390412
TIntrusivePtr<TTabletStorageInfo> info(TabletStorageInfoFromProto(record.GetInfo()));
@@ -427,18 +449,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
427449
if (it != OnlineTablets.end()) {
428450
if (it->second.BootMode == NKikimrLocal::EBootMode::BOOT_MODE_FOLLOWER
429451
&& record.GetBootMode() == NKikimrLocal::EBootMode::BOOT_MODE_LEADER) {
430-
// promote to leader
431-
it->second.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
432-
it->second.Generation = suggestedGen;
433-
tabletId.second = 0; // FollowerId = 0
434-
TTabletEntry &entry = InbootTablets[tabletId];
435-
entry = it->second;
436-
entry.From = ctx.Now();
437-
entry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
438-
entry.Generation = suggestedGen;
439-
ctx.Send(entry.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
452+
StartPromotion(tabletId, it->second, suggestedGen, ctx.Now());
453+
ctx.Send(it->second.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
440454
MarkDeadTablet(it->first, 0, TEvLocal::TEvTabletStatus::StatusSupersededByLeader, TEvTablet::TEvTabletDead::ReasonError, ctx);
441-
OnlineTablets.erase(it);
442455
LOG_DEBUG_S(ctx, NKikimrServices::LOCAL,
443456
"TLocalNodeRegistrar::Handle TEvLocal::TEvBootTablet follower tablet " << tabletId << " promoted to leader");
444457
return;
@@ -718,6 +731,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
718731
<< " marked as running at generation "
719732
<< generation);
720733
NTabletPipe::SendData(ctx, HivePipeClient, new TEvLocal::TEvTabletStatus(TEvLocal::TEvTabletStatus::StatusOk, tabletId, generation));
734+
if (inbootIt->second.IsPromoting) {
735+
FinishPromotion(tabletId, inbootIt->second);
736+
}
721737
OnlineTablets.emplace(tabletId, inbootIt->second);
722738
InbootTablets.erase(inbootIt);
723739
}
@@ -818,6 +834,14 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
818834
});
819835
if (onlineIt != OnlineTablets.end()) { // from online list
820836
MarkDeadTablet(onlineIt->first, generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
837+
if (onlineIt->second.IsPromoting) { // the dead online tablet was a follower in the middle of being promoted
838+
TTabletId leader = LeaderId(onlineIt->first);
839+
auto inbootIt = InbootTablets.find(leader); // in-boot leader entry for the same tablet, if one exists
840+
if (inbootIt != InbootTablets.end()) {
841+
MarkDeadTablet(leader, inbootIt->second.Generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
842+
InbootTablets.erase(inbootIt); // erase only a found entry: erasing the end() iterator is undefined behavior
843+
}
844+
}
821845
OnlineTablets.erase(onlineIt);
822846
UpdateEstimate();
823847
return;

0 commit comments

Comments
 (0)