Skip to content

Commit 093fd1f

Browse files
vporyadkeAleksandr Zalialov
andauthored
actually tie per-dc-followers to dc (#8069)
Co-authored-by: Aleksandr Zalialov <zalyalov@mr-nvme-testing-01.search.yandex.net>
1 parent 28b32eb commit 093fd1f

File tree

21 files changed

+321
-109
lines changed

21 files changed

+321
-109
lines changed

ydb/core/mind/hive/data_center_info.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include "hive.h"
2+
#include "hive_events.h"
3+
4+
namespace NKikimr {
5+
namespace NHive {
6+
7+
struct TDataCenterInfo {
8+
using TFullFollowerGroupId = std::pair<TTabletId, TFollowerGroupId>;
9+
using TFollowerIter = TList<TFollowerTabletInfo>::iterator; // list iterators are not invalidated
10+
11+
std::unordered_set<TNodeId> RegisteredNodes;
12+
bool UpdateScheduled = false;
13+
std::unordered_map<TFullFollowerGroupId, std::vector<TFollowerIter>> Followers;
14+
15+
bool IsRegistered() const {
16+
return !RegisteredNodes.empty();
17+
}
18+
};
19+
20+
} // NHive
21+
} // NKikimr

ydb/core/mind/hive/follower_group.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,17 @@ struct TFollowerGroup {
6767
}
6868
}
6969

70+
ui32 GetFollowerCountForDataCenter(const TDataCenterId& dc) const {
71+
if (!FollowerCountPerDataCenter) {
72+
return 0;
73+
}
74+
if (NodeFilter.IsAllowedDataCenter(dc)) {
75+
return FollowerCount;
76+
} else {
77+
return 0;
78+
}
79+
}
80+
7081
void SetFollowerCount(ui32 followerCount) {
7182
FollowerCount = followerCount;
7283
}

ydb/core/mind/hive/hive.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,11 @@ NMetrics::EResource GetDominantResourceType(const TResourceNormalizedValues& nor
8282
}
8383

8484
TNodeFilter::TNodeFilter(const THive& hive)
85-
: Hive(hive)
85+
: Hive(&hive)
8686
{}
8787

8888
TArrayRef<const TSubDomainKey> TNodeFilter::GetEffectiveAllowedDomains() const {
89-
const auto* objectDomainInfo = Hive.FindDomain(ObjectDomain);
89+
const auto* objectDomainInfo = Hive->FindDomain(ObjectDomain);
9090

9191
if (!objectDomainInfo) {
9292
return {AllowedDomains.begin(), AllowedDomains.end()};
@@ -100,6 +100,13 @@ TArrayRef<const TSubDomainKey> TNodeFilter::GetEffectiveAllowedDomains() const {
100100
}
101101
}
102102

103+
bool TNodeFilter::IsAllowedDataCenter(TDataCenterId dc) const {
104+
if (AllowedDataCenters.empty()) {
105+
return true;
106+
}
107+
return std::find(AllowedDataCenters.begin(), AllowedDataCenters.end(), dc) != AllowedDataCenters.end();
108+
}
109+
103110
template <typename K, typename V>
104111
std::unordered_map<V, K> MakeReverseMap(const std::unordered_map<K, V>& map) {
105112
std::unordered_map<V, K> result;

ydb/core/mind/hive/hive.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,11 +323,13 @@ struct TNodeFilter {
323323
TSubDomainKey ObjectDomain;
324324
TTabletTypes::EType TabletType = TTabletTypes::TypeInvalid;
325325

326-
const THive& Hive;
326+
const THive* Hive;
327327

328328
explicit TNodeFilter(const THive& hive);
329329

330330
TArrayRef<const TSubDomainKey> GetEffectiveAllowedDomains() const;
331+
332+
bool IsAllowedDataCenter(TDataCenterId dc) const;
331333
};
332334

333335
} // NHive

ydb/core/mind/hive/hive_events.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct TEvPrivate {
3333
EvStorageBalancerOut,
3434
EvDeleteNode,
3535
EvCanMoveTablets,
36+
EvUpdateDataCenterFollowers,
3637
EvEnd
3738
};
3839

@@ -120,6 +121,12 @@ struct TEvPrivate {
120121
};
121122

122123
struct TEvCanMoveTablets : TEventLocal<TEvCanMoveTablets, EvCanMoveTablets> {};
124+
125+
struct TEvUpdateDataCenterFollowers : TEventLocal<TEvUpdateDataCenterFollowers, EvUpdateDataCenterFollowers> {
126+
TDataCenterId DataCenter;
127+
128+
TEvUpdateDataCenterFollowers(TDataCenterId dataCenter) : DataCenter(dataCenter) {};
129+
};
123130
};
124131

125132
} // NHive

ydb/core/mind/hive/hive_impl.cpp

Lines changed: 81 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ TInstant THive::GetAllowedBootingTime() {
206206
return result;
207207
}
208208

209-
void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
209+
void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb&, TSideEffects& sideEffects) {
210210
TInstant now = TActivationContext::Now();
211211
if (WarmUp) {
212212
TInstant allowed = GetAllowedBootingTime();
@@ -257,10 +257,6 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec
257257
sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay"));
258258
}
259259
tablet->ActorsToNotifyOnRestart.clear();
260-
if (tablet->IsFollower()) {
261-
TLeaderTabletInfo& leader = tablet->GetLeader();
262-
UpdateTabletFollowersNumber(leader, db, sideEffects);
263-
}
264260
BootQueue.AddToWaitQueue(record); // waiting for new node
265261
continue;
266262
}
@@ -786,16 +782,11 @@ void THive::Handle(TEvInterconnect::TEvNodeInfo::TPtr &ev) {
786782
}
787783

788784
void THive::Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) {
789-
THashSet<TDataCenterId> dataCenters;
790785
for (const TEvInterconnect::TNodeInfo& node : ev->Get()->Nodes) {
791786
NodesInfo[node.NodeId] = node;
792-
dataCenters.insert(node.Location.GetDataCenterId());
793-
}
794-
dataCenters.erase(0); // remove default data center id if exists
795-
if (!dataCenters.empty()) {
796-
if (DataCenters != dataCenters.size()) {
797-
DataCenters = dataCenters.size();
798-
BLOG_D("TEvInterconnect::TEvNodesInfo DataCenters=" << DataCenters << " RegisteredDataCenters=" << RegisteredDataCenters);
787+
auto dataCenterId = node.Location.GetDataCenterId();
788+
if (dataCenterId != 0) {
789+
DataCenters[dataCenterId]; // just create entry in hash map
799790
}
800791
}
801792
Execute(CreateLoadEverything());
@@ -2737,81 +2728,97 @@ void THive::SendReconnect(const TActorId& local) {
27372728
}
27382729

27392730
ui32 THive::GetDataCenters() {
2740-
return DataCenters ? DataCenters : 1;
2741-
}
2742-
2743-
ui32 THive::GetRegisteredDataCenters() {
2744-
return RegisteredDataCenters ? RegisteredDataCenters : 1;
2745-
}
2746-
2747-
void THive::UpdateRegisteredDataCenters() {
2748-
if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) {
2749-
BLOG_D("THive (UpdateRegisteredDC) DataCenters=" << DataCenters << " RegisteredDataCenters=" << RegisteredDataCenters << "->" << RegisteredDataCenterNodes.size());
2750-
RegisteredDataCenters = RegisteredDataCenterNodes.size();
2751-
}
2731+
return DataCenters.size() ? DataCenters.size() : 1;
27522732
}
27532733

27542734
void THive::AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId) {
2735+
BLOG_D("AddRegisteredDataCentersNode(" << dataCenterId << ", " << nodeId << ")");
27552736
if (dataCenterId != 0) { // ignore default data center id if exists
2756-
if (RegisteredDataCenterNodes[dataCenterId].insert(nodeId).second) {
2757-
if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) {
2758-
UpdateRegisteredDataCenters();
2759-
}
2737+
auto& dataCenter = DataCenters[dataCenterId];
2738+
bool wasRegistered = dataCenter.IsRegistered();
2739+
dataCenter.RegisteredNodes.insert(nodeId);
2740+
if (!wasRegistered && !dataCenter.UpdateScheduled) {
2741+
dataCenter.UpdateScheduled = true;
2742+
Schedule(TDuration::Seconds(1), new TEvPrivate::TEvUpdateDataCenterFollowers(dataCenterId));
27602743
}
27612744
}
27622745
}
27632746

27642747
void THive::RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId) {
2748+
BLOG_D("RemoveRegisteredDataCentersNode(" << dataCenterId << ", " << nodeId << ")");
27652749
if (dataCenterId != 0) { // ignore default data center id if exists
2766-
RegisteredDataCenterNodes[dataCenterId].erase(nodeId);
2767-
if (RegisteredDataCenterNodes[dataCenterId].size() == 0) {
2768-
RegisteredDataCenterNodes.erase(dataCenterId);
2769-
}
2770-
if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) {
2771-
UpdateRegisteredDataCenters();
2750+
auto& dataCenter = DataCenters[dataCenterId];
2751+
bool wasRegistered = dataCenter.IsRegistered();
2752+
dataCenter.RegisteredNodes.erase(nodeId);
2753+
if (wasRegistered && !dataCenter.IsRegistered() && !dataCenter.UpdateScheduled) {
2754+
dataCenter.UpdateScheduled = true;
2755+
Schedule(TDuration::Seconds(1), new TEvPrivate::TEvUpdateDataCenterFollowers(dataCenterId));
27722756
}
27732757
}
27742758
}
27752759

2776-
void THive::UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
2777-
BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " RegisteredDataCenters=" << GetRegisteredDataCenters());
2778-
for (TFollowerGroup& group : tablet.FollowerGroups) {
2779-
ui32 followerCount = tablet.GetActualFollowerCount(group.Id);
2780-
ui32 requiredFollowerCount = group.GetComputedFollowerCount(GetRegisteredDataCenters());
2781-
2782-
while (followerCount < requiredFollowerCount) {
2783-
BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " is increasing number of followers (" << followerCount << "<" << requiredFollowerCount << ")");
2784-
2785-
TFollowerTabletInfo& follower = tablet.AddFollower(group);
2786-
follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds());
2787-
db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Update(
2788-
NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
2789-
NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
2790-
NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics));
2791-
follower.InitTabletMetrics();
2792-
follower.BecomeStopped();
2793-
++followerCount;
2794-
}
2760+
void THive::CreateTabletFollowers(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
2761+
BLOG_D("CreateTabletFollowers Tablet " << tablet.ToString());
27952762

2796-
while (followerCount > requiredFollowerCount) {
2797-
BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " is decreasing number of followers (" << followerCount << ">" << requiredFollowerCount << ")");
2763+
// In case tablet already has followers (happens if tablet is modified through CreateTablet), delete them
2764+
// But create new ones before deleting old ones, to avoid issues with reusing ids
2765+
decltype(tablet.Followers)::iterator oldFollowersIt;
2766+
if (tablet.Followers.empty()) {
2767+
oldFollowersIt = tablet.Followers.end();
2768+
} else {
2769+
oldFollowersIt = std::prev(tablet.Followers.end());
2770+
}
27982771

2799-
auto itFollower = tablet.Followers.rbegin();
2800-
while (itFollower != tablet.Followers.rend() && itFollower->FollowerGroup.Id != group.Id) {
2801-
++itFollower;
2772+
for (TFollowerGroup& group : tablet.FollowerGroups) {
2773+
if (group.FollowerCountPerDataCenter) {
2774+
for (auto& [dataCenterId, dataCenter] : DataCenters) {
2775+
if (!dataCenter.IsRegistered()) {
2776+
continue;
2777+
}
2778+
for (ui32 i = 0; i < group.GetFollowerCountForDataCenter(dataCenterId); ++i) {
2779+
TFollowerTabletInfo& follower = tablet.AddFollower(group);
2780+
follower.NodeFilter.AllowedDataCenters = {dataCenterId};
2781+
follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds());
2782+
db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Update(
2783+
NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
2784+
NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
2785+
NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics),
2786+
NIceDb::TUpdate<Schema::TabletFollowerTablet::DataCenter>(dataCenterId));
2787+
follower.InitTabletMetrics();
2788+
follower.BecomeStopped();
2789+
dataCenter.Followers[{tablet.Id, group.Id}].push_back(std::prev(tablet.Followers.end()));
2790+
BLOG_D("Created follower " << follower.GetFullTabletId() << " for dc " << dataCenterId);
2791+
}
28022792
}
2803-
if (itFollower == tablet.Followers.rend()) {
2804-
break;
2793+
} else {
2794+
for (ui32 i = 0; i < group.GetRawFollowerCount(); ++i) {
2795+
TFollowerTabletInfo& follower = tablet.AddFollower(group);
2796+
follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds());
2797+
db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Update(
2798+
NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
2799+
NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
2800+
NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics));
2801+
follower.InitTabletMetrics();
2802+
follower.BecomeStopped();
2803+
BLOG_D("Created follower " << follower.GetFullTabletId());
28052804
}
2806-
TFollowerTabletInfo& follower = *itFollower;
2807-
db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Delete();
2808-
db.Table<Schema::Metrics>().Key(tablet.Id, follower.Id).Delete();
2809-
follower.InitiateStop(sideEffects);
2810-
tablet.Followers.erase(std::prev(itFollower.base()));
2811-
UpdateCounterTabletsTotal(-1);
2812-
--followerCount;
2805+
28132806
}
28142807
}
2808+
2809+
if (oldFollowersIt == tablet.Followers.end()) {
2810+
return;
2811+
}
2812+
auto endIt = std::next(oldFollowersIt);
2813+
for (auto followerIt = tablet.Followers.begin(); followerIt != endIt; ++followerIt) {
2814+
TFollowerTabletInfo& follower = *followerIt;
2815+
BLOG_D("Deleting follower " << follower.GetFullTabletId());
2816+
db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Delete();
2817+
db.Table<Schema::Metrics>().Key(tablet.Id, follower.Id).Delete();
2818+
follower.InitiateStop(sideEffects);
2819+
UpdateCounterTabletsTotal(-1);
2820+
}
2821+
tablet.Followers.erase(tablet.Followers.begin(), endIt);
28152822
}
28162823

28172824
TDuration THive::GetBalancerCooldown(EBalancerType balancerType) const {
@@ -3036,6 +3043,7 @@ void THive::ProcessEvent(std::unique_ptr<IEventHandle> event) {
30363043
hFunc(TEvHive::TEvUpdateDomain, Handle);
30373044
hFunc(TEvPrivate::TEvDeleteNode, Handle);
30383045
hFunc(TEvHive::TEvRequestTabletDistribution, Handle);
3046+
hFunc(TEvPrivate::TEvUpdateDataCenterFollowers, Handle);
30393047
}
30403048
}
30413049

@@ -3138,6 +3146,7 @@ STFUNC(THive::StateWork) {
31383146
fFunc(TEvPrivate::TEvProcessStorageBalancer::EventType, EnqueueIncomingEvent);
31393147
fFunc(TEvPrivate::TEvDeleteNode::EventType, EnqueueIncomingEvent);
31403148
fFunc(TEvHive::TEvRequestTabletDistribution::EventType, EnqueueIncomingEvent);
3149+
fFunc(TEvPrivate::TEvUpdateDataCenterFollowers::EventType, EnqueueIncomingEvent);
31413150
hFunc(TEvPrivate::TEvProcessIncomingEvent, Handle);
31423151
default:
31433152
if (!HandleDefaultEvents(ev, SelfId())) {
@@ -3435,6 +3444,10 @@ void THive::Handle(TEvHive::TEvRequestTabletDistribution::TPtr& ev) {
34353444
Send(ev->Sender, response.release());
34363445
}
34373446

3447+
void THive::Handle(TEvPrivate::TEvUpdateDataCenterFollowers::TPtr& ev) {
3448+
Execute(CreateUpdateDcFollowers(ev->Get()->DataCenter));
3449+
}
3450+
34383451
TVector<TNodeId> THive::GetNodesForWhiteboardBroadcast(size_t maxNodesToReturn) {
34393452
TVector<TNodeId> nodes;
34403453
TNodeId selfNodeId = SelfId().NodeId();

ydb/core/mind/hive/hive_impl.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "sequencer.h"
5555
#include "boot_queue.h"
5656
#include "object_distribution.h"
57+
#include "data_center_info.h"
5758

5859
#define DEPRECATED_CTX (ActorContext())
5960
#define DEPRECATED_NOW (TActivationContext::Now())
@@ -239,6 +240,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
239240
friend class TTxUpdateTabletGroups;
240241
friend class TTxMonEvent_TabletAvailability;
241242
friend class TLoggedMonTransaction;
243+
friend class TTxUpdateDcFollowers;
242244

243245
friend class TDeleteTabletActor;
244246

@@ -301,6 +303,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
301303
ITransaction* CreateRequestTabletOwners(TEvHive::TEvRequestTabletOwners::TPtr event);
302304
ITransaction* CreateUpdateTabletsObject(TEvHive::TEvUpdateTabletsObject::TPtr event);
303305
ITransaction* CreateUpdateDomain(TSubDomainKey subdomainKey, TEvHive::TEvUpdateDomain::TPtr event = {});
306+
ITransaction* CreateUpdateDcFollowers(const TDataCenterId& dc);
304307

305308
public:
306309
TDomainsView DomainsView;
@@ -329,8 +332,6 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
329332
ui32 ConfigurationGeneration = 0;
330333
ui64 TabletsTotal = 0;
331334
ui64 TabletsAlive = 0;
332-
ui32 DataCenters = 1;
333-
ui32 RegisteredDataCenters = 1;
334335
TObjectDistributions ObjectDistributions;
335336
double StorageScatter = 0;
336337
std::set<TTabletTypes::EType> SeenTabletTypes;
@@ -448,7 +449,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
448449
TDuration NodeBrokerEpoch;
449450
std::unordered_map<TTabletTypes::EType, NKikimrConfig::THiveTabletLimit> TabletLimit; // built from CurrentConfig
450451
std::unordered_map<TTabletTypes::EType, NKikimrHive::TDataCentersPreference> DefaultDataCentersPreference;
451-
std::unordered_map<TDataCenterId, std::unordered_set<TNodeId>> RegisteredDataCenterNodes;
452+
std::unordered_map<TDataCenterId, TDataCenterInfo> DataCenters;
452453
std::unordered_set<TNodeId> ConnectedNodes;
453454

454455
// normalized to be sorted list of unique values
@@ -576,6 +577,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
576577
void Handle(TEvHive::TEvUpdateDomain::TPtr& ev);
577578
void Handle(TEvPrivate::TEvDeleteNode::TPtr& ev);
578579
void Handle(TEvHive::TEvRequestTabletDistribution::TPtr& ev);
580+
void Handle(TEvPrivate::TEvUpdateDataCenterFollowers::TPtr& ev);
579581

580582
protected:
581583
void RestartPipeTx(ui64 tabletId);
@@ -680,8 +682,6 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId
680682
void FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabletId, const TLeaderTabletInfo* info, const NKikimrHive::TEvRequestHiveInfo& req);
681683
void ExecuteStartTablet(TFullTabletId tabletId, const TActorId& local, ui64 cookie, bool external);
682684
ui32 GetDataCenters();
683-
ui32 GetRegisteredDataCenters();
684-
void UpdateRegisteredDataCenters();
685685
void AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId);
686686
void RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId);
687687
void QueuePing(const TActorId& local);
@@ -694,7 +694,7 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId
694694
void StopTablet(const TActorId& local, const TTabletInfo& tablet);
695695
void StopTablet(const TActorId& local, TFullTabletId tabletId);
696696
void ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects);
697-
void UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects);
697+
void CreateTabletFollowers(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects);
698698
TDuration GetBalancerCooldown(EBalancerType balancerType) const;
699699
void UpdateObjectCount(const TLeaderTabletInfo& tablet, const TNodeInfo& node, i64 diff);
700700
ui64 GetObjectImbalance(TFullObjectId object);

0 commit comments

Comments
 (0)