Skip to content

Commit 2532302

Browse files
authored
Automatic blobstorage balancing (#18856)
1 parent 9c75f0c commit 2532302

File tree

18 files changed

+716
-12
lines changed

18 files changed

+716
-12
lines changed

ydb/core/base/blobstorage_common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ namespace NKikimr{
1010
struct TBridgePileTag;
1111
using TBridgePileId = TIdWrapper<ui32, TBridgePileTag>;
1212

13+
inline bool IsDynamicGroup(TGroupId groupId) {
14+
return groupId.GetRawId() & 0x80000000;
15+
}
1316
}
1417

1518

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h>
2+
#include <ydb/core/util/random.h>
3+
4+
#include <library/cpp/iterator/enumerate.h>
5+
6+
struct TTestEnv {
7+
TTestEnv(ui32 nodeCount, TBlobStorageGroupType erasure, ui32 pdiskPerNode, ui32 groupCount)
8+
: Env({
9+
.NodeCount = nodeCount,
10+
.VDiskReplPausedAtStart = false,
11+
.Erasure = erasure,
12+
.ConfigPreprocessor = [](ui32, TNodeWardenConfig& conf) {
13+
auto* bscSettings = conf.BlobStorageConfig.MutableBscSettings();
14+
auto* clusterBalancingSettings = bscSettings->MutableClusterBalancingSettings();
15+
16+
clusterBalancingSettings->SetEnable(true);
17+
clusterBalancingSettings->SetMaxReplicatingPDisks(100);
18+
clusterBalancingSettings->SetMaxReplicatingVDisks(800);
19+
clusterBalancingSettings->SetIterationIntervalMs(TDuration::Seconds(1).MilliSeconds());
20+
},
21+
})
22+
{
23+
Env.CreateBoxAndPool(pdiskPerNode, groupCount);
24+
Env.Sim(TDuration::Minutes(1));
25+
// Uncomment to see cluster rebalancing logs
26+
// Env.Runtime->SetLogPriority(NKikimrServices::BS_CLUSTER_BALANCING, NActors::NLog::PRI_DEBUG);
27+
}
28+
29+
bool IsDynamicGroup(ui32 groupId) {
30+
return groupId & 0x80000000;
31+
}
32+
33+
std::unordered_map<TPDiskId, ui32> BuildPDiskUsageMap() {
34+
auto config = Env.FetchBaseConfig();
35+
36+
std::unordered_map<TPDiskId, ui32> pdiskUsageMap;
37+
38+
for (const auto& pdisk : config.pdisk()) {
39+
TPDiskId pdiskId(pdisk.GetNodeId(), pdisk.GetPDiskId());
40+
pdiskUsageMap[pdiskId] = pdisk.GetNumStaticSlots();
41+
}
42+
43+
for (const auto& vslot : config.vslot()) {
44+
if (!IsDynamicGroup(vslot.GetGroupId())) {
45+
continue;
46+
}
47+
48+
TPDiskId pdiskId(vslot.GetVSlotId().GetNodeId(), vslot.GetVSlotId().GetPDiskId());
49+
auto it = pdiskUsageMap.find(pdiskId);
50+
if (it == pdiskUsageMap.end()) {
51+
continue;
52+
}
53+
it->second += 1;
54+
}
55+
56+
return pdiskUsageMap;
57+
}
58+
59+
bool EachPDiskHasNVDisks(ui32 n) {
60+
auto usageMap = BuildPDiskUsageMap();
61+
62+
return std::all_of(usageMap.begin(), usageMap.end(), [&](std::pair<TPDiskId, ui32> val) { return val.second == n; });
63+
}
64+
65+
template<class TCondition>
66+
bool WaitFor(TCondition&& condition, size_t maxAttempts = 1) {
67+
for (size_t attempt = 0; attempt < maxAttempts; ++attempt) {
68+
if (condition()) {
69+
return true;
70+
}
71+
Env.Sim(TDuration::Minutes(1));
72+
}
73+
return false;
74+
}
75+
76+
TEnvironmentSetup* operator->() {
77+
return &Env;
78+
}
79+
80+
TEnvironmentSetup Env;
81+
};
82+
83+
Y_UNIT_TEST_SUITE(ClusterBalancing) {
84+
85+
Y_UNIT_TEST(ClusterBalancingEvenDistribution) {
86+
TTestEnv env(8, TBlobStorageGroupType::Erasure4Plus2Block, 1, 2);
87+
88+
UNIT_ASSERT(env.EachPDiskHasNVDisks(2));
89+
90+
env->AlterBox(1, 2);
91+
92+
bool success = env.WaitFor([&] {
93+
return env.EachPDiskHasNVDisks(1);
94+
}, 10);
95+
96+
UNIT_ASSERT(success);
97+
}
98+
99+
Y_UNIT_TEST(ClusterBalancingEvenDistributionNotPossible) {
100+
TTestEnv env(8, TBlobStorageGroupType::Erasure4Plus2Block, 1, 3);
101+
102+
UNIT_ASSERT(env.EachPDiskHasNVDisks(3));
103+
104+
env->AlterBox(1, 2);
105+
106+
auto check = [&] {
107+
auto usageMap = env.BuildPDiskUsageMap();
108+
std::unordered_map<ui32, ui32> countByUsage;
109+
for (const auto& [pdiskId, usage] : usageMap) {
110+
countByUsage[usage]++;
111+
}
112+
// There is now total of 16 PDisks, 8 of them should be used by 2 VDisks and 8 of them should be used by 1 VDisk.
113+
// This is the best distribution possible.
114+
return countByUsage[1] == 8 && countByUsage[2] == 8;
115+
};
116+
117+
bool success = env.WaitFor(check, 10);
118+
119+
UNIT_ASSERT(success);
120+
121+
auto usageMap1 = env.BuildPDiskUsageMap();
122+
123+
env.Env.Runtime->FilterFunction = [&](ui32, std::unique_ptr<IEventHandle>& ev) {
124+
if (ev->GetTypeRewrite() == TEvBlobStorage::TEvControllerConfigResponse::EventType) {
125+
const auto& response = ev->Get<TEvBlobStorage::TEvControllerConfigResponse>()->Record.GetResponse();
126+
127+
const auto& status = response.GetStatus(0);
128+
129+
UNIT_ASSERT_VALUES_EQUAL(0, status.ReassignedItemSize());
130+
}
131+
return true;
132+
};
133+
134+
env.Env.Sim(TDuration::Seconds(5));
135+
136+
// Check that the cluster balancing doesn't do anything now.
137+
// Optimal distribution is already achieved, any reassignment will only move data around for no reason.
138+
UNIT_ASSERT(check());
139+
140+
auto usageMap2 = env.BuildPDiskUsageMap();
141+
142+
UNIT_ASSERT(usageMap1 == usageMap2);
143+
}
144+
}

ydb/core/blobstorage/ut_blobstorage/lib/env.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,41 @@ struct TEnvironmentSetup {
596596
UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());
597597
}
598598

599+
void AlterBox(ui32 itemConfigGeneration, ui32 numDrivesPerNode = 0, ui32 numStorageNodes = 0,
600+
NKikimrBlobStorage::EPDiskType pdiskType = NKikimrBlobStorage::EPDiskType::ROT) {
601+
NKikimrBlobStorage::TConfigRequest request;
602+
603+
auto *cmd = request.AddCommand()->MutableDefineHostConfig();
604+
cmd->SetHostConfigId(1);
605+
for (ui32 j = 0; j < (numDrivesPerNode ? numDrivesPerNode : DrivesPerNode); ++j) {
606+
auto *drive = cmd->AddDrive();
607+
drive->SetPath(Sprintf("SectorMap:%" PRIu32 ":1000", j));
608+
drive->SetType(pdiskType);
609+
}
610+
611+
cmd->SetItemConfigGeneration(itemConfigGeneration);
612+
613+
cmd = request.AddCommand()->MutableDefineHostConfig();
614+
cmd->SetHostConfigId(2);
615+
616+
cmd->SetItemConfigGeneration(itemConfigGeneration);
617+
618+
auto *cmd1 = request.AddCommand()->MutableDefineBox();
619+
cmd1->SetBoxId(1);
620+
ui32 index = 0;
621+
for (ui32 nodeId : Runtime->GetNodes()) {
622+
auto *host = cmd1->AddHost();
623+
host->MutableKey()->SetNodeId(nodeId);
624+
host->SetHostConfigId(numStorageNodes == 0 || index < numStorageNodes ? 1 : 2);
625+
++index;
626+
}
627+
628+
cmd1->SetItemConfigGeneration(itemConfigGeneration);
629+
630+
auto response = Invoke(request);
631+
UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());
632+
}
633+
599634
void CreatePoolInBox(ui32 boxId, ui32 poolId, TString poolName, ui32 defaultGroupSizeInUnits = 0) {
600635
NKikimrBlobStorage::TConfigRequest request;
601636

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
UNITTEST_FOR(ydb/core/blobstorage/ut_blobstorage)
2+
3+
SIZE(MEDIUM)
4+
5+
FORK_SUBTESTS()
6+
7+
SRCS(
8+
cluster_balancing.cpp
9+
)
10+
11+
PEERDIR(
12+
ydb/core/blobstorage/ut_blobstorage/lib
13+
)
14+
15+
END()

ydb/core/blobstorage/ut_blobstorage/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,5 @@ RECURSE_FOR_TESTS(
8282
ut_restart_pdisk
8383
ut_read_only_pdisk
8484
ut_stop_pdisk
85+
ut_cluster_balancing
8586
)

ydb/core/mind/bscontroller/bsc.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "config.h"
33
#include "self_heal.h"
44
#include "sys_view.h"
5+
#include "cluster_balancing.h"
56
#include "console_interaction.h"
67
#include "group_geometry_info.h"
78
#include "group_layout_checker.h"
@@ -547,6 +548,12 @@ void TBlobStorageController::SetHostRecords(THostRecordMap hostRecords) {
547548
}
548549
Y_ABORT_UNLESS(!SelfHealId);
549550
SelfHealId = Register(CreateSelfHealActor());
551+
552+
ClusterBalancingSettings = ParseClusterBalancingSettings(StorageConfig);
553+
if (ClusterBalancingSettings.Enable) {
554+
ClusterBalanceActorId = Register(CreateClusterBalancingActor(SelfId(), ClusterBalancingSettings));
555+
}
556+
550557
PushStaticGroupsToSelfHeal();
551558
Execute(CreateTxInitScheme());
552559
}
@@ -711,7 +718,7 @@ void TBlobStorageController::PassAway() {
711718
ResponsivenessPinger->Detach(TActivationContext::ActorContextFor(ResponsivenessActorID));
712719
ResponsivenessPinger = nullptr;
713720
}
714-
for (TActorId *ptr : {&SelfHealId, &StatProcessorActorId, &SystemViewsCollectorId}) {
721+
for (TActorId *ptr : {&SelfHealId, &StatProcessorActorId, &SystemViewsCollectorId, &ClusterBalanceActorId}) {
715722
if (const TActorId actorId = std::exchange(*ptr, {})) {
716723
TActivationContext::Send(new IEventHandle(TEvents::TSystem::Poison, 0, actorId, SelfId(), nullptr, 0));
717724
}

0 commit comments

Comments
 (0)