Skip to content

Commit 0a59a8d

Browse files
authored
Add cluster state switching command (#18593)
1 parent 003d8e6 commit 0a59a8d

File tree

14 files changed

+107
-10
lines changed

14 files changed

+107
-10
lines changed

ydb/core/blobstorage/base/blobstorage_events.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
namespace NKikimr {
55

66
TEvNodeWardenStorageConfig::TEvNodeWardenStorageConfig(const NKikimrBlobStorage::TStorageConfig& config,
7-
const NKikimrBlobStorage::TStorageConfig *proposedConfig, bool selfManagementEnabled)
7+
const NKikimrBlobStorage::TStorageConfig *proposedConfig, bool selfManagementEnabled,
8+
bool isPrimary, bool isBeingPromoted)
89
: Config(std::make_unique<NKikimrBlobStorage::TStorageConfig>(config))
910
, ProposedConfig(proposedConfig
1011
? std::make_unique<NKikimrBlobStorage::TStorageConfig>(*proposedConfig)
1112
: nullptr)
1213
, SelfManagementEnabled(selfManagementEnabled)
14+
, IsPrimary(isPrimary)
15+
, IsBeingPromoted(isBeingPromoted)
1316
{}
1417

1518
TEvNodeWardenStorageConfig::~TEvNodeWardenStorageConfig()

ydb/core/blobstorage/base/blobstorage_events.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,9 +583,12 @@ namespace NKikimr {
583583
std::unique_ptr<NKikimrBlobStorage::TStorageConfig> Config;
584584
std::unique_ptr<NKikimrBlobStorage::TStorageConfig> ProposedConfig;
585585
bool SelfManagementEnabled;
586+
bool IsPrimary; // if bridge mode is enabled
587+
bool IsBeingPromoted; // if bridge mode is enabled
586588

587589
TEvNodeWardenStorageConfig(const NKikimrBlobStorage::TStorageConfig& config,
588-
const NKikimrBlobStorage::TStorageConfig *proposedConfig, bool selfManagementEnabled);
590+
const NKikimrBlobStorage::TStorageConfig *proposedConfig, bool selfManagementEnabled,
591+
bool isPrimary, bool isBeingPromoted);
589592
~TEvNodeWardenStorageConfig();
590593
};
591594

ydb/core/blobstorage/nodewarden/distconf.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "distconf.h"
22
#include "node_warden_impl.h"
33
#include <ydb/core/mind/dynamic_nameserver.h>
4+
#include <ydb/core/protos/bridge.pb.h>
45
#include <ydb/library/yaml_config/yaml_config_helpers.h>
56
#include <ydb/library/yaml_config/yaml_config.h>
67
#include <library/cpp/streams/zstd/zstd.h>
@@ -101,6 +102,23 @@ namespace NKikimr::NStorage {
101102
config.GetSelfManagementConfig().GetEnabled() &&
102103
config.GetGeneration();
103104

105+
if (config.HasClusterState()) {
106+
const auto& state = config.GetClusterState();
107+
bool found = false;
108+
for (const auto& node : config.GetAllNodes()) {
109+
if (node.GetNodeId() == SelfNode.NodeId() && node.HasBridgePileId()) {
110+
IsPrimary = node.GetBridgePileId() == state.GetPrimaryPile();
111+
IsBeingPromoted = !IsPrimary && node.GetBridgePileId() == state.GetPromotedPile();
112+
found = true;
113+
break;
114+
}
115+
}
116+
if (!found) {
117+
// TODO(alexvru): think about better handling of node removal
118+
IsPrimary = IsBeingPromoted = false;
119+
}
120+
}
121+
104122
StorageConfig.emplace(config);
105123
if (ProposedStorageConfig && ProposedStorageConfig->GetGeneration() <= StorageConfig->GetGeneration()) {
106124
ProposedStorageConfig.reset();
@@ -308,7 +326,8 @@ namespace NKikimr::NStorage {
308326
const NKikimrBlobStorage::TStorageConfig *proposedConfig = ProposedStorageConfig && SelfManagementEnabled
309327
? &ProposedStorageConfig.value()
310328
: nullptr;
311-
auto ev = std::make_unique<TEvNodeWardenStorageConfig>(*config, proposedConfig, SelfManagementEnabled);
329+
auto ev = std::make_unique<TEvNodeWardenStorageConfig>(*config, proposedConfig, SelfManagementEnabled,
330+
IsPrimary, IsBeingPromoted);
312331
Send(wardenId, ev.release(), 0, cookie);
313332
}
314333

ydb/core/blobstorage/nodewarden/distconf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ namespace NKikimr::NStorage {
175175
const bool IsSelfStatic = false;
176176
TIntrusivePtr<TNodeWardenConfig> Cfg;
177177
bool SelfManagementEnabled = false;
178+
bool IsPrimary = false;
179+
bool IsBeingPromoted = false;
178180

179181
// currently active storage config
180182
std::optional<NKikimrBlobStorage::TStorageConfig> StorageConfig;

ydb/core/blobstorage/nodewarden/distconf_invoke.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ namespace NKikimr::NStorage {
116116
void Handle(TEvBlobStorage::TEvControllerValidateConfigResponse::TPtr ev);
117117
void BootstrapCluster(const TString& selfAssemblyUUID);
118118

119+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
120+
// Bridge mode
121+
122+
void SwitchBridgeClusterState(const NKikimrBridge::TClusterState& newClusterState);
123+
119124
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
120125
// Configuration proposition
121126

ydb/core/blobstorage/nodewarden/distconf_invoke_bridge.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include "distconf_invoke.h"
2+
3+
#include <ydb/core/protos/bridge.pb.h>
4+
5+
namespace NKikimr::NStorage {
6+
7+
using TInvokeRequestHandlerActor = TDistributedConfigKeeper::TInvokeRequestHandlerActor;
8+
9+
// Handles the SwitchBridgeClusterState invoke query: validates the requested bridge cluster state and, when it is
// acceptable, proposes a new storage config (current generation + 1) carrying that state.
//
// Validation order (every failure below is reported via FinishWithError with TResult::ERROR):
//  - RunCommonChecks() must pass and a bridge config must be present;
//  - the new state must contain exactly one per-pile state for every configured pile;
//  - primary and promoted pile indices must be in range and both piles must be SYNCHRONIZED;
//  - compared with the currently stored cluster state (if there is one), at most one per-pile state may differ.
void TInvokeRequestHandlerActor::SwitchBridgeClusterState(const NKikimrBridge::TClusterState& newClusterState) {
    if (!RunCommonChecks()) {
        // NOTE(review): presumably RunCommonChecks has already replied to the requester on failure -- confirm
        return;
    } else if (!Self->Cfg->BridgeConfig) {
        return FinishWithError(TResult::ERROR, "Bridge mode is not enabled");
    }

    // check new config alone; pile indices are range-checked before they are used to index per-pile states
    const ui32 numPiles = Self->Cfg->BridgeConfig->PilesSize();
    if (newClusterState.PerPileStateSize() != numPiles) {
        return FinishWithError(TResult::ERROR, "incorrect number of per-pile states in new config");
    } else if (newClusterState.GetPrimaryPile() >= numPiles) {
        return FinishWithError(TResult::ERROR, "incorrect primary pile");
    } else if (newClusterState.GetPromotedPile() >= numPiles) {
        return FinishWithError(TResult::ERROR, "incorrect promoted pile");
    } else if (newClusterState.GetPerPileState(newClusterState.GetPrimaryPile()) != NKikimrBridge::TClusterState::SYNCHRONIZED) {
        return FinishWithError(TResult::ERROR, "incorrect primary pile state");
    } else if (newClusterState.GetPerPileState(newClusterState.GetPromotedPile()) != NKikimrBridge::TClusterState::SYNCHRONIZED) {
        return FinishWithError(TResult::ERROR, "incorrect promoted pile state");
    }

    // check the transition against the currently active config; it is only read here, the copy is made later
    const NKikimrBlobStorage::TStorageConfig& currentConfig = *Self->StorageConfig;

    if (currentConfig.HasClusterState()) {
        const NKikimrBridge::TClusterState& current = currentConfig.GetClusterState();
        if (current.PerPileStateSize() != numPiles) {
            // stored state disagrees with the configured pile count: refuse the request instead of aborting the
            // whole process from a request handler
            return FinishWithError(TResult::ERROR, "number of per-pile states in current config does not match bridge config");
        }
        ui32 numDifferent = 0;
        for (ui32 i = 0; i < numPiles; ++i) {
            numDifferent += current.GetPerPileState(i) != newClusterState.GetPerPileState(i);
        }
        if (numDifferent > 1) {
            return FinishWithError(TResult::ERROR, "too many state changes in new configuration");
        }
    }

    // all checks passed: only now pay for the deep copy of the storage config and propose the next generation
    NKikimrBlobStorage::TStorageConfig config = currentConfig;
    config.SetGeneration(config.GetGeneration() + 1);
    config.MutableClusterState()->CopyFrom(newClusterState);
    StartProposition(&config);
}
48+
49+
} // NKikimr::NStorage

ydb/core/blobstorage/nodewarden/distconf_invoke_common.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ namespace NKikimr::NStorage {
125125
case TQuery::kBootstrapCluster:
126126
return BootstrapCluster(record.GetBootstrapCluster().GetSelfAssemblyUUID());
127127

128+
case TQuery::kSwitchBridgeClusterState:
129+
return SwitchBridgeClusterState(record.GetSwitchBridgeClusterState().GetNewClusterState());
130+
128131
case TQuery::REQUEST_NOT_SET:
129132
return FinishWithError(TResult::ERROR, "Request field not set");
130133
}

ydb/core/blobstorage/nodewarden/node_warden_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,8 @@ namespace NKikimr::NStorage {
639639

640640
NKikimrBlobStorage::TStorageConfig StorageConfig;
641641
bool SelfManagementEnabled = false;
642+
bool IsPrimary = false;
643+
bool IsBeingPromoted = false;
642644
THashSet<TActorId> StorageConfigSubscribers;
643645

644646
void Handle(TEvNodeWardenQueryStorageConfig::TPtr ev);

ydb/core/blobstorage/nodewarden/node_warden_resource.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,19 @@ void TNodeWarden::ApplyServiceSet(const NKikimrBlobStorage::TNodeWardenServiceSe
7777
}
7878

7979
void TNodeWarden::Handle(TEvNodeWardenQueryStorageConfig::TPtr ev) {
80-
Send(ev->Sender, new TEvNodeWardenStorageConfig(StorageConfig, nullptr, SelfManagementEnabled));
80+
Send(ev->Sender, new TEvNodeWardenStorageConfig(StorageConfig, nullptr, SelfManagementEnabled, IsPrimary,
81+
IsBeingPromoted));
8182
if (ev->Get()->Subscribe) {
8283
StorageConfigSubscribers.insert(ev->Sender);
8384
}
8485
}
8586

8687
void TNodeWarden::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
87-
ev->Get()->Config->Swap(&StorageConfig);
88-
SelfManagementEnabled = ev->Get()->SelfManagementEnabled;
88+
auto *msg = ev->Get();
89+
msg->Config->Swap(&StorageConfig);
90+
SelfManagementEnabled = msg->SelfManagementEnabled;
91+
IsPrimary = msg->IsPrimary;
92+
IsBeingPromoted = msg->IsBeingPromoted;
8993

9094
if (StorageConfig.HasBlobStorageConfig()) {
9195
if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
@@ -111,7 +115,8 @@ void TNodeWarden::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
111115
}
112116

113117
for (const TActorId& subscriber : StorageConfigSubscribers) {
114-
Send(subscriber, new TEvNodeWardenStorageConfig(StorageConfig, nullptr, SelfManagementEnabled));
118+
Send(subscriber, new TEvNodeWardenStorageConfig(StorageConfig, nullptr, SelfManagementEnabled,
119+
IsPrimary, IsBeingPromoted));
115120
}
116121

117122
if (StorageConfig.HasConfigComposite()) {

ydb/core/blobstorage/nodewarden/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ SRCS(
1111
distconf_generate.cpp
1212
distconf_fsm.cpp
1313
distconf_invoke.h
14+
distconf_invoke_bridge.cpp
1415
distconf_invoke_common.cpp
1516
distconf_invoke_state_storage.cpp
1617
distconf_invoke_static_group.cpp

0 commit comments

Comments
 (0)