Skip to content

Commit 70de92e

Browse files
authored
Support Fetch and Replace operations for storage_config.yaml (#9412)
1 parent ad3aeba commit 70de92e

File tree

11 files changed

+348
-39
lines changed

11 files changed

+348
-39
lines changed

ydb/core/blobstorage/nodewarden/distconf_invoke.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#include "distconf.h"
22
#include "node_warden_impl.h"
33

4+
#include <ydb/library/yaml_config/yaml_config_parser.h>
5+
#include <library/cpp/streams/zstd/zstd.h>
6+
47
namespace NKikimr::NStorage {
58

69
class TDistributedConfigKeeper::TInvokeRequestHandlerActor : public TActorBootstrapped<TInvokeRequestHandlerActor> {
@@ -147,6 +150,12 @@ namespace NKikimr::NStorage {
147150
case TQuery::kAdvanceGeneration:
148151
return AdvanceGeneration();
149152

153+
case TQuery::kFetchStorageConfig:
154+
return FetchStorageConfig();
155+
156+
case TQuery::kReplaceStorageConfig:
157+
return ReplaceStorageConfig(record.GetReplaceStorageConfig().GetYAML());
158+
150159
case TQuery::REQUEST_NOT_SET:
151160
return FinishWithError(TResult::ERROR, "Request field not set");
152161
}
@@ -601,6 +610,54 @@ namespace NKikimr::NStorage {
601610
StartProposition(&config);
602611
}
603612

613+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
614+
// Storage configuration YAML manipulation
615+
616+
void FetchStorageConfig() {
617+
if (!Self->StorageConfig) {
618+
FinishWithError(TResult::ERROR, "no agreed StorageConfig");
619+
} else if (!Self->StorageConfig->HasStorageConfigCompressedYAML()) {
620+
FinishWithError(TResult::ERROR, "no stored YAML for storage config");
621+
} else {
622+
auto ev = PrepareResult(TResult::OK, std::nullopt);
623+
auto *record = &ev->Record;
624+
TStringInput ss(Self->StorageConfig->GetStorageConfigCompressedYAML());
625+
record->MutableFetchStorageConfig()->SetYAML(TZstdDecompress(&ss).ReadAll());
626+
Finish(Sender, SelfId(), ev.release(), 0, Cookie);
627+
}
628+
}
629+
630+
void ReplaceStorageConfig(const TString& yaml) {
631+
if (!RunCommonChecks()) {
632+
return;
633+
}
634+
635+
NKikimrConfig::TAppConfig appConfig;
636+
try {
637+
appConfig = NKikimr::NYaml::Parse(yaml);
638+
} catch (const std::exception& ex) {
639+
return FinishWithError(TResult::ERROR, TStringBuilder() << "exception while parsing YAML: " << ex.what());
640+
}
641+
642+
TString errorReason;
643+
NKikimrBlobStorage::TStorageConfig config(*Self->StorageConfig);
644+
const bool success = DeriveStorageConfig(appConfig, &config, &errorReason);
645+
if (!success) {
646+
return FinishWithError(TResult::ERROR, TStringBuilder() << "error while deriving StorageConfig: "
647+
<< errorReason);
648+
}
649+
650+
TStringStream ss;
651+
{
652+
TZstdCompress zstd(&ss);
653+
zstd << yaml;
654+
}
655+
config.SetStorageConfigCompressedYAML(ss.Str());
656+
657+
config.SetGeneration(config.GetGeneration() + 1);
658+
StartProposition(&config);
659+
}
660+
604661
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
605662
// Configuration proposition
606663

ydb/core/blobstorage/nodewarden/node_warden_impl.cpp

Lines changed: 177 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "node_warden_impl.h"
2+
#include "distconf.h"
23

34
#include <ydb/core/blobstorage/crypto/secured_block.h>
45
#include <ydb/core/blobstorage/dsproxy/dsproxy_request_reporting.h>
@@ -232,20 +233,13 @@ void TNodeWarden::Bootstrap() {
232233
// determine if we are running in 'mock' mode
233234
EnableProxyMock = Cfg->BlobStorageConfig.GetServiceSet().GetEnableProxyMock();
234235

235-
// fill in a storage config
236-
StorageConfig.MutableBlobStorageConfig()->CopyFrom(Cfg->BlobStorageConfig);
237-
for (const auto& node : Cfg->NameserviceConfig.GetNode()) {
238-
auto *r = StorageConfig.AddAllNodes();
239-
r->SetHost(node.GetInterconnectHost());
240-
r->SetPort(node.GetPort());
241-
r->SetNodeId(node.GetNodeId());
242-
if (node.HasLocation()) {
243-
r->MutableLocation()->CopyFrom(node.GetLocation());
244-
} else if (node.HasWalleLocation()) {
245-
r->MutableLocation()->CopyFrom(node.GetWalleLocation());
246-
}
247-
}
248-
StorageConfig.SetClusterUUID(Cfg->NameserviceConfig.GetClusterUUID());
236+
// fill in a base storage config (from the file)
237+
NKikimrConfig::TAppConfig appConfig;
238+
appConfig.MutableBlobStorageConfig()->CopyFrom(Cfg->BlobStorageConfig);
239+
appConfig.MutableNameserviceConfig()->CopyFrom(Cfg->NameserviceConfig);
240+
TString errorReason;
241+
const bool success = DeriveStorageConfig(appConfig, &StorageConfig, &errorReason);
242+
Y_VERIFY_S(success, "failed to generate initial TStorageConfig: " << errorReason);
249243

250244
// Start a statically configured set
251245
if (Cfg->BlobStorageConfig.HasServiceSet()) {
@@ -831,6 +825,175 @@ bool NKikimr::ObtainPDiskKey(NPDisk::TMainKey *mainKey, const NKikimrProto::TKey
831825
return true;
832826
}
833827

828+
bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& appConfig,
829+
NKikimrBlobStorage::TStorageConfig *config, TString *errorReason) {
830+
// copy blob storage config
831+
if (!appConfig.HasBlobStorageConfig()) {
832+
*errorReason = "original config missing mandatory BlobStorageConfig section";
833+
return false;
834+
}
835+
836+
const auto& bsFrom = appConfig.GetBlobStorageConfig();
837+
auto *bsTo = config->MutableBlobStorageConfig();
838+
if (bsFrom.HasAutoconfigSettings()) {
839+
const auto& acFrom = bsFrom.GetAutoconfigSettings();
840+
auto *acTo = bsTo->MutableAutoconfigSettings();
841+
if (acFrom.HasGeneration() && acTo->HasGeneration() && acTo->GetGeneration() + 1 != acFrom.GetGeneration()) {
842+
*errorReason = TStringBuilder() << "generation mismatch for AutoconfigSettings section existing Generation# "
843+
<< acTo->GetGeneration() << " newly provided Generation# " << acFrom.GetGeneration();
844+
return false;
845+
} else if (acTo->HasGeneration() && !acFrom.HasGeneration()) {
846+
*errorReason = "existing AutoconfigSettings has set generation, but newly provided one doesn't have it";
847+
return false;
848+
}
849+
850+
acTo->CopyFrom(acFrom);
851+
} else {
852+
bsTo->ClearAutoconfigSettings();
853+
}
854+
855+
if (bsFrom.HasServiceSet()) {
856+
const auto& ssFrom = bsFrom.GetServiceSet();
857+
auto *ssTo = bsTo->MutableServiceSet();
858+
859+
ssTo->MutableAvailabilityDomains()->CopyFrom(ssFrom.GetAvailabilityDomains());
860+
if (ssFrom.HasReplBrokerConfig()) {
861+
ssTo->MutableReplBrokerConfig()->CopyFrom(ssFrom.GetReplBrokerConfig());
862+
}
863+
if (!ssTo->PDisksSize() && !ssTo->VDisksSize() && !ssTo->GroupsSize()) {
864+
ssTo->MutablePDisks()->CopyFrom(ssFrom.GetPDisks());
865+
ssTo->MutableVDisks()->CopyFrom(ssFrom.GetVDisks());
866+
ssTo->MutableGroups()->CopyFrom(ssFrom.GetGroups());
867+
} else {
868+
NProtoBuf::util::MessageDifferencer differ;
869+
870+
auto error = [&](auto&& key, const char *error) {
871+
*errorReason = TStringBuilder() << key() << ' ' << error;
872+
return false;
873+
};
874+
875+
auto pdiskKey = [](const auto *item) {
876+
return TStringBuilder() << "PDisk NodeId# " << item->GetNodeID() << " PDiskId# " << item->GetPDiskID();
877+
};
878+
879+
auto vdiskKey = [](const auto *item) {
880+
return TStringBuilder() << "VDisk NodeId# " << item->GetNodeID() << " PDiskId# " << item->GetPDiskID()
881+
<< " VDiskSlotId# " << item->GetVDiskSlotID();
882+
};
883+
884+
auto groupKey = [](const auto *item) {
885+
return TStringBuilder() << "group GroupId# " << item->GetGroupID();
886+
};
887+
888+
auto duplicateKey = [&](auto&& key) { return error(std::move(key), "duplicate key in existing StorageConfig"); };
889+
auto removed = [&](auto&& key) { return error(std::move(key), "was removed from BlobStorageConfig of newly provided configuration"); };
890+
auto mismatch = [&](auto&& key) { return error(std::move(key), "configuration item mismatch"); };
891+
892+
THashMap<std::tuple<ui32, ui32>, const NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk*> pdiskMap;
893+
for (const auto& item : ssTo->GetPDisks()) {
894+
if (const auto [it, inserted] = pdiskMap.emplace(std::make_tuple(item.GetNodeID(), item.GetPDiskID()),
895+
&item); !inserted) {
896+
return duplicateKey(std::bind(pdiskKey, &item));
897+
}
898+
}
899+
for (const auto& item : ssFrom.GetPDisks()) {
900+
if (const auto it = pdiskMap.find(std::make_tuple(item.GetNodeID(), item.GetPDiskID())); it == pdiskMap.end()) {
901+
return removed(std::bind(pdiskKey, &item));
902+
} else if (!differ.Equals(item, *it->second)) {
903+
return mismatch(std::bind(pdiskKey, &item));
904+
} else {
905+
pdiskMap.erase(it);
906+
}
907+
}
908+
if (!pdiskMap.empty()) {
909+
*errorReason = "some PDisks were added in newly provided configuration";
910+
return false;
911+
}
912+
913+
THashMap<std::tuple<ui32, ui32, ui32>, const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk*> vdiskMap;
914+
for (const auto& item : ssTo->GetVDisks()) {
915+
if (!item.HasVDiskLocation()) {
916+
*errorReason = "VDisk in existing StorageConfig doesn't have VDiskLocation field set";
917+
return false;
918+
}
919+
const auto& loc = item.GetVDiskLocation();
920+
if (const auto [it, inserted] = vdiskMap.emplace(std::make_tuple(loc.GetNodeID(), loc.GetPDiskID(),
921+
loc.GetVDiskSlotID()), &item); !inserted) {
922+
return duplicateKey(std::bind(vdiskKey, &loc));
923+
}
924+
}
925+
for (const auto& item : ssFrom.GetVDisks()) {
926+
if (!item.HasVDiskLocation()) {
927+
*errorReason = "VDisk in newly provided configuration doesn't have VDiskLocation field set";
928+
return false;
929+
}
930+
const auto& loc = item.GetVDiskLocation();
931+
if (const auto it = vdiskMap.find(std::make_tuple(loc.GetNodeID(), loc.GetPDiskID(),
932+
loc.GetVDiskSlotID())); it == vdiskMap.end()) {
933+
return removed(std::bind(vdiskKey, &loc));
934+
} else if (!differ.Equals(item, *it->second)) {
935+
return mismatch(std::bind(vdiskKey, &loc));
936+
} else {
937+
vdiskMap.erase(it);
938+
}
939+
}
940+
if (!vdiskMap.empty()) {
941+
*errorReason = "some VDisks were added in newly provided configuration";
942+
return false;
943+
}
944+
945+
THashMap<ui32, const NKikimrBlobStorage::TGroupInfo*> groupMap;
946+
for (const auto& item : ssTo->GetGroups()) {
947+
if (const auto [it, inserted] = groupMap.emplace(item.GetGroupID(), &item); !inserted) {
948+
return duplicateKey(std::bind(groupKey, &item));
949+
}
950+
}
951+
for (const auto& item : ssFrom.GetGroups()) {
952+
if (const auto it = groupMap.find(item.GetGroupID()); it == groupMap.end()) {
953+
return removed(std::bind(groupKey, &item));
954+
} else if (!differ.Equals(item, *it->second)) {
955+
return mismatch(std::bind(groupKey, &item));
956+
} else {
957+
groupMap.erase(it);
958+
}
959+
}
960+
if (!groupMap.empty()) {
961+
*errorReason = "some groups were added in newly provided configuration";
962+
return false;
963+
}
964+
}
965+
}
966+
967+
// copy nameservice-related things
968+
if (!appConfig.HasNameserviceConfig()) {
969+
*errorReason = "origin config missing mandatory NameserviceConfig section";
970+
return false;
971+
}
972+
973+
const auto& nsFrom = appConfig.GetNameserviceConfig();
974+
auto *nodes = config->MutableAllNodes();
975+
976+
// just copy AllNodes from TAppConfig into TStorageConfig
977+
nodes->Clear();
978+
for (const auto& node : nsFrom.GetNode()) {
979+
auto *r = nodes->Add();
980+
r->SetHost(node.GetInterconnectHost());
981+
r->SetPort(node.GetPort());
982+
r->SetNodeId(node.GetNodeId());
983+
if (node.HasLocation()) {
984+
r->MutableLocation()->CopyFrom(node.GetLocation());
985+
} else if (node.HasWalleLocation()) {
986+
r->MutableLocation()->CopyFrom(node.GetWalleLocation());
987+
}
988+
}
989+
990+
// and copy ClusterUUID from there too
991+
config->SetClusterUUID(nsFrom.GetClusterUUID());
992+
993+
// TODO(alexvru): apply SS, SSB, SB configs from there too
994+
995+
return true;
996+
}
834997

835998
bool NKikimr::ObtainStaticKey(TEncryptionKey *key) {
836999
// TODO(cthulhu): Replace this with real data

ydb/core/blobstorage/nodewarden/node_warden_impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,9 @@ namespace NKikimr::NStorage {
718718
}
719719
};
720720

721+
bool DeriveStorageConfig(const NKikimrConfig::TAppConfig& appConfig, NKikimrBlobStorage::TStorageConfig *config,
722+
TString *errorReason);
723+
721724
}
722725

723726
template<>

ydb/core/blobstorage/nodewarden/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ PEERDIR(
4242
ydb/core/blobstorage/pdisk
4343
ydb/core/control
4444
ydb/library/pdisk_io
45+
ydb/library/yaml_config
4546
)
4647

4748
END()

ydb/core/mind/bscontroller/bsc.cpp

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,6 @@ void TBlobStorageController::OnActivateExecutor(const TActorContext&) {
108108
ResponsivenessActorID = RegisterWithSameMailbox(ResponsivenessPinger);
109109
}
110110

111-
// request node list
112-
Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(true));
113-
114111
// create storage pool stats monitor
115112
StoragePoolStat = std::make_unique<TStoragePoolStat>(GetServiceCounters(AppData()->Counters, "storage_pool_stat"));
116113

@@ -134,7 +131,9 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
134131
const TMonotonic mono = TActivationContext::Monotonic();
135132

136133
if (StorageConfig.HasBlobStorageConfig()) {
137-
if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
134+
const auto& bsConfig = StorageConfig.GetBlobStorageConfig();
135+
136+
if (bsConfig.HasServiceSet()) {
138137
const auto& ss = bsConfig.GetServiceSet();
139138
for (const auto& pdisk : ss.GetPDisks()) {
140139
const TPDiskId pdiskId(pdisk.GetNodeID(), pdisk.GetPDiskID());
@@ -156,10 +155,23 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
156155
} else {
157156
Y_FAIL("no storage configuration provided");
158157
}
158+
159+
if (bsConfig.HasAutoconfigSettings()) {
160+
// assuming that in autoconfig mode HostRecords are managed by the distconf; we need to apply it here to
161+
// avoid race with box autoconfiguration and node list change
162+
HostRecords = std::make_shared<THostRecordMap::element_type>(StorageConfig);
163+
if (SelfHealId) {
164+
Send(SelfHealId, new TEvPrivate::TEvUpdateHostRecords(HostRecords));
165+
}
166+
}
159167
}
160168

161-
if (!std::exchange(StorageConfigObtained, true) && HostRecords) {
162-
Execute(CreateTxInitScheme());
169+
if (!std::exchange(StorageConfigObtained, true)) { // this is the first time we get StorageConfig in this instance of BSC
170+
if (HostRecords) {
171+
OnHostRecordsInitiate();
172+
} else {
173+
Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(true));
174+
}
163175
}
164176

165177
if (Loaded) {
@@ -269,25 +281,27 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateGroupStat
269281

270282
void TBlobStorageController::Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) {
271283
STLOG(PRI_DEBUG, BS_CONTROLLER, BSC01, "Handle TEvInterconnect::TEvNodesInfo");
272-
const bool initial = !HostRecords;
273-
HostRecords = std::make_shared<THostRecordMap::element_type>(ev->Get());
274-
if (initial) {
275-
if (auto *appData = AppData()) {
276-
if (appData->Icb) {
277-
EnableSelfHealWithDegraded = std::make_shared<TControlWrapper>(0, 0, 1);
278-
appData->Icb->RegisterSharedControl(*EnableSelfHealWithDegraded,
279-
"BlobStorageControllerControls.EnableSelfHealWithDegraded");
280-
}
281-
}
282-
SelfHealId = Register(CreateSelfHealActor());
283-
PushStaticGroupsToSelfHeal();
284-
if (StorageConfigObtained) {
285-
Execute(CreateTxInitScheme());
286-
}
284+
if (!std::exchange(HostRecords, std::make_shared<THostRecordMap::element_type>(ev->Get()))) {
285+
OnHostRecordsInitiate();
287286
}
288287
Send(SelfHealId, new TEvPrivate::TEvUpdateHostRecords(HostRecords));
289288
}
290289

290+
void TBlobStorageController::OnHostRecordsInitiate() {
291+
if (auto *appData = AppData()) {
292+
if (appData->Icb) {
293+
EnableSelfHealWithDegraded = std::make_shared<TControlWrapper>(0, 0, 1);
294+
appData->Icb->RegisterSharedControl(*EnableSelfHealWithDegraded,
295+
"BlobStorageControllerControls.EnableSelfHealWithDegraded");
296+
}
297+
}
298+
SelfHealId = Register(CreateSelfHealActor());
299+
PushStaticGroupsToSelfHeal();
300+
if (StorageConfigObtained) {
301+
Execute(CreateTxInitScheme());
302+
}
303+
}
304+
291305
void TBlobStorageController::IssueInitialGroupContent() {
292306
auto ev = MakeHolder<TEvControllerNotifyGroupChange>();
293307
for (const auto& kv : GroupMap) {

0 commit comments

Comments
 (0)