Skip to content

Commit 4a723aa

Browse files
authored
Fix quorum for distconf when in Bridge mode (#19366)
1 parent 4f206ec commit 4a723aa

File tree

10 files changed

+564
-277
lines changed

10 files changed

+564
-277
lines changed

ydb/core/base/blobstorage.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,6 +1368,13 @@ struct TEvBlobStorage {
13681368
, GroupId(groupId)
13691369
{}
13701370

1371+
TEvGetResult(NKikimrProto::EReplyStatus status, ui32 sz, TArrayHolder<TResponse> responses, TGroupId groupId) // overload taking a pre-built response array and a typed TGroupId
1372+
: Status(status)
1373+
, ResponseSz(sz) // presumably the number of entries in `responses` -- TODO confirm against callers
1374+
, Responses(std::move(responses)) // the holder is moved into the member, not copied
1375+
, GroupId(groupId.GetRawId()) // the member is initialized from the raw id, not from the wrapper itself
1376+
{}
1377+
13711378
TString Print(bool isFull) const {
13721379
TStringStream str;
13731380
str << "TEvGetResult {Status# " << NKikimrProto::EReplyStatus_Name(Status).data();
@@ -2463,6 +2470,12 @@ struct TEvBlobStorage {
24632470
, StatusFlags(statusFlags)
24642471
{}
24652472

2473+
TEvStatusResult(NKikimrProto::EReplyStatus status, TStorageStatusFlags statusFlags, float approximateFreeSpaceShare) // overload that also sets ApproximateFreeSpaceShare
2474+
: Status(status)
2475+
, StatusFlags(statusFlags)
2476+
, ApproximateFreeSpaceShare(approximateFreeSpaceShare) // NOTE(review): presumably a fraction of free space; valid range is not visible here -- confirm
2477+
{}
2478+
24662479
TString Print(bool isFull) const {
24672480
Y_UNUSED(isFull);
24682481
TStringStream str;

ydb/core/base/bridge.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ namespace NKikimr {
2323
const TPile *BeingPromotedPile = nullptr; // a reference to the pile being promoted, or nullptr if none are promoted
2424

2525
using TPtr = std::shared_ptr<const TBridgeInfo>;
26+
27+
const TPile *GetPile(TBridgePileId bridgePileId) const { // look up a pile by id; the result is never nullptr
28+
Y_ABORT_UNLESS(bridgePileId.GetRawId() < Piles.size()); // the raw id is used as an index into Piles; an out-of-range id is treated as fatal
29+
return &Piles[bridgePileId.GetRawId()]; // pointer to an element of Piles, not a copy
30+
}
2631
};
2732

2833
} // NKikimr

ydb/core/base/id_wrapper.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ template <typename T, typename Tag> class TIdWrapper {
6464
TIdWrapper old = *this;
6565
operator++();
6666
return old;
67-
}
67+
}
6868

6969
friend std::ostream &operator<<(std::ostream &out, TIdWrapper &id) {
7070
return out << id.Raw;
@@ -74,7 +74,7 @@ template <typename T, typename Tag> class TIdWrapper {
7474
return out << id.Raw;
7575
}
7676

77-
constexpr auto operator<=>(const TIdWrapper &) const = default;
77+
friend std::strong_ordering operator<=>(const TIdWrapper&, const TIdWrapper&) = default;
7878

7979
T GetRawId() const { return Raw; }
8080

@@ -83,14 +83,16 @@ template <typename T, typename Tag> class TIdWrapper {
8383
friend THash<TIdWrapper<T, Tag>>;
8484
};
8585

86-
template <typename T, typename Tag> struct std::hash<TIdWrapper<T, Tag>> {
86+
template<typename T, typename Tag> // std::hash specialization for TIdWrapper
87+
struct std::hash<TIdWrapper<T, Tag>> {
8788
std::size_t operator()(const TIdWrapper<T, Tag> &id) const {
8889
return std::hash<T>{}(id.Raw); // delegates to the hash of the wrapped raw value; Tag does not take part in the result
8990
}
9091
};
9192

92-
template <typename T, typename Tag> struct THash<TIdWrapper<T, Tag>> {
93+
template<typename T, typename Tag> // THash specialization for TIdWrapper (TIdWrapper declares THash<TIdWrapper<T, Tag>> a friend)
94+
struct THash<TIdWrapper<T, Tag>> {
9395
std::size_t operator()(const TIdWrapper<T, Tag> &id) const {
94-
return THash<T>()(id.Raw); // old form: functor constructed with parentheses
96+
return THash<T>{}(id.Raw); // new form: same delegation to THash<T> on the raw value, functor constructed with braces
9597
}
9698
};

ydb/core/blobstorage/dsproxy/bridge/bridge.cpp

Lines changed: 240 additions & 68 deletions
Large diffs are not rendered by default.

ydb/core/blobstorage/nodewarden/distconf.h

Lines changed: 12 additions & 203 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,22 @@
1010
namespace NKikimr::NStorage {
1111

1212
struct TNodeIdentifier : std::tuple<TString, ui32, ui32> {
13+
std::optional<TBridgePileId> BridgePileId;
14+
1315
TNodeIdentifier() = default;
1416

15-
TNodeIdentifier(TString host, ui32 port, ui32 nodeId)
17+
TNodeIdentifier(TString host, ui32 port, ui32 nodeId, std::optional<TBridgePileId> bridgePileId) // bridgePileId has no default, so every caller must pass it explicitly (std::nullopt for "none")
1618
: std::tuple<TString, ui32, ui32>(std::move(host), port, nodeId) // host, port and node id live in the tuple base
19+
, BridgePileId(bridgePileId) // NOTE(review): stored outside the tuple base, so anything inherited from std::tuple (comparison, hashing) would ignore it -- confirm this is intended
1720
{}
1821

1922
TNodeIdentifier(const NKikimrBlobStorage::TNodeIdentifier& proto) // build the identifier from its protobuf form
2023
: std::tuple<TString, ui32, ui32>(proto.GetHost(), proto.GetPort(), proto.GetNodeId())
21-
{}
24+
{
25+
if (proto.HasBridgePileId()) { // BridgePileId stays std::nullopt when the proto does not carry the field
26+
BridgePileId.emplace(TBridgePileId::FromProto(&proto, &NKikimrBlobStorage::TNodeIdentifier::GetBridgePileId));
27+
}
28+
}
2229

2330
ui32 NodeId() const {
2431
return std::get<2>(*this);
@@ -28,6 +35,9 @@ namespace NKikimr::NStorage {
2835
proto->SetHost(std::get<0>(*this));
2936
proto->SetPort(std::get<1>(*this));
3037
proto->SetNodeId(std::get<2>(*this));
38+
if (BridgePileId) {
39+
BridgePileId->CopyToProto(proto, &NKikimrBlobStorage::TNodeIdentifier::SetBridgePileId);
40+
}
3141
}
3242
};
3343

@@ -563,207 +573,6 @@ namespace NKikimr::NStorage {
563573
}
564574
}
565575

566-
template<typename T>
567-
bool HasDiskQuorum(const NKikimrBlobStorage::TStorageConfig& config, T&& generateSuccessful) { // true iff strictly more datacenters are "good" than "bad"; a datacenter is good when strictly more of its drives are confirmed than not
568-
// generate set of all required drives; every enumerated drive starts out counted as 'err' for its datacenter
569-
THashMap<TString, std::tuple<ui32, ui32>> status; // dc -> {ok, err}
570-
THashMap<ui32, const NKikimrBlobStorage::TNodeIdentifier*> nodeMap; // looked up by node id below; presumably filled by EnumerateConfigDrives -- TODO confirm
571-
THashSet<std::tuple<TNodeIdentifier, TString>> allDrives; // (node, drive path) pairs not yet confirmed
572-
auto cb = [&status, &allDrives](const auto& node, const auto& drive) {
573-
auto& [ok, err] = status[TNodeLocation(node.GetLocation()).GetDataCenterId()];
574-
++err;
575-
allDrives.emplace(node, drive.GetPath());
576-
};
577-
EnumerateConfigDrives(config, 0, cb, &nodeMap);
578-
579-
// process responses: each confirmed drive is moved from 'err' to 'ok' within its datacenter
580-
generateSuccessful([&](const TNodeIdentifier& node, const TString& path, std::optional<ui64> /*guid*/) {
581-
const auto it = nodeMap.find(node.NodeId());
582-
if (it == nodeMap.end() || TNodeIdentifier(*it->second) != node) { // unexpected node answers
583-
return;
584-
}
585-
if (!allDrives.erase(std::make_tuple(node, path))) { // unexpected drive, or a drive that was already confirmed (erase removes nothing the second time)
586-
return;
587-
}
588-
auto& [ok, err] = status[TNodeLocation(it->second->GetLocation()).GetDataCenterId()];
589-
Y_ABORT_UNLESS(err);
590-
++ok;
591-
--err;
592-
});
593-
594-
// calculate number of good and bad datacenters; a tie inside a datacenter counts as bad
595-
ui32 ok = 0;
596-
ui32 err = 0;
597-
for (const auto& [_, value] : status) {
598-
const auto [dcOk, dcErr] = value;
599-
++(dcOk > dcErr ? ok : err);
600-
}
601-
602-
// strict datacenter majority (with no datacenters at all this yields false, since 0 > 0 does not hold)
603-
return ok > err;
604-
}
605-
606-
template<typename T>
607-
bool HasNodeQuorum(const NKikimrBlobStorage::TStorageConfig& config, T&& generateSuccessful) { // true iff strictly more datacenters are "good" than "bad"; a datacenter is good when strictly more of its nodes are confirmed than not
608-
// generate set of all nodes; every configured node starts out counted as 'err' for its datacenter
609-
THashMap<TString, std::tuple<ui32, ui32>> status; // dc -> {ok, err}
610-
THashMap<ui32, const NKikimrBlobStorage::TNodeIdentifier*> nodeMap; // node id -> node record inside config (pointers into config.GetAllNodes())
611-
for (const auto& node : config.GetAllNodes()) {
612-
auto& [ok, err] = status[TNodeLocation(node.GetLocation()).GetDataCenterId()];
613-
++err;
614-
nodeMap.emplace(node.GetNodeId(), &node);
615-
}
616-
617-
// process responses: each confirmed node is moved from 'err' to 'ok' within its datacenter
618-
std::set<TNodeIdentifier> seen;
619-
generateSuccessful([&](const TNodeIdentifier& node) {
620-
const auto& [_, inserted] = seen.insert(node);
621-
Y_ABORT_UNLESS(inserted); // the generator must not report the same node twice
622-
623-
const auto it = nodeMap.find(node.NodeId());
624-
if (it == nodeMap.end() || TNodeIdentifier(*it->second) != node) { // unexpected node answers
625-
return;
626-
}
627-
auto& [ok, err] = status[TNodeLocation(it->second->GetLocation()).GetDataCenterId()];
628-
Y_ABORT_UNLESS(err);
629-
++ok;
630-
--err;
631-
});
632-
633-
// calculate number of good and bad datacenters; a tie inside a datacenter counts as bad
634-
ui32 ok = 0;
635-
ui32 err = 0;
636-
for (const auto& [_, value] : status) {
637-
const auto [dcOk, dcErr] = value;
638-
++(dcOk > dcErr ? ok : err);
639-
}
640-
641-
// strict datacenter majority (with no datacenters at all this yields false, since 0 > 0 does not hold)
642-
return ok > err;
643-
}
644-
645-
template<typename T>
646-
bool HasStorageQuorum(const NKikimrBlobStorage::TStorageConfig& config, T&& generateSuccessful, // true iff every non-bridged static group passes its quorum checker on the disks confirmed by generateSuccessful
647-
const TNodeWardenConfig& nwConfig, bool allowUnformatted) { // allowUnformatted: also accept confirmations that carry no PDisk guid
648-
auto makeError = [&](TString error) -> bool { // report a malformed configuration; always evaluates to false
649-
STLOG(PRI_CRIT, BS_NODE, NWDC41, "configuration incorrect", (Error, error));
650-
Y_DEBUG_ABORT("%s", error.c_str()); // presumably aborts only in debug builds -- TODO confirm macro semantics
651-
return false;
652-
};
653-
if (!config.HasBlobStorageConfig()) { // no storage config at all -- however, this is quite strange
654-
return makeError("no BlobStorageConfig section in config");
655-
}
656-
const auto& bsConfig = config.GetBlobStorageConfig();
657-
if (!bsConfig.HasServiceSet()) { // maybe this is initial configuration
658-
return !config.GetGeneration() || makeError("non-initial configuration with missing ServiceSet"); // generation 0 without a ServiceSet is accepted; any other generation is an error
659-
}
660-
const auto& ss = bsConfig.GetServiceSet();
661-
662-
// build map of group infos (group id -> parsed info plus the set of disks confirmed so far)
663-
struct TGroupRecord {
664-
TIntrusivePtr<TBlobStorageGroupInfo> Info; // must stay declared before Confirmed: Confirmed's initializer dereferences Info
665-
TBlobStorageGroupInfo::TGroupVDisks Confirmed; // a set of confirmed group disks
666-
667-
TGroupRecord(TIntrusivePtr<TBlobStorageGroupInfo>&& info)
668-
: Info(std::move(info))
669-
, Confirmed(&Info->GetTopology())
670-
{}
671-
};
672-
THashMap<ui32, TGroupRecord> groups;
673-
for (const auto& group : ss.GetGroups()) {
674-
const ui32 groupId = group.GetGroupID();
675-
if (TGroupID(groupId).ConfigurationType() != EGroupConfigurationType::Static) {
676-
return makeError("nonstatic group id in static configuration section");
677-
}
678-
679-
TStringStream err;
680-
TIntrusivePtr<TBlobStorageGroupInfo> info = TBlobStorageGroupInfo::Parse(group, &nwConfig.StaticKey, &err);
681-
if (!info) {
682-
return makeError(TStringBuilder() << "failed to parse static group " << groupId << ": " << err.Str());
683-
}
684-
685-
if (const auto [it, inserted] = groups.emplace(groupId, std::move(info)); !inserted) {
686-
return makeError("duplicate group id in static configuration section");
687-
}
688-
}
689-
690-
// fill in pdisk map
691-
THashMap<std::tuple<ui32, ui32, ui64>, TString> pdiskIdToPath; // (nodeId, pdiskId, pdiskGuid) -> path
692-
for (const auto& pdisk : ss.GetPDisks()) {
693-
const auto [it, inserted] = pdiskIdToPath.emplace(std::make_tuple(pdisk.GetNodeID(), pdisk.GetPDiskID(),
694-
pdisk.GetPDiskGuid()), pdisk.GetPath());
695-
if (!inserted) {
696-
return makeError("duplicate pdisk in static configuration section");
697-
}
698-
}
699-
700-
// create confirmation map: (nodeId, pdisk path, optional pdisk guid) -> vdisks that such a response confirms
701-
THashMultiMap<std::tuple<ui32, TString, std::optional<ui64>>, TVDiskID> confirm;
702-
for (const auto& vdisk : ss.GetVDisks()) {
703-
if (!vdisk.HasVDiskID() || !vdisk.HasVDiskLocation()) {
704-
return makeError("incorrect TVDisk record");
705-
}
706-
if (vdisk.GetEntityStatus() == NKikimrBlobStorage::EEntityStatus::DESTROY) { // disks marked for destruction are not part of the quorum
707-
continue;
708-
}
709-
if (vdisk.HasDonorMode()) { // donor disks are not part of the quorum either
710-
continue;
711-
}
712-
const auto vdiskId = VDiskIDFromVDiskID(vdisk.GetVDiskID());
713-
const auto it = groups.find(vdiskId.GroupID.GetRawId());
714-
if (it == groups.end()) {
715-
return makeError(TStringBuilder() << "VDisk " << vdiskId << " does not match any static group");
716-
}
717-
const TGroupRecord& group = it->second;
718-
if (vdiskId.GroupGeneration != group.Info->GroupGeneration) {
719-
return makeError(TStringBuilder() << "VDisk " << vdiskId << " group generation mismatch");
720-
}
721-
const auto& location = vdisk.GetVDiskLocation();
722-
const auto jt = pdiskIdToPath.find(std::make_tuple(location.GetNodeID(), location.GetPDiskID(),
723-
location.GetPDiskGuid()));
724-
if (jt == pdiskIdToPath.end()) {
725-
return makeError(TStringBuilder() << "VDisk " << vdiskId << " points to incorrect PDisk record");
726-
}
727-
confirm.emplace(std::make_tuple(location.GetNodeID(), jt->second, location.GetPDiskGuid()), vdiskId);
728-
if (allowUnformatted) { // a second key with an empty guid lets a response without a guid confirm the same vdisk
729-
confirm.emplace(std::make_tuple(location.GetNodeID(), jt->second, std::nullopt), vdiskId);
730-
}
731-
}
732-
733-
// process responded nodes: mark every vdisk registered under the reported (node id, path, guid) key as confirmed
734-
generateSuccessful([&](const TNodeIdentifier& node, const TString& path, std::optional<ui64> guid) {
735-
const auto key = std::make_tuple(node.NodeId(), path, guid);
736-
const auto [begin, end] = confirm.equal_range(key);
737-
for (auto it = begin; it != end; ++it) {
738-
const TVDiskID& vdiskId = it->second;
739-
TGroupRecord& group = groups.at(vdiskId.GroupID.GetRawId());
740-
group.Confirmed |= {&group.Info->GetTopology(), vdiskId};
741-
}
742-
});
743-
744-
// scan all groups and find ones without quorum; bridged groups are skipped by this check
745-
for (const auto& [groupId, group] : groups) {
746-
if (group.Info->IsBridged()) {
747-
continue;
748-
}
749-
if (const auto& checker = group.Info->GetQuorumChecker(); !checker.CheckQuorumForGroup(group.Confirmed)) {
750-
return false;
751-
}
752-
}
753-
754-
return true; // all groups meet their quorums
755-
}
756-
757-
// Ensure the configuration has both disk quorum and storage quorum, for the current configuration and (when mindPrev is set and one exists) for the previous configuration.
758-
template<typename T>
759-
bool HasConfigQuorum(const NKikimrBlobStorage::TStorageConfig& config, T&& generateSuccessful,
760-
const TNodeWardenConfig& nwConfig, bool mindPrev = true) {
761-
return HasDiskQuorum(config, generateSuccessful) &&
762-
HasStorageQuorum(config, generateSuccessful, nwConfig, true) && (!mindPrev || !config.HasPrevConfig() || ( // current config is checked with allowUnformatted = true
763-
HasDiskQuorum(config.GetPrevConfig(), generateSuccessful) &&
764-
HasStorageQuorum(config.GetPrevConfig(), generateSuccessful, nwConfig, false))); // previous config is checked with allowUnformatted = false
765-
}
766-
767576
std::optional<TString> ValidateConfigUpdate(const NKikimrBlobStorage::TStorageConfig& current,
768577
const NKikimrBlobStorage::TStorageConfig& proposed);
769578

ydb/core/blobstorage/nodewarden/distconf_binding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ namespace NKikimr::NStorage {
1010
const ui32 selfNodeId = SelfId().NodeId();
1111
for (const auto& item : ev->Get()->Nodes) {
1212
if (item.NodeId == selfNodeId) {
13-
SelfNode = TNodeIdentifier(item.ResolveHost, item.Port, selfNodeId);
13+
SelfNode = TNodeIdentifier(item.ResolveHost, item.Port, selfNodeId, std::nullopt);
1414
Y_ABORT_UNLESS(IsSelfStatic == item.IsStatic);
1515
}
1616
if (item.IsStatic) {

ydb/core/blobstorage/nodewarden/distconf_fsm.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "distconf.h"
2+
#include "distconf_quorum.h"
23

34
namespace NKikimr::NStorage {
45

0 commit comments

Comments
 (0)