Skip to content

Commit 1e51f2c

Browse files
committed
fixes list of nodes and databases in broken environment (#18553)
1 parent 8bfbc8c commit 1e51f2c

File tree

7 files changed: 113 additions and 81 deletions.

ydb/core/viewer/json_pipe_req.cpp

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1077,7 +1077,7 @@ void TViewerPipeClient::HandleResolveResource(TEvTxProxySchemeCache::TEvNavigate
10771077
TSchemeCacheNavigate::TEntry& entry(ResourceNavigateResponse->Get()->Request->ResultSet.front());
10781078
SharedDatabase = CanonizePath(entry.Path);
10791079
Direct |= (SharedDatabase == AppData()->TenantName);
1080-
DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(SharedDatabase);
1080+
ResourceBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(SharedDatabase);
10811081
--DataRequests; // don't count this request
10821082
} else {
10831083
AddEvent("Failed to resolve database - shared database not found");
@@ -1114,16 +1114,25 @@ void TViewerPipeClient::HandleResolve(TEvStateStorage::TEvBoardInfo::TPtr& ev) {
11141114
DatabaseBoardInfoResponse->Set(std::move(ev));
11151115
if (DatabaseBoardInfoResponse->IsOk()) {
11161116
if (Direct) {
1117-
Bootstrap(); // retry bootstrap without redirect this time
1117+
return Bootstrap(); // retry bootstrap without redirect this time
11181118
} else {
1119-
ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(DatabaseBoardInfoResponse->GetRef())));
1119+
return ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(DatabaseBoardInfoResponse->GetRef())));
1120+
}
1121+
}
1122+
}
1123+
if (ResourceBoardInfoResponse) {
1124+
ResourceBoardInfoResponse->Set(std::move(ev));
1125+
if (ResourceBoardInfoResponse->IsOk()) {
1126+
if (Direct) {
1127+
return Bootstrap(); // retry bootstrap without redirect this time
1128+
} else {
1129+
return ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(ResourceBoardInfoResponse->GetRef())));
11201130
}
1121-
} else {
1122-
AddEvent("Failed to resolve database nodes");
1123-
Direct = true;
1124-
Bootstrap(); // retry bootstrap without redirect this time
11251131
}
11261132
}
1133+
AddEvent("Failed to resolve database nodes");
1134+
Direct = true;
1135+
Bootstrap(); // retry bootstrap without redirect this time
11271136
}
11281137

11291138
void TViewerPipeClient::HandleTimeout() {

ydb/core/viewer/json_pipe_req.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,7 @@ class TViewerPipeClient : public TActorBootstrapped<TViewerPipeClient> {
192192
std::optional<TRequestResponse<TEvTxProxySchemeCache::TEvNavigateKeySetResult>> DatabaseNavigateResponse;
193193
std::optional<TRequestResponse<TEvTxProxySchemeCache::TEvNavigateKeySetResult>> ResourceNavigateResponse;
194194
std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> DatabaseBoardInfoResponse;
195+
std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> ResourceBoardInfoResponse;
195196

196197
NTabletPipe::TClientConfig GetPipeClientConfig();
197198

ydb/core/viewer/tests/canondata/result.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2352,6 +2352,7 @@
23522352
"ConnectStatus": "Green",
23532353
"Connections": "not-zero-number",
23542354
"CpuUsage": "not-zero-number",
2355+
"Database": "/Root",
23552356
"DiskSpaceUsage": "not-zero-number",
23562357
"NetworkUtilization": "number",
23572358
"NetworkUtilizationMax": "number",

ydb/core/viewer/viewer_nodes.h

Lines changed: 39 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -72,14 +72,17 @@ class TJsonNodes : public TViewerPipeClient {
7272

7373
std::optional<TRequestResponse<TEvInterconnect::TEvNodesInfo>> NodesInfoResponse;
7474
std::optional<TRequestResponse<TEvWhiteboard::TEvNodeStateResponse>> NodeStateResponse;
75-
std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> DatabaseBoardInfoResponse;
76-
std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> ResourceBoardInfoResponse;
7775
std::optional<TRequestResponse<TEvTxProxySchemeCache::TEvNavigateKeySetResult>> PathNavigateResponse;
7876
std::unordered_map<TTabletId, TRequestResponse<TEvHive::TEvResponseHiveNodeStats>> HiveNodeStats;
7977
bool HiveNodeStatsProcessed = false;
8078
std::vector<TTabletId> HivesToAsk;
8179
bool AskHiveAboutPaths = false;
8280
bool DatabaseNavigateProcessed = false;
81+
bool ResourceNavigateProcessed = false;
82+
bool PathNavigateProcessed = false;
83+
bool DatabaseBoardInfoProcessed = false;
84+
bool ResourceBoardInfoProcessed = false;
85+
bool PDisksProcessed = false;
8386

8487
std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetStoragePoolsResponse>> StoragePoolsResponse;
8588
std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetGroupsResponse>> GroupsResponse;
@@ -113,6 +116,7 @@ class TJsonNodes : public TViewerPipeClient {
113116
TString SharedDatabase;
114117
bool FilterDatabase = false;
115118
bool HasDatabaseNodes = false;
119+
bool HasSharedNodes = false;
116120
TPathId FilterPathId;
117121
TSubDomainKey SubDomainKey;
118122
TSubDomainKey SharedSubDomainKey;
@@ -150,6 +154,7 @@ class TJsonNodes : public TViewerPipeClient {
150154
Pools,
151155
Groups,
152156
VSlots,
157+
DoneOrError,
153158
};
154159

155160
enum class EPeerRole {
@@ -1090,7 +1095,7 @@ class TJsonNodes : public TViewerPipeClient {
10901095
if (!DatabaseNavigateResponse) {
10911096
DatabaseNavigateResponse = MakeRequestSchemeCacheNavigate(Database, ENavigateRequestDatabase);
10921097
}
1093-
if (!FieldsNeeded(FieldsHiveNodeStat) && !(FilterPath && FieldsNeeded(FieldsTablets))) {
1098+
if (!DatabaseBoardInfoResponse && !FieldsNeeded(FieldsHiveNodeStat) && !(FilterPath && FieldsNeeded(FieldsTablets))) {
10941099
DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(Database, EBoardInfoRequestDatabase);
10951100
}
10961101
if ((Type == EType::Storage || Type == EType::Static) && FilterStoragePools.empty() && FilterGroupIds.empty()) {
@@ -1177,13 +1182,13 @@ class TJsonNodes : public TViewerPipeClient {
11771182
if (FilterDatabase) {
11781183
if (FilterSubDomainKey && FieldsAvailable.test(+ENodeFields::SubDomainKey)) {
11791184
TNodeView nodeView;
1180-
if (HasDatabaseNodes) {
1185+
if (HasDatabaseNodes || !HasSharedNodes) {
11811186
for (TNode* node : NodeView) {
11821187
if (node->HasSubDomainKey(SubDomainKey)) {
11831188
nodeView.push_back(node);
11841189
}
11851190
}
1186-
} else {
1191+
} else if (HasSharedNodes) {
11871192
for (TNode* node : NodeView) {
11881193
if (node->HasSubDomainKey(SharedSubDomainKey)) {
11891194
nodeView.push_back(node);
@@ -1194,16 +1199,16 @@ class TJsonNodes : public TViewerPipeClient {
11941199
FoundNodes = TotalNodes = NodeView.size();
11951200
InvalidateNodes();
11961201
FilterDatabase = false;
1197-
AddEvent("PreFilter Applied");
1202+
AddEvent("PreFilter SubDomain Applied");
11981203
} else if (FieldsAvailable.test(+ENodeFields::Database)) {
11991204
TNodeView nodeView;
1200-
if (HasDatabaseNodes) {
1205+
if (HasDatabaseNodes || !HasSharedNodes) {
12011206
for (TNode* node : NodeView) {
12021207
if (node->HasDatabase(Database)) {
12031208
nodeView.push_back(node);
12041209
}
12051210
}
1206-
} else {
1211+
} else if (HasSharedNodes) {
12071212
for (TNode* node : NodeView) {
12081213
if (node->HasDatabase(SharedDatabase)) {
12091214
nodeView.push_back(node);
@@ -1214,7 +1219,7 @@ class TJsonNodes : public TViewerPipeClient {
12141219
FoundNodes = TotalNodes = NodeView.size();
12151220
InvalidateNodes();
12161221
FilterDatabase = false;
1217-
AddEvent("PreFilter Applied");
1222+
AddEvent("PreFilter Database Applied");
12181223
} else {
12191224
return;
12201225
}
@@ -1666,7 +1671,7 @@ class TJsonNodes : public TViewerPipeClient {
16661671
if (PathNavigateResponse && !PathNavigateResponse->IsDone()) {
16671672
return false;
16681673
}
1669-
return CurrentTimeoutState < TimeoutTablets;
1674+
return true;
16701675
}
16711676

16721677
bool TimeToAskWhiteboard() {
@@ -1840,7 +1845,7 @@ class TJsonNodes : public TViewerPipeClient {
18401845
DatabaseNavigateProcessed = true;
18411846
}
18421847

1843-
if (ResourceNavigateResponse && ResourceNavigateResponse->IsDone()) { // database hive and subdomain key
1848+
if (ResourceNavigateResponse && ResourceNavigateResponse->IsDone() && !ResourceNavigateProcessed) { // database hive and subdomain key
18441849
if (ResourceNavigateResponse->IsOk()) {
18451850
auto* ev = ResourceNavigateResponse->Get();
18461851
if (ev->Request->ResultSet.size() == 1 && ev->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) {
@@ -1862,17 +1867,19 @@ class TJsonNodes : public TViewerPipeClient {
18621867
}
18631868
}
18641869
} else {
1865-
ResourceBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(path, EBoardInfoRequestResource);
1870+
if (!ResourceBoardInfoResponse) {
1871+
ResourceBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(path, EBoardInfoRequestResource);
1872+
}
18661873
}
18671874
}
18681875
} else {
18691876
NodeView.clear();
18701877
AddProblem("no-shared-database-info");
18711878
}
1872-
ResourceNavigateResponse.reset();
1879+
ResourceNavigateProcessed = true;
18731880
}
18741881

1875-
if (PathNavigateResponse && PathNavigateResponse->IsDone()) { // filter path id
1882+
if (PathNavigateResponse && PathNavigateResponse->IsDone() && !PathNavigateProcessed) { // filter path id
18761883
if (PathNavigateResponse->IsOk()) {
18771884
auto* ev = PathNavigateResponse->Get();
18781885
if (ev->Request->ResultSet.size() == 1 && ev->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) {
@@ -1901,10 +1908,10 @@ class TJsonNodes : public TViewerPipeClient {
19011908
} else {
19021909
AddProblem("no-path-info");
19031910
}
1904-
PathNavigateResponse.reset();
1911+
PathNavigateProcessed = true;
19051912
}
19061913

1907-
if (DatabaseBoardInfoResponse && DatabaseBoardInfoResponse->IsDone() && TotalNodes > 0) {
1914+
if (DatabaseBoardInfoResponse && DatabaseBoardInfoResponse->IsDone() && TotalNodes > 0 && !DatabaseBoardInfoProcessed) {
19081915
if (DatabaseBoardInfoResponse->IsOk() && DatabaseBoardInfoResponse->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) {
19091916
TString database = GetDatabaseFromEndpointsBoardPath(DatabaseBoardInfoResponse->Get()->Path);
19101917
for (const auto& entry : DatabaseBoardInfoResponse->Get()->InfoEntries) {
@@ -1921,10 +1928,10 @@ class TJsonNodes : public TViewerPipeClient {
19211928
} else {
19221929
AddProblem("no-database-board-info");
19231930
}
1924-
DatabaseBoardInfoResponse.reset();
1931+
DatabaseBoardInfoProcessed = true;
19251932
}
19261933

1927-
if (ResourceBoardInfoResponse && ResourceBoardInfoResponse->IsDone() && TotalNodes > 0) {
1934+
if (ResourceBoardInfoResponse && ResourceBoardInfoResponse->IsDone() && TotalNodes > 0 && !ResourceBoardInfoProcessed) {
19281935
if (ResourceBoardInfoResponse->IsOk() && ResourceBoardInfoResponse->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) {
19291936
TString database = GetDatabaseFromEndpointsBoardPath(ResourceBoardInfoResponse->Get()->Path);
19301937
for (const auto& entry : ResourceBoardInfoResponse->Get()->InfoEntries) {
@@ -1933,14 +1940,15 @@ class TJsonNodes : public TViewerPipeClient {
19331940
if (node) {
19341941
node->Database = database;
19351942
node->GotDatabaseFromResourceBoardInfo = true;
1943+
HasSharedNodes = true;
19361944
}
19371945
}
19381946
}
19391947
FieldsAvailable.set(+ENodeFields::Database);
19401948
} else {
19411949
AddProblem("no-shared-database-board-info");
19421950
}
1943-
ResourceBoardInfoResponse.reset();
1951+
ResourceBoardInfoProcessed = true;
19441952
}
19451953

19461954
if (!TimeToAskHive()) {
@@ -1949,8 +1957,8 @@ class TJsonNodes : public TViewerPipeClient {
19491957

19501958
AddEvent("TimeToAskHive");
19511959

1952-
if (!HivesToAsk.empty()) {
1953-
AddEvent("HivesTokHive");
1960+
if (!HivesToAsk.empty() && CurrentTimeoutState < TimeoutTablets) {
1961+
AddEvent("HivesToAsk");
19541962
std::sort(HivesToAsk.begin(), HivesToAsk.end());
19551963
HivesToAsk.erase(std::unique(HivesToAsk.begin(), HivesToAsk.end()), HivesToAsk.end());
19561964
for (TTabletId hiveId : HivesToAsk) {
@@ -2000,6 +2008,9 @@ class TJsonNodes : public TViewerPipeClient {
20002008
if (node->SubDomainKey == SubDomainKey) {
20012009
HasDatabaseNodes = true;
20022010
}
2011+
if (node->SubDomainKey == SharedSubDomainKey) {
2012+
HasSharedNodes = true;
2013+
}
20032014
}
20042015
}
20052016
}
@@ -2026,6 +2037,7 @@ class TJsonNodes : public TViewerPipeClient {
20262037
FilterStorageStage = EFilterStorageStage::Groups;
20272038
} else {
20282039
AddProblem("bsc-storage-pools-no-data");
2040+
FilterStorageStage = EFilterStorageStage::DoneOrError;
20292041
}
20302042
StoragePoolsResponse.reset();
20312043
}
@@ -2042,8 +2054,8 @@ class TJsonNodes : public TViewerPipeClient {
20422054
FilterStorageStage = EFilterStorageStage::VSlots;
20432055
} else {
20442056
AddProblem("bsc-storage-groups-no-data");
2057+
FilterStorageStage = EFilterStorageStage::DoneOrError;
20452058
}
2046-
GroupsResponse.reset();
20472059
}
20482060
if ((FilterStorageStage == EFilterStorageStage::VSlots || FilterStorageStage == EFilterStorageStage::None) && VSlotsResponse && VSlotsResponse->IsDone()) {
20492061
if (VSlotsResponse->IsOk()) {
@@ -2070,14 +2082,14 @@ class TJsonNodes : public TViewerPipeClient {
20702082
MaximumSlotsPerDisk = std::max(MaximumSlotsPerDisk.value_or(0), slots);
20712083
}
20722084
FieldsAvailable.set(+ENodeFields::HasDisks);
2073-
FilterStorageStage = EFilterStorageStage::None;
2085+
FilterStorageStage = EFilterStorageStage::DoneOrError;
20742086
ApplyEverything();
20752087
} else {
20762088
AddProblem("bsc-storage-slots-no-data");
2089+
FilterStorageStage = EFilterStorageStage::DoneOrError;
20772090
}
2078-
VSlotsResponse.reset();
20792091
}
2080-
if (PDisksResponse && PDisksResponse->IsDone()) {
2092+
if (PDisksResponse && PDisksResponse->IsDone() && !PDisksProcessed) {
20812093
if (PDisksResponse->IsOk()) {
20822094
std::unordered_map<TNodeId, std::size_t> disksPerNode;
20832095
for (const auto& pdiskEntry : PDisksResponse->Get()->Record.GetEntries()) {
@@ -2099,7 +2111,7 @@ class TJsonNodes : public TViewerPipeClient {
20992111
} else {
21002112
AddProblem("bsc-pdisks-no-data");
21012113
}
2102-
PDisksResponse.reset();
2114+
PDisksProcessed = true;
21032115
}
21042116

21052117
if (!TimeToAskWhiteboard()) {
@@ -3077,6 +3089,7 @@ class TJsonNodes : public TViewerPipeClient {
30773089
}
30783090
}
30793091
if (WaitingForResponse()) {
3092+
AddEvent("WaitingForSomethingOnTimeout");
30803093
ReplyAndPassAway();
30813094
}
30823095
}

ydb/core/viewer/viewer_tabletinfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,8 @@ class TJsonTabletInfo : public TJsonWhiteboardRequest<TEvWhiteboard::TEvTabletSt
108108
}
109109
if (DatabaseBoardInfoResponse && DatabaseBoardInfoResponse->IsOk()) {
110110
TBase::RequestSettings.FilterNodeIds = TBase::GetNodesFromBoardReply(DatabaseBoardInfoResponse->GetRef());
111+
} else if (ResourceBoardInfoResponse && ResourceBoardInfoResponse->IsOk()) {
112+
TBase::RequestSettings.FilterNodeIds = TBase::GetNodesFromBoardReply(ResourceBoardInfoResponse->GetRef());
111113
} else if (Database || SharedDatabase) {
112114
RequestStateStorageEndpointsLookup(SharedDatabase ? SharedDatabase : Database);
113115
Become(&TThis::StateRequestedLookup, TDuration::MilliSeconds(TBase::RequestSettings.Timeout), new TEvents::TEvWakeup());

ydb/core/viewer/viewer_tenantinfo.h

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -991,6 +991,48 @@ class TJsonTenantInfo : public TViewerPipeClient {
991991
ReplyAndPassAway(GetHTTPOKJSON(json.Str()));
992992
}
993993

994+
// Timeout handler for the tenant-info request.
// Calls Error("Timeout") on ListTenantsResponse (when it is set) and on every
// request stored in the keyed request collections below, then replies via
// ReplyAndPassAway() and finally invokes the base-class timeout handler.
void HandleTimeout() {
995+
TString error = "Timeout"; // single error text passed to every Error() call below
996+
if (ListTenantsResponse) { // only touched when the member holds a value
997+
ListTenantsResponse->Error(error);
998+
}
999+
for (auto& [_, request] : TenantStatusResponses) { // key is unused; only the request object is marked
1000+
request.Error(error);
1001+
}
1002+
for (auto& [_, request] : NavigateKeySetResult) {
1003+
request.Error(error);
1004+
}
1005+
for (auto& [_, request] : DescribeSchemeResult) {
1006+
request.Error(error);
1007+
}
1008+
for (auto& [_, request] : HiveDomainStats) {
1009+
request.Error(error);
1010+
}
1011+
for (auto& [_, request] : HiveStorageStats) {
1012+
request.Error(error);
1013+
}
1014+
for (auto& [_, request] : SystemStateResponse) {
1015+
request.Error(error);
1016+
}
1017+
for (auto& [_, request] : TabletStateResponse) {
1018+
request.Error(error);
1019+
}
1020+
for (auto& [_, request] : OffloadedSystemStateResponse) {
1021+
request.Error(error);
1022+
}
1023+
for (auto& [_, request] : OffloadedTabletStateResponse) {
1024+
request.Error(error);
1025+
}
1026+
for (auto& [_, request] : SelfCheckResults) {
1027+
request.Error(error);
1028+
}
1029+
for (auto& [_, request] : MetadataCacheEndpointsLookup) {
1030+
request.Error(error);
1031+
}
1032+
ReplyAndPassAway(); // reply is issued only after every request above has been marked with the error
1033+
TBase::HandleTimeout(); // NOTE(review): base handler runs after ReplyAndPassAway() — confirm it is safe to call once a reply has already been sent
1034+
}
1035+
9941036
static YAML::Node GetSwagger() {
9951037
TSimpleYamlBuilder yaml({
9961038
.Method = "get",

0 commit comments

Comments
 (0)