@@ -1108,28 +1108,16 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
1108
1108
auto nodeId = ev->Get ()->NodeId ;
1109
1109
switch (eventId) {
1110
1110
case TEvWhiteboard::EvSystemStateRequest:
1111
- if (!NodeSystemState[nodeId].IsDone ()) {
1112
- NodeSystemState.erase (nodeId);
1113
- NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId, {-1 });
1114
- }
1111
+ NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId, {-1 });
1115
1112
break ;
1116
1113
case TEvWhiteboard::EvVDiskStateRequest:
1117
- if (!NodeVDiskState[nodeId].IsDone ()) {
1118
- NodeVDiskState.erase (nodeId);
1119
- NodeVDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId);
1120
- }
1114
+ NodeVDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId);
1121
1115
break ;
1122
1116
case TEvWhiteboard::EvPDiskStateRequest:
1123
- if (!NodePDiskState[nodeId].IsDone ()) {
1124
- NodePDiskState.erase (nodeId);
1125
- NodePDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId);
1126
- }
1117
+ NodePDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId);
1127
1118
break ;
1128
1119
case TEvWhiteboard::EvBSGroupStateRequest:
1129
- if (!NodeBSGroupState[nodeId].IsDone ()) {
1130
- NodeBSGroupState.erase (nodeId);
1131
- NodeBSGroupState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId);
1132
- }
1120
+ NodeBSGroupState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId);
1133
1121
break ;
1134
1122
default :
1135
1123
RequestDone (" unsupported event scheduled" );
@@ -1151,31 +1139,39 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
1151
1139
TString error = " Undelivered" ;
1152
1140
if (ev->Get ()->SourceType == TEvWhiteboard::EvSystemStateRequest) {
1153
1141
if (NodeSystemState.count (nodeId) && NodeSystemState[nodeId].Error (error)) {
1154
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId)) {
1142
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId)) {
1143
+ NodeSystemState.erase (nodeId);
1144
+ } else {
1155
1145
RequestDone (" undelivered of TEvSystemStateRequest" );
1156
1146
UnavailableComputeNodes.insert (nodeId);
1157
1147
}
1158
1148
}
1159
1149
}
1160
1150
if (ev->Get ()->SourceType == TEvWhiteboard::EvVDiskStateRequest) {
1161
1151
if (NodeVDiskState.count (nodeId) && NodeVDiskState[nodeId].Error (error)) {
1162
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId)) {
1152
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId)) {
1153
+ NodeVDiskState.erase (nodeId);
1154
+ } else {
1163
1155
RequestDone (" undelivered of TEvVDiskStateRequest" );
1164
1156
UnavailableStorageNodes.insert (nodeId);
1165
1157
}
1166
1158
}
1167
1159
}
1168
1160
if (ev->Get ()->SourceType == TEvWhiteboard::EvPDiskStateRequest) {
1169
1161
if (NodePDiskState.count (nodeId) && NodePDiskState[nodeId].Error (error)) {
1170
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId)) {
1162
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId)) {
1163
+ NodePDiskState.erase (nodeId);
1164
+ } else {
1171
1165
RequestDone (" undelivered of TEvPDiskStateRequest" );
1172
1166
UnavailableStorageNodes.insert (nodeId);
1173
1167
}
1174
1168
}
1175
1169
}
1176
1170
if (ev->Get ()->SourceType == TEvWhiteboard::EvBSGroupStateRequest) {
1177
1171
if (NodeBSGroupState.count (nodeId) && NodeBSGroupState[nodeId].Error (error)) {
1178
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId)) {
1172
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId)) {
1173
+ NodeBSGroupState.erase (nodeId);
1174
+ } else {
1179
1175
RequestDone (" undelivered of TEvBSGroupStateRequest" );
1180
1176
}
1181
1177
}
@@ -1186,25 +1182,33 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
1186
1182
ui32 nodeId = ev->Get ()->NodeId ;
1187
1183
TString error = " NodeDisconnected" ;
1188
1184
if (NodeSystemState.count (nodeId) && NodeSystemState[nodeId].Error (error)) {
1189
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId)) {
1185
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId)) {
1186
+ NodeSystemState.erase (nodeId);
1187
+ } else {
1190
1188
RequestDone (" node disconnected with TEvSystemStateRequest" );
1191
1189
UnavailableComputeNodes.insert (nodeId);
1192
1190
}
1193
1191
}
1194
1192
if (NodeVDiskState.count (nodeId) && NodeVDiskState[nodeId].Error (error)) {
1195
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId)) {
1193
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId)) {
1194
+ NodeVDiskState.erase (nodeId);
1195
+ } else {
1196
1196
RequestDone (" node disconnected with TEvVDiskStateRequest" );
1197
1197
UnavailableStorageNodes.insert (nodeId);
1198
1198
}
1199
1199
}
1200
1200
if (NodePDiskState.count (nodeId) && NodePDiskState[nodeId].Error (error)) {
1201
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId)) {
1201
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId)) {
1202
+ NodePDiskState.erase (nodeId);
1203
+ } else {
1202
1204
RequestDone (" node disconnected with TEvPDiskStateRequest" );
1203
1205
UnavailableStorageNodes.insert (nodeId);
1204
1206
}
1205
1207
}
1206
1208
if (NodeBSGroupState.count (nodeId) && NodeBSGroupState[nodeId].Error (error)) {
1207
- if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId)) {
1209
+ if (RetryRequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId)) {
1210
+ NodeBSGroupState.erase (nodeId);
1211
+ } else {
1208
1212
RequestDone (" node disconnected with TEvBSGroupStateRequest" );
1209
1213
}
1210
1214
}
@@ -1509,10 +1513,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
1509
1513
TNodeId nodeId = ev.Get ()->Cookie ;
1510
1514
auto & nodeSystemState (NodeSystemState[nodeId]);
1511
1515
nodeSystemState.Set (std::move (ev));
1512
- for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record .MutableSystemStateInfo ()) {
1513
- state.set_nodeid (nodeId);
1514
- MergedNodeSystemState[nodeId] = &state;
1515
- }
1516
1516
RequestDone (" TEvSystemStateResponse" );
1517
1517
}
1518
1518
@@ -1616,6 +1616,53 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
1616
1616
}
1617
1617
}
1618
1618
1619
+ void AggregateWhiteboard () {
1620
+ for (auto & [nodeId, nodeSystemState] : NodeSystemState) {
1621
+ if (nodeSystemState.IsOk ()) {
1622
+ for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record .MutableSystemStateInfo ()) {
1623
+ state.set_nodeid (nodeId);
1624
+ MergedNodeSystemState[nodeId] = &state;
1625
+ }
1626
+ }
1627
+ }
1628
+ for (auto & [nodeId, nodeVDiskState] : NodeVDiskState) {
1629
+ if (nodeVDiskState.IsOk ()) {
1630
+ for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record .MutableVDiskStateInfo ()) {
1631
+ state.set_nodeid (nodeId);
1632
+ auto id = GetVDiskId (state.vdiskid ());
1633
+ MergedVDiskState[id] = &state;
1634
+ }
1635
+ }
1636
+ }
1637
+ for (auto & [nodeId, nodePDiskState] : NodePDiskState) {
1638
+ if (nodePDiskState.IsOk ()) {
1639
+ for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record .MutablePDiskStateInfo ()) {
1640
+ state.set_nodeid (nodeId);
1641
+ auto id = GetPDiskId (state);
1642
+ MergedPDiskState[id] = &state;
1643
+ }
1644
+ }
1645
+ }
1646
+ for (auto & [nodeId, nodeBSGroupState] : NodeBSGroupState) {
1647
+ if (nodeBSGroupState.IsOk ()) {
1648
+ for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record .MutableBSGroupStateInfo ()) {
1649
+ state.set_nodeid (nodeId);
1650
+ TString storagePoolName = state.storagepoolname ();
1651
+ TGroupID groupId (state.groupid ());
1652
+ const NKikimrWhiteboard::TBSGroupStateInfo*& current (MergedBSGroupState[state.groupid ()]);
1653
+ if (current == nullptr || current->GetGroupGeneration () < state.GetGroupGeneration ()) {
1654
+ current = &state;
1655
+ }
1656
+ if (storagePoolName.empty () && groupId.ConfigurationType () != EGroupConfigurationType::Static) {
1657
+ continue ;
1658
+ }
1659
+ StoragePoolStateByName[storagePoolName].Groups .emplace (state.groupid ());
1660
+ StoragePoolStateByName[storagePoolName].Name = storagePoolName;
1661
+ }
1662
+ }
1663
+ }
1664
+ }
1665
+
1619
1666
// Returns whichever of the two statuses has the larger integer value;
// when both compare equal the first argument's value is returned.
static Ydb::Monitoring::StatusFlag::Status MaxStatus(Ydb::Monitoring::StatusFlag::Status a, Ydb::Monitoring::StatusFlag::Status b) {
    using TStatus = Ydb::Monitoring::StatusFlag::Status;
    const int lhs = a;
    const int rhs = b;
    return static_cast<TStatus>(lhs < rhs ? rhs : lhs);
}
@@ -2157,44 +2204,20 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
2157
2204
TNodeId nodeId = ev.Get ()->Cookie ;
2158
2205
auto & nodeVDiskState (NodeVDiskState[nodeId]);
2159
2206
nodeVDiskState.Set (std::move (ev));
2160
- for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record .MutableVDiskStateInfo ()) {
2161
- state.set_nodeid (nodeId);
2162
- auto id = GetVDiskId (state.vdiskid ());
2163
- MergedVDiskState[id] = &state;
2164
- }
2165
2207
RequestDone (" TEvVDiskStateResponse" );
2166
2208
}
2167
2209
2168
2210
// Handles a PDisk whiteboard response: takes the node id from the event
// cookie, stores the event in that node's NodePDiskState entry via Set(),
// and reports completion through RequestDone().
// The lines marked '-' below are the removed per-response merge into
// MergedPDiskState; an equivalent loop is present in AggregateWhiteboard().
void Handle (TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) {
2169
2211
TNodeId nodeId = ev.Get ()->Cookie ;
2170
2212
auto & nodePDiskState (NodePDiskState[nodeId]);
2171
2213
nodePDiskState.Set (std::move (ev));
2172
- for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record .MutablePDiskStateInfo ()) {
2173
- state.set_nodeid (nodeId);
2174
- auto id = GetPDiskId (state);
2175
- MergedPDiskState[id] = &state;
2176
- }
2177
2214
RequestDone (" TEvPDiskStateResponse" );
2178
2215
}
2179
2216
2180
2217
// Handles a BS group whiteboard response: takes the node id from the event
// cookie, stores the event in that node's NodeBSGroupState entry via Set(),
// and reports completion through RequestDone().
// The lines marked '-' below are the removed per-response merge into
// MergedBSGroupState / StoragePoolStateByName; an equivalent loop is present
// in AggregateWhiteboard().
// NOTE(review): nodeId is declared ui64 here while the PDisk response handler
// declares it as TNodeId — confirm whether the difference is intentional.
void Handle (TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) {
2181
2218
ui64 nodeId = ev.Get ()->Cookie ;
2182
2219
auto & nodeBSGroupState (NodeBSGroupState[nodeId]);
2183
2220
nodeBSGroupState.Set (std::move (ev));
2184
- for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record .MutableBSGroupStateInfo ()) {
2185
- state.set_nodeid (nodeId);
2186
- TString storagePoolName = state.storagepoolname ();
2187
- TGroupID groupId (state.groupid ());
2188
- const NKikimrWhiteboard::TBSGroupStateInfo*& current (MergedBSGroupState[state.groupid ()]);
2189
- if (current == nullptr || current->GetGroupGeneration () < state.GetGroupGeneration ()) {
2190
- current = &state;
2191
- }
2192
- if (storagePoolName.empty () && groupId.ConfigurationType () != EGroupConfigurationType::Static) {
2193
- continue ;
2194
- }
2195
- StoragePoolStateByName[storagePoolName].Groups .emplace (state.groupid ());
2196
- StoragePoolStateByName[storagePoolName].Name = storagePoolName;
2197
- }
2198
2221
RequestDone (" TEvBSGroupStateResponse" );
2199
2222
}
2200
2223
@@ -3049,6 +3072,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
3049
3072
AggregateHiveInfo ();
3050
3073
AggregateHiveNodeStats ();
3051
3074
AggregateStoragePools ();
3075
+ AggregateWhiteboard ();
3052
3076
3053
3077
for (auto & [requestId, request] : TabletRequests.RequestsInFlight ) {
3054
3078
auto tabletId = request.TabletId ;
0 commit comments