Skip to content

Commit 690c80b

Browse files
healthcheck segfault while retrying Whiteboard (#17836)
1 parent bd8a859 commit 690c80b

File tree

2 files changed

+62
-8
lines changed

2 files changed

+62
-8
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,20 +1108,28 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11081108
auto nodeId = ev->Get()->NodeId;
11091109
switch (eventId) {
11101110
case TEvWhiteboard::EvSystemStateRequest:
1111-
NodeSystemState.erase(nodeId);
1112-
NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId);
1111+
if (!NodeSystemState[nodeId].IsDone()) {
1112+
NodeSystemState.erase(nodeId);
1113+
NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId, {-1});
1114+
}
11131115
break;
11141116
case TEvWhiteboard::EvVDiskStateRequest:
1115-
NodeVDiskState.erase(nodeId);
1116-
NodeVDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId);
1117+
if (!NodeVDiskState[nodeId].IsDone()) {
1118+
NodeVDiskState.erase(nodeId);
1119+
NodeVDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId);
1120+
}
11171121
break;
11181122
case TEvWhiteboard::EvPDiskStateRequest:
1119-
NodePDiskState.erase(nodeId);
1120-
NodePDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId);
1123+
if (!NodePDiskState[nodeId].IsDone()) {
1124+
NodePDiskState.erase(nodeId);
1125+
NodePDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId);
1126+
}
11211127
break;
11221128
case TEvWhiteboard::EvBSGroupStateRequest:
1123-
NodeBSGroupState.erase(nodeId);
1124-
NodeBSGroupState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId);
1129+
if (!NodeBSGroupState[nodeId].IsDone()) {
1130+
NodeBSGroupState.erase(nodeId);
1131+
NodeBSGroupState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId);
1132+
}
11251133
break;
11261134
default:
11271135
RequestDone("unsupported event scheduled");

ydb/core/health_check/health_check_ut.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2197,5 +2197,51 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
21972197
Y_UNIT_TEST(LayoutCorrect) {
21982198
LayoutCorrectTest(true);
21992199
}
2200+
2201+
// Regression test: a whiteboard retry for the system-state request is delivered
// to the self-check actor after a system-state response has already been
// forwarded to it. The self-check must still complete and report GOOD.
Y_UNIT_TEST(TestSystemStateRetriesAfterReceivingResponse) {
2202+
TPortManager tp;
2203+
ui16 port = tp.GetPort(2134);
2204+
ui16 grpcPort = tp.GetPort(2135);
2205+
// One static node and one dynamic node, run without real threads.
auto settings = TServerSettings(port)
2206+
.SetNodeCount(1)
2207+
.SetDynamicNodeCount(1)
2208+
.SetUseRealThreads(false)
2209+
.SetDomainName("Root");
2210+
TServer server(settings);
2211+
server.EnableGRpc(grpcPort);
2212+
TClient client(settings);
2213+
TTestActorRuntime& runtime = *server.GetRuntime();
2214+
2215+
TActorId sender = runtime.AllocateEdgeActor();
2216+
TAutoPtr<IEventHandle> handle;
2217+
2218+
// Recipient of the first intercepted system-state response; empty until one is seen.
std::optional<TActorId> targetActor;
2219+
// Observer: intercepts EvSystemStateResponse events carrying Cookie == 1.
// Every other event is passed through unchanged (PROCESS).
auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
2220+
switch (ev->GetTypeRewrite()) {
2221+
case TEvWhiteboard::EvSystemStateResponse: {
2222+
if (ev->Cookie == 1) {
2223+
// Only the first matching response triggers the injection below.
if (!targetActor) {
2224+
targetActor = ev->Recipient;
2225+
// Re-send the original response manually; the handle is released here,
// so the observer's own copy is reported as dropped further down.
runtime.Send(ev.Release());
2226+
// Follow the response with a retry event addressed to the same actor.
// NOTE(review): the first argument (1) is presumably the node id - confirm
// against the TEvRetryNodeWhiteboard constructor.
runtime.Send(new IEventHandle(
2227+
*targetActor,
2228+
sender,
2229+
new NHealthCheck::TEvPrivate::TEvRetryNodeWhiteboard(1, TEvWhiteboard::TEvSystemStateRequest::EventType)
2230+
));
2231+
2232+
}
2233+
// Returned for every Cookie == 1 system-state response, not just the first one.
return TTestActorRuntime::EEventAction::DROP;
2234+
}
2235+
break;
2236+
}
2237+
}
2238+
return TTestActorRuntime::EEventAction::PROCESS;
2239+
};
2240+
runtime.SetObserverFunc(observerFunc);
2241+
// Start the self-check; the result is delivered to the edge actor `sender`.
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0));
2242+
2243+
auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
2244+
// The injected retry must not change the overall verdict.
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD);
2245+
}
22002246
}
22012247
}

0 commit comments

Comments
 (0)