Skip to content

Commit 7e5f853

Browse files
SammyVimes authored and va-kuznecov committed
Handle PDisk stop event if PDisk is in error or init state (#17780)
Closes #17953
1 parent abb3b7f commit 7e5f853

File tree

4 files changed

+80
-13
lines changed

4 files changed

+80
-13
lines changed

ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -642,14 +642,27 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
642642
ev->Sender, evYardInit.CutLogID, evYardInit.WhiteboardProxyId, evYardInit.SlotId);
643643
}
644644

645-
void InitHandle(NPDisk::TEvYardControl::TPtr &ev) {
645+
void OnPDiskStop(TActorId &sender, void *cookie) {
646+
if (PDisk) {
647+
PDisk->Stop();
648+
*PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped;
649+
*PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped;
650+
*PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl;
651+
}
652+
InitError("Received TEvYardControl::PDiskStop");
653+
Send(sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, cookie, {}));
654+
}
646655

656+
void InitHandle(NPDisk::TEvYardControl::TPtr &ev) {
647657
const NPDisk::TEvYardControl &evControl = *ev->Get();
648658
switch (evControl.Action) {
649659
case TEvYardControl::PDiskStart:
650660
ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(),
651661
new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
652662
break;
663+
case TEvYardControl::PDiskStop:
664+
OnPDiskStop(ev->Sender, evControl.Cookie);
665+
break;
653666
default:
654667
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie,
655668
"Unexpected control action for pdisk in StateInit"));
@@ -837,12 +850,19 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
837850
break;
838851
}
839852
default:
853+
// Only PDiskStart is allowed in StateError. PDiskStop is not allowed since PDisk in error state should already be stopped
854+
// or in the process of being stopped.
840855
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, StateErrorReason));
841856
PDisk->Mon.YardControl.CountResponse();
842857
break;
843858
}
844859
}
845860

861+
void ErrorHandle(TEvReadFormatResult::TPtr &ev) {
862+
// Just ignore the event, disk is in error state.
863+
Y_UNUSED(ev);
864+
}
865+
846866
void ErrorHandle(NPDisk::TEvAskForCutLog::TPtr &ev) {
847867
// Just ignore the event, can't send cut log in this state.
848868
Y_UNUSED(ev);
@@ -968,12 +988,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
968988
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
969989
break;
970990
case TEvYardControl::PDiskStop:
971-
PDisk->Stop();
972-
*PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped;
973-
*PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped;
974-
*PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl;
975-
InitError("Received TEvYardControl::PDiskStop");
976-
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
991+
OnPDiskStop(ev->Sender, evControl.Cookie);
977992
break;
978993
case TEvYardControl::GetPDiskPointer:
979994
Y_ABORT_UNLESS(!evControl.Cookie);
@@ -1505,6 +1520,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
15051520
hFunc(NPDisk::TEvChunkForget, ErrorHandle);
15061521
hFunc(NPDisk::TEvYardControl, ErrorHandle);
15071522
hFunc(NPDisk::TEvAskForCutLog, ErrorHandle);
1523+
hFunc(NPDisk::TEvReadFormatResult, ErrorHandle);
15081524
hFunc(NPDisk::TEvWhiteboardReportResult, Handle);
15091525
hFunc(NPDisk::TEvHttpInfoResult, Handle);
15101526
hFunc(NPDisk::TEvReadLogContinue, Handle);

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,43 @@ Y_UNIT_TEST_SUITE(TPDiskTest) {
108108
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
109109
}
110110

111+
Y_UNIT_TEST(TestPDiskActorPDiskStopBroken) {
112+
TActorTestContext testCtx{{}};
113+
114+
testCtx.GetRuntime()->WaitFor("Block device start", [&] {
115+
return testCtx.SafeRunOnPDisk([&] (auto* pdisk) {
116+
// Check that the PDisk is up
117+
return pdisk->BlockDevice->IsGood();
118+
});
119+
});
120+
121+
testCtx.Send(new NPDisk::TEvDeviceError("test"));
122+
123+
// This doesn't stop the PDisk, it will be stopped by TEvDeviceError some time in the future
124+
testCtx.TestResponse<NPDisk::TEvYardControlResult>(
125+
new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr),
126+
NKikimrProto::CORRUPTED);
127+
128+
testCtx.GetRuntime()->WaitFor("Block device stop", [&] {
129+
return testCtx.SafeRunOnPDisk([&] (auto* pdisk) {
130+
// Check that the PDisk is stopped
131+
return !pdisk->BlockDevice->IsGood();
132+
});
133+
});
134+
135+
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
136+
}
137+
138+
Y_UNIT_TEST(TestPDiskActorPDiskStopUninitialized) {
139+
TActorTestContext testCtx{{}};
140+
141+
testCtx.TestResponse<NPDisk::TEvYardControlResult>(
142+
new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr),
143+
NKikimrProto::OK);
144+
145+
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
146+
}
147+
111148
Y_UNIT_TEST(TestChunkWriteRelease) {
112149
for (ui32 i = 0; i < 16; ++i) {
113150
TestChunkWriteReleaseRun();

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ struct TActorTestContext {
117117
return nullptr;
118118
}
119119

120+
TTestActorRuntime* GetRuntime() {
121+
return Runtime.Get();
122+
}
123+
120124
void UpdateConfigRecreatePDisk(TIntrusivePtr<TPDiskConfig> cfg) {
121125
if (PDiskActor) {
122126
TestResponse<NPDisk::TEvYardControlResult>(

ydb/library/pdisk_io/aio_linux.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,23 +154,33 @@ class TAsyncIoContextLibaio : public IAsyncIoContext {
154154
}
155155

156156
EIoResult Destroy() override {
157+
EIoResult result = EIoResult::Ok;
158+
157159
int ret = io_destroy(IoContext);
158160
if (ret < 0) {
159161
switch (-ret) {
160-
case EFAULT: return EIoResult::BadAddress;
161-
case EINVAL: return EIoResult::InvalidArgument;
162-
case ENOSYS: return EIoResult::FunctionNotImplemented;
163-
default: Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret
164-
<< " strerror# " << strerror(-ret));
162+
case EFAULT:
163+
result = EIoResult::BadAddress;
164+
break;
165+
case EINVAL:
166+
result = EIoResult::InvalidArgument;
167+
break;
168+
case ENOSYS:
169+
result = EIoResult::FunctionNotImplemented;
170+
break;
171+
default:
172+
Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret << " strerror# " << strerror(-ret));
165173
}
166174
}
175+
167176
if (File) {
168177
ret = File->Flock(LOCK_UN);
169178
Y_VERIFY_S(ret == 0, "Error in Flock(LOCK_UN), errno# " << errno << " strerror# " << strerror(errno));
170179
bool isOk = File->Close();
171180
Y_VERIFY_S(isOk, PDiskInfo << " error on file close, errno# " << errno << " strerror# " << strerror(errno));
172181
}
173-
return EIoResult::Ok;
182+
183+
return result;
174184
}
175185

176186
i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override {

0 commit comments

Comments
 (0)