Skip to content

Commit af0f7f8

Browse files
authored
Merge recent PDisk fixes into stable-25-1 (#18379)
2 parents 4c383db + 26ab8a3 commit af0f7f8

File tree

6 files changed

+84
-15
lines changed

6 files changed

+84
-15
lines changed

ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -642,14 +642,27 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
642642
ev->Sender, evYardInit.CutLogID, evYardInit.WhiteboardProxyId, evYardInit.SlotId);
643643
}
644644

645-
void InitHandle(NPDisk::TEvYardControl::TPtr &ev) {
645+
void OnPDiskStop(TActorId &sender, void *cookie) {
646+
if (PDisk) {
647+
PDisk->Stop();
648+
*PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped;
649+
*PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped;
650+
*PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl;
651+
}
652+
InitError("Received TEvYardControl::PDiskStop");
653+
Send(sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, cookie, {}));
654+
}
646655

656+
void InitHandle(NPDisk::TEvYardControl::TPtr &ev) {
647657
const NPDisk::TEvYardControl &evControl = *ev->Get();
648658
switch (evControl.Action) {
649659
case TEvYardControl::PDiskStart:
650660
ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(),
651661
new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
652662
break;
663+
case TEvYardControl::PDiskStop:
664+
OnPDiskStop(ev->Sender, evControl.Cookie);
665+
break;
653666
default:
654667
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie,
655668
"Unexpected control action for pdisk in StateInit"));
@@ -837,12 +850,19 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
837850
break;
838851
}
839852
default:
853+
// Only PDiskStart is allowed in StateError. PDiskStop is not allowed since PDisk in error state should already be stopped
854+
// or in the process of being stopped.
840855
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, StateErrorReason));
841856
PDisk->Mon.YardControl.CountResponse();
842857
break;
843858
}
844859
}
845860

861+
void ErrorHandle(TEvReadFormatResult::TPtr &ev) {
862+
// Just ignore the event, disk is in error state.
863+
Y_UNUSED(ev);
864+
}
865+
846866
void ErrorHandle(NPDisk::TEvAskForCutLog::TPtr &ev) {
847867
// Just ignore the event, can't send cut log in this state.
848868
Y_UNUSED(ev);
@@ -968,12 +988,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
968988
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
969989
break;
970990
case TEvYardControl::PDiskStop:
971-
PDisk->Stop();
972-
*PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped;
973-
*PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped;
974-
*PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl;
975-
InitError("Received TEvYardControl::PDiskStop");
976-
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
991+
OnPDiskStop(ev->Sender, evControl.Cookie);
977992
break;
978993
case TEvYardControl::GetPDiskPointer:
979994
Y_ABORT_UNLESS(!evControl.Cookie);
@@ -1505,6 +1520,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
15051520
hFunc(NPDisk::TEvChunkForget, ErrorHandle);
15061521
hFunc(NPDisk::TEvYardControl, ErrorHandle);
15071522
hFunc(NPDisk::TEvAskForCutLog, ErrorHandle);
1523+
hFunc(NPDisk::TEvReadFormatResult, ErrorHandle);
15081524
hFunc(NPDisk::TEvWhiteboardReportResult, Handle);
15091525
hFunc(NPDisk::TEvHttpInfoResult, Handle);
15101526
hFunc(NPDisk::TEvReadLogContinue, Handle);

ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,7 @@ void TPDisk::PrintLogChunksInfo(const TString& msg) {
155155
return str.Str();
156156
};
157157

158-
P_LOG(PRI_NOTICE, BPD01, "PrintLogChunksInfo " << msg, (LogChunks, debugPrint()));
158+
P_LOG(PRI_DEBUG, BPD01, "PrintLogChunksInfo " << msg, (LogChunks, debugPrint()));
159159
}
160160

161161
bool TPDisk::LogNonceJump(ui64 previousNonce) {

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,43 @@ Y_UNIT_TEST_SUITE(TPDiskTest) {
108108
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
109109
}
110110

111+
Y_UNIT_TEST(TestPDiskActorPDiskStopBroken) {
112+
TActorTestContext testCtx{{}};
113+
114+
testCtx.GetRuntime()->WaitFor("Block device start", [&] {
115+
return testCtx.SafeRunOnPDisk([&] (auto* pdisk) {
116+
// Check that the PDisk is up
117+
return pdisk->BlockDevice->IsGood();
118+
});
119+
});
120+
121+
testCtx.Send(new NPDisk::TEvDeviceError("test"));
122+
123+
// This doesn't stop the PDisk, it will be stopped by TEvDeviceError some time in the future
124+
testCtx.TestResponse<NPDisk::TEvYardControlResult>(
125+
new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr),
126+
NKikimrProto::CORRUPTED);
127+
128+
testCtx.GetRuntime()->WaitFor("Block device stop", [&] {
129+
return testCtx.SafeRunOnPDisk([&] (auto* pdisk) {
130+
// Check that the PDisk is stopped
131+
return !pdisk->BlockDevice->IsGood();
132+
});
133+
});
134+
135+
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
136+
}
137+
138+
Y_UNIT_TEST(TestPDiskActorPDiskStopUninitialized) {
139+
TActorTestContext testCtx{{}};
140+
141+
testCtx.TestResponse<NPDisk::TEvYardControlResult>(
142+
new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr),
143+
NKikimrProto::OK);
144+
145+
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
146+
}
147+
111148
Y_UNIT_TEST(TestChunkWriteRelease) {
112149
for (ui32 i = 0; i < 16; ++i) {
113150
TestChunkWriteReleaseRun();

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,10 @@ struct TActorTestContext {
117117
return nullptr;
118118
}
119119

120+
TTestActorRuntime* GetRuntime() {
121+
return Runtime.Get();
122+
}
123+
120124
void UpdateConfigRecreatePDisk(TIntrusivePtr<TPDiskConfig> cfg) {
121125
if (PDiskActor) {
122126
TestResponse<NPDisk::TEvYardControlResult>(

ydb/library/pdisk_io/aio.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,8 @@ enum class EIoResult : i64 {
102102
InvalidSequence = 14, // aka EILSEQ: GetEvents
103103
// for broken disk's error-log: "READ_ERROR: The read data could not be recovered from the media"
104104
NoData = 15, // aka ENODATA: GetEvents
105-
RemoteIOError = 16 // aka EREMOTEIO: GetEvents
105+
RemoteIOError = 16, // aka EREMOTEIO: GetEvents
106+
NoSpaceLeft = 17 // aka ENOSPC: GetEvents
106107
};
107108

108109
struct TAsyncIoOperationResult {

ydb/library/pdisk_io/aio_linux.cpp

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -154,23 +154,33 @@ class TAsyncIoContextLibaio : public IAsyncIoContext {
154154
}
155155

156156
EIoResult Destroy() override {
157+
EIoResult result = EIoResult::Ok;
158+
157159
int ret = io_destroy(IoContext);
158160
if (ret < 0) {
159161
switch (-ret) {
160-
case EFAULT: return EIoResult::BadAddress;
161-
case EINVAL: return EIoResult::InvalidArgument;
162-
case ENOSYS: return EIoResult::FunctionNotImplemented;
163-
default: Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret
164-
<< " strerror# " << strerror(-ret));
162+
case EFAULT:
163+
result = EIoResult::BadAddress;
164+
break;
165+
case EINVAL:
166+
result = EIoResult::InvalidArgument;
167+
break;
168+
case ENOSYS:
169+
result = EIoResult::FunctionNotImplemented;
170+
break;
171+
default:
172+
Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret << " strerror# " << strerror(-ret));
165173
}
166174
}
175+
167176
if (File) {
168177
ret = File->Flock(LOCK_UN);
169178
Y_VERIFY_S(ret == 0, "Error in Flock(LOCK_UN), errno# " << errno << " strerror# " << strerror(errno));
170179
bool isOk = File->Close();
171180
Y_VERIFY_S(isOk, PDiskInfo << " error on file close, errno# " << errno << " strerror# " << strerror(errno));
172181
}
173-
return EIoResult::Ok;
182+
183+
return result;
174184
}
175185

176186
i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override {
@@ -539,6 +549,7 @@ class TAsyncIoContextLiburing : public IAsyncIoContext {
539549
case ENOSYS: return EIoResult::FunctionNotImplemented;
540550
case EILSEQ: return EIoResult::InvalidSequence;
541551
case ENODATA: return EIoResult::NoData;
552+
case ENOSPC: return EIoResult::NoSpaceLeft;
542553
default: Y_FAIL_S(PDiskInfo << " unexpected error in " << info << ", error# " << -ret
543554
<< " strerror# " << strerror(-ret));
544555
}

0 commit comments

Comments
 (0)