Skip to content

Commit 1ff318c

Browse files
authored
New UT to check that all requests are answered on PDisk restart (#10262)
1 parent 89cd280 commit 1ff318c

File tree

4 files changed: +97 -46 lines changed

ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp

Lines changed: 9 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -322,36 +322,27 @@ void TPDisk::Stop() {
322322
delete req;
323323
}
324324
JointLogReads.clear();
325+
325326
for (auto& req : JointChunkReads) {
327+
Y_VERIFY_DEBUG_S(req->GetType() == ERequestType::RequestChunkReadPiece,
328+
"Unexpected request type# " << TypeName(*req));
326329
TRequestBase::AbortDelete(req.Get(), PCtx->ActorSystem);
327330
}
328331
JointChunkReads.clear();
329332
for (TRequestBase* req : JointChunkWrites) {
330-
switch (req->GetType()) {
331-
case ERequestType::RequestChunkWrite:
332-
{
333-
TChunkWrite *write = static_cast<TChunkWrite*>(req);
334-
if (write->IsTotallyEnqueued()) {
335-
delete write;
336-
}
337-
break;
338-
}
339-
case ERequestType::RequestChunkWritePiece:
340-
delete req;
341-
break;
342-
default:
343-
Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkWrites");
344-
}
333+
Y_VERIFY_DEBUG_S(req->GetType() == ERequestType::RequestChunkWritePiece,
334+
"Unexpected request type# " << TypeName(req));
335+
TRequestBase::AbortDelete(req, PCtx->ActorSystem);
345336
}
346337
JointChunkWrites.clear();
347338
for (TLogWrite* req : JointLogWrites) {
348-
delete req;
339+
TRequestBase::AbortDelete(req, PCtx->ActorSystem);
349340
}
350341
JointLogWrites.clear();
351342
JointCommits.clear();
352343
JointChunkForgets.clear();
353-
for (const auto& req : FastOperationsQueue) {
354-
TRequestBase::AbortDelete(req.get(), PCtx->ActorSystem);
344+
for (auto& req : FastOperationsQueue) {
345+
TRequestBase::AbortDelete(req.release(), PCtx->ActorSystem);
355346
}
356347
FastOperationsQueue.clear();
357348
for (TRequestBase* req : PausedQueue) {

ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.cpp

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -32,18 +32,10 @@ void TRequestBase::AbortDelete(TRequestBase* request, TActorSystem* actorSystem)
3232
}
3333
}
3434

35-
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
36-
// TChunkWrite
37-
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
38-
39-
TAtomic TChunkWrite::LastIndex = 0;
40-
4135
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
4236
// TChunkRead
4337
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
4438

45-
TAtomic TChunkRead::LastIndex = 0;
46-
4739
void TChunkRead::Abort(TActorSystem* actorSystem) {
4840
if (FinalCompletion) {
4941
FinalCompletion->PartDeleted(actorSystem);
@@ -93,4 +85,3 @@ void TChunkReadPiece::OnSuccessfulDestroy(TActorSystem* actorSystem) {
9385

9486
} // NPDisk
9587
} // NKikimr
96-

ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.h

Lines changed: 33 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,10 @@ class TLogWrite : public TRequestBase {
343343
OnDestroy = std::move(onDestroy);
344344
}
345345

346+
void Abort(TActorSystem* actorSystem) override {
347+
actorSystem->Send(Sender, new NPDisk::TEvLogResult(NKikimrProto::CORRUPTED, 0, "TLogWrite is being aborted"));
348+
}
349+
346350
TString ToString() const {
347351
TStringStream str;
348352
str << "TLogWrite {";
@@ -362,7 +366,6 @@ class TCompletionChunkRead;
362366
//
363367
class TChunkRead : public TRequestBase {
364368
protected:
365-
static TAtomic LastIndex;
366369
static constexpr ui64 ReferenceCanary = 890461871990457885ull;
367370
public:
368371
ui32 ChunkIdx;
@@ -373,7 +376,6 @@ class TChunkRead : public TRequestBase {
373376
ui64 CurrentSector = 0;
374377
ui64 RemainingSize;
375378
TCompletionChunkRead *FinalCompletion = nullptr;
376-
TAtomicBase Index;
377379
bool IsReplied = false;
378380

379381
ui64 SlackSize;
@@ -399,7 +401,6 @@ class TChunkRead : public TRequestBase {
399401
, SlackSize(Max<ui32>())
400402
, DoubleFreeCanary(ReferenceCanary)
401403
{
402-
Index = AtomicIncrement(LastIndex);
403404
}
404405

405406
virtual ~TChunkRead() {
@@ -479,8 +480,6 @@ class TChunkReadPiece : public TRequestBase {
479480
// TChunkWrite
480481
//
481482
class TChunkWrite : public TRequestBase {
482-
protected:
483-
static TAtomic LastIndex;
484483
public:
485484
ui32 ChunkIdx;
486485
ui32 Offset;
@@ -494,12 +493,13 @@ class TChunkWrite : public TRequestBase {
494493
ui32 CurrentPart = 0;
495494
ui32 CurrentPartOffset = 0;
496495
ui32 RemainingSize = 0;
497-
ui32 UnenqueuedSize;
498-
TAtomicBase Index;
499496

500497
ui32 SlackSize;
501498
ui32 BytesWritten = 0;
502499

500+
TAtomic Pieces = 0;
501+
TAtomic Aborted = 0;
502+
503503
THolder<NPDisk::TCompletionAction> Completion;
504504

505505
TChunkWrite(const NPDisk::TEvChunkWrite &ev, const TActorId &sender, TReqId reqId, NWilson::TSpan span)
@@ -511,31 +511,31 @@ class TChunkWrite : public TRequestBase {
511511
, DoFlush(ev.DoFlush)
512512
, IsSeqWrite(ev.IsSeqWrite)
513513
{
514-
Index = AtomicIncrement(LastIndex);
515514
if (PartsPtr) {
516515
for (size_t i = 0; i < PartsPtr->Size(); ++i) {
517516
RemainingSize += (*PartsPtr)[i].second;
518517
}
519518
}
520519
TotalSize = RemainingSize;
521-
UnenqueuedSize = RemainingSize;
522520
SlackSize = Max<ui32>();
523521
}
524522

525-
ERequestType GetType() const override {
526-
return ERequestType::RequestChunkWrite;
523+
void RegisterPiece() {
524+
AtomicIncrement(Pieces);
527525
}
528526

529-
void EstimateCost(const TDriveModel &drive) override {
530-
Cost = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)UnenqueuedSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE);
527+
void AbortPiece(TActorSystem *actorSystem) {
528+
if (AtomicDecrement(Pieces) == 0) {
529+
this->Abort(actorSystem);
530+
}
531531
}
532532

533-
bool IsFinalIteration() {
534-
return UnenqueuedSize <= SlackSize;
533+
ERequestType GetType() const override {
534+
return ERequestType::RequestChunkWrite;
535535
}
536536

537-
bool IsTotallyEnqueued() {
538-
return UnenqueuedSize == 0;
537+
void EstimateCost(const TDriveModel &drive) override {
538+
Cost = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)TotalSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE);
539539
}
540540

541541
bool TryStealSlack(ui64& slackNs, const TDriveModel &drive, ui64 appendBlockSize, bool adhesion) override {
@@ -547,14 +547,20 @@ class TChunkWrite : public TRequestBase {
547547
if (SlackSize >= appendBlockSize) {
548548
SlackSize = Min(
549549
SlackSize / appendBlockSize * appendBlockSize,
550-
(UnenqueuedSize + appendBlockSize - 1) / appendBlockSize * appendBlockSize);
550+
(TotalSize + appendBlockSize - 1) / appendBlockSize * appendBlockSize);
551551
ui64 costNs = (adhesion? 0: drive.SeekTimeNs()) + drive.TimeForSizeNs((ui64)SlackSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE);
552552
slackNs -= costNs;
553553
return true;
554554
} else {
555555
return false;
556556
}
557557
}
558+
559+
void Abort(TActorSystem* actorSystem) override {
560+
if (!AtomicSwap(&Aborted, true)) {
561+
actorSystem->Send(Sender, new NPDisk::TEvChunkWriteResult(NKikimrProto::CORRUPTED, ChunkIdx, Cookie, 0, "TChunkWrite is being aborted"));
562+
}
563+
}
558564
};
559565

560566
//
@@ -571,7 +577,9 @@ class TChunkWritePiece : public TRequestBase {
571577
, ChunkWrite(write)
572578
, PieceShift(pieceShift)
573579
, PieceSize(pieceSize)
574-
{}
580+
{
581+
ChunkWrite->RegisterPiece();
582+
}
575583

576584
ERequestType GetType() const override {
577585
return ERequestType::RequestChunkWritePiece;
@@ -581,6 +589,12 @@ class TChunkWritePiece : public TRequestBase {
581589
Cost = drive.SeekTimeNs() +
582590
drive.TimeForSizeNs((ui64)PieceSize, ChunkWrite->ChunkIdx, TDriveModel::OP_TYPE_WRITE);
583591
}
592+
593+
void Abort(TActorSystem* actorSystem) override {
594+
if (ChunkWrite) {
595+
ChunkWrite->AbortPiece(actorSystem);
596+
}
597+
}
584598
};
585599

586600
//

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -956,6 +956,61 @@ Y_UNIT_TEST_SUITE(TPDiskTest) {
956956
chunk1, 0, parts, nullptr, false, 0),
957957
NKikimrProto::ERROR);
958958
}
959+
960+
Y_UNIT_TEST(AllRequestsAreAnsweredOnPDiskRestart) {
961+
TActorTestContext testCtx({ false });
962+
TVDiskMock vdisk(&testCtx);
963+
964+
vdisk.InitFull();
965+
vdisk.ReserveChunk();
966+
vdisk.CommitReservedChunks();
967+
UNIT_ASSERT(vdisk.Chunks[EChunkState::COMMITTED].size() == 1);
968+
auto chunk = *vdisk.Chunks[EChunkState::COMMITTED].begin();
969+
970+
ui32 logBuffSize = 250;
971+
ui32 chunkBuffSize = 128_KB;
972+
973+
for (ui32 testCase = 0; testCase < 2; testCase++) {
974+
vdisk.InitFull();
975+
for (ui32 i = 0; i < 100; ++i) {
976+
testCtx.Send(new NPDisk::TEvLog(
977+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 0, TRcBuf(PrepareData(logBuffSize)), vdisk.GetLsnSeg(), nullptr));
978+
auto data = PrepareData(chunkBuffSize);
979+
auto parts = MakeIntrusive<NPDisk::TEvChunkWrite::TStrokaBackedUpParts>(data);
980+
testCtx.Send(new NPDisk::TEvChunkWrite(
981+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound,
982+
chunk, 0, parts, nullptr, false, 0));
983+
testCtx.Send(new NPDisk::TEvChunkRead(
984+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound,
985+
chunk, 0, chunkBuffSize, 0, nullptr));
986+
}
987+
988+
if (testCase & 1) {
989+
Cerr << "restart" << Endl;
990+
testCtx.RestartPDiskSync();
991+
}
992+
993+
for (ui32 i = 0; i < 100; ++i) {
994+
auto read = testCtx.Recv<NPDisk::TEvChunkReadResult>();
995+
}
996+
Cerr << "all chunk reads are received" << Endl;
997+
998+
for (ui32 i = 0; i < 100; ++i) {
999+
auto write = testCtx.Recv<NPDisk::TEvChunkWriteResult>();
1000+
}
1001+
Cerr << "all chunk writes are received" << Endl;
1002+
1003+
for (ui32 i = 0; i < 100;) {
1004+
auto result = testCtx.Recv<NPDisk::TEvLogResult>();
1005+
if (result->Status == NKikimrProto::OK) {
1006+
i += result->Results.size();
1007+
} else {
1008+
++i;
1009+
}
1010+
}
1011+
Cerr << "all log writes are received" << Endl;
1012+
}
1013+
}
9591014
}
9601015

9611016
Y_UNIT_TEST_SUITE(PDiskCompatibilityInfo) {

0 commit comments

Comments
 (0)