Skip to content

Commit 9b97c8f

Browse files
authored
Fix unimportant deadlock in TBlockDevice and add new UT (#9991)
1 parent 1faa700 commit 9b97c8f

File tree

2 files changed

+59
-5
lines changed

2 files changed

+59
-5
lines changed

ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,14 @@ class TRealBlockDevice : public IBlockDevice {
111111
// Schedule action execution
112112
// pass action = nullptr to quit
113113
void Schedule(TCompletionAction *action) noexcept {
114-
TAtomicBase queueActions = AtomicIncrement(QueuedActions);
115-
if (queueActions >= MaxQueuedActions) {
114+
if (AtomicGet(QueuedActions) >= MaxQueuedActions) {
116115
Device.Mon.L7.Set(true, AtomicGetAndIncrement(SeqnoL7));
117116
while (AtomicGet(QueuedActions) >= MaxQueuedActions) {
118117
SpinLockPause();
119118
}
120119
Device.Mon.L7.Set(false, AtomicGetAndIncrement(SeqnoL7));
121120
}
121+
AtomicIncrement(QueuedActions);
122122
CompletionActions.Push(action);
123123
return;
124124
}

ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_ut.cpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,23 +112,27 @@ class TRabbit : public NPDisk::TCompletionAction {
112112
}
113113
};
114114

115-
class TCompletionCounter : public NPDisk::TCompletionAction {
115+
class TCompletionWorkerWithCounter : public NPDisk::TCompletionAction {
116116
public:
117-
TCompletionCounter(TAtomic& counter)
117+
TCompletionWorkerWithCounter(TAtomic& counter, TDuration workTime = TDuration::Zero())
118118
: Counter(counter)
119+
, WorkTime(workTime)
119120
{}
120121

121122
void Exec(TActorSystem *) override {
123+
Sleep(WorkTime);
122124
AtomicIncrement(Counter);
123125
delete this;
124126
}
125127

126128
void Release(TActorSystem *) override {
129+
AtomicIncrement(Counter);
127130
delete this;
128131
}
129132

130133
private:
131134
TAtomic& Counter;
135+
TDuration WorkTime;
132136
};
133137

134138
static TString MakeDatabasePath(const char *dir) {
@@ -250,7 +254,7 @@ void RunWriteTestWithSectorMap(NPDisk::NSectorMap::EDiskMode diskMode, ui32 disk
250254
TAtomic expectedWrites = diskSize / (offsetIncrement);
251255

252256
for (ui64 offset = 0; offset < diskSize; offset += offsetIncrement) {
253-
device->PwriteAsync(data.Get(), data.Size(), offset, new TCompletionCounter(completedWrites), NPDisk::TReqId(NPDisk::TReqId::Test1, 0), {});
257+
device->PwriteAsync(data.Get(), data.Size(), offset, new TCompletionWorkerWithCounter(completedWrites), NPDisk::TReqId(NPDisk::TReqId::Test1, 0), {});
254258
}
255259

256260
WaitForValue(&completedWrites, TIMEOUT, expectedWrites);
@@ -341,6 +345,56 @@ Y_UNIT_TEST_SUITE(TBlockDeviceTest) {
341345
Ctest << "Done" << Endl;
342346
}
343347

348+
Y_UNIT_TEST(WriteReadRestart) {
349+
using namespace NPDisk;
350+
351+
TActorSystemCreator creator;
352+
auto start = TMonotonic::Now();
353+
while ((TMonotonic::Now() - start).Seconds() < 5) {
354+
const TIntrusivePtr<::NMonitoring::TDynamicCounters> counters = new ::NMonitoring::TDynamicCounters;
355+
THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr));
356+
357+
ui32 buffSize = 64_KB;
358+
ui32 bufferPoolSize = 512;
359+
THolder<NPDisk::TBufferPool> bufferPool(NPDisk::CreateBufferPool(buffSize, bufferPoolSize, false, {}));
360+
ui64 inFlight = 128;
361+
ui32 maxQueuedCompletionActions = bufferPoolSize / 2;
362+
ui64 diskSize = 32_GB;
363+
364+
TIntrusivePtr<NPDisk::TSectorMap> sectorMap = new NPDisk::TSectorMap(diskSize, NSectorMap::DM_NONE);
365+
THolder<NPDisk::IBlockDevice> device(CreateRealBlockDevice("", *mon, 0, 0, inFlight, TDeviceMode::None,
366+
maxQueuedCompletionActions, sectorMap));
367+
device->Initialize(std::make_shared<TPDiskCtx>(creator.GetActorSystem()));
368+
369+
TAtomic counter = 0;
370+
const i64 totalRequests = 500;
371+
for (i64 i = 0; i < totalRequests; i++) {
372+
auto *completion = new TCompletionWorkerWithCounter(counter, TDuration::MicroSeconds(100));
373+
NPDisk::TBuffer::TPtr buffer(bufferPool->Pop());
374+
buffer->FlushAction = completion;
375+
auto* data = buffer->Data();
376+
switch (RandomNumber<ui32>(3)) {
377+
case 0:
378+
device->PreadAsync(data, 32_KB, 0, buffer.Release(), TReqId(), nullptr);
379+
break;
380+
case 1:
381+
device->PwriteAsync(data, 32_KB, 0, buffer.Release(), TReqId(), nullptr);
382+
break;
383+
case 2:
384+
device->FlushAsync(completion, TReqId());
385+
buffer->FlushAction = nullptr;
386+
break;
387+
default:
388+
break;
389+
}
390+
}
391+
392+
Cerr << AtomicGet(counter) << Endl;
393+
device.Destroy();
394+
UNIT_ASSERT(AtomicGet(counter) == totalRequests);
395+
}
396+
}
397+
344398
/*
345399
Y_UNIT_TEST(TestRabbitCompletionAction) {
346400
const TIntrusivePtr<::NMonitoring::TDynamicCounters> counters = new ::NMonitoring::TDynamicCounters;

0 commit comments

Comments
 (0)