Skip to content

Commit e55564f

Browse files
authored
Implement configurable number of completion threads (#9715)
1 parent f2805c6 commit e55564f

10 files changed

+184
-104
lines changed

ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ class TPDisk;
6464

6565
IBlockDevice* CreateRealBlockDevice(const TString &path, TPDiskMon &mon,
6666
ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags,
67-
ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk = nullptr);
67+
ui32 maxQueuedCompletionActions, ui32 completionThreadsCount, TIntrusivePtr<TSectorMap> sectorMap,
68+
TPDisk * const pdisk = nullptr);
6869
IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags,
6970
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr);
7071

ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp

Lines changed: 141 additions & 80 deletions
Large diffs are not rendered by default.

ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_ut.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,12 @@ Y_UNIT_TEST_SUITE(TBlockDeviceTest) {
288288
THolder<NPDisk::TBufferPool> bufferPool(NPDisk::CreateBufferPool(buffSize, bufferPoolSize, false, {}));
289289
ui64 inFlight = 128;
290290
ui32 maxQueuedCompletionActions = bufferPoolSize / 2;
291+
ui32 completionThreadsCount = 1;
291292
ui64 diskSize = 32_GB;
292293

293294
TIntrusivePtr<NPDisk::TSectorMap> sectorMap = new NPDisk::TSectorMap(diskSize, NSectorMap::DM_NONE);
294295
THolder<NPDisk::IBlockDevice> device(CreateRealBlockDevice("", *mon, 0, 0, inFlight, TDeviceMode::None,
295-
maxQueuedCompletionActions, sectorMap));
296+
maxQueuedCompletionActions, completionThreadsCount, sectorMap));
296297
device->Initialize(std::make_shared<TPDiskCtx>(creator.GetActorSystem()));
297298

298299
TAtomic counter = 0;

ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ struct TCompletionAction {
2323
// to BlockDevice from Exec() and it's safer to use WhiteList to allow only
2424
// LogWrite and ChunkWrite to be executed from GetThread
2525
bool ShouldBeExecutedInCompletionThread = true;
26+
bool CanBeExecutedInAdditionalCompletionThread = false;
2627

2728
mutable NLWTrace::TOrbit Orbit;
2829
protected:

ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.cpp

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,16 @@ TCompletionChunkReadPart::TCompletionChunkReadPart(TPDisk *pDisk, TIntrusivePtr<
128128
, PayloadReadSize(payloadReadSize)
129129
, CommonBufferOffset(commonBufferOffset)
130130
, CumulativeCompletion(cumulativeCompletion)
131+
, ChunkNonce(CumulativeCompletion->GetChunkNonce())
131132
, Buffer(PDisk->BufferPool->Pop())
132133
, IsTheLastPart(isTheLastPart)
133134
, Span(std::move(span))
134135
{
136+
TCompletionAction::CanBeExecutedInAdditionalCompletionThread = true;
137+
138+
TBufferWithGaps *commonBuffer = CumulativeCompletion->GetCommonBuffer();
139+
Destination = commonBuffer->RawDataPtr(CommonBufferOffset, PayloadReadSize);
140+
135141
if (!IsTheLastPart) {
136142
CumulativeCompletion->AddPart();
137143
}
@@ -166,8 +172,6 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
166172
Read->Offset + CommonBufferOffset, PayloadReadSize, firstSector, lastSector, sectorOffset);
167173
Y_ABORT_UNLESS(isOk);
168174

169-
TBufferWithGaps *commonBuffer = CumulativeCompletion->GetCommonBuffer();
170-
ui8 *destination = commonBuffer->RawDataPtr(CommonBufferOffset, PayloadReadSize);
171175

172176
ui8* source = Buffer->Data();
173177

@@ -183,8 +187,6 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
183187
sectorOffset = 0;
184188
}
185189

186-
ui64 chunkNonce = CumulativeCompletion->GetChunkNonce();
187-
188190
ui32 beginBadUserOffset = 0xffffffff;
189191
ui32 endBadUserOffset = 0xffffffff;
190192
ui32 userSectorSize = format.SectorPayloadSize();
@@ -193,7 +195,7 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
193195

194196
TSectorRestorator restorator(false, 1, false,
195197
format, PDisk->PCtx.get(), &PDisk->Mon, PDisk->BufferPool.Get());
196-
ui64 lastNonce = Min((ui64)0, chunkNonce - 1);
198+
ui64 lastNonce = Min((ui64)0, ChunkNonce - 1);
197199
restorator.Restore(source, format.Offset(Read->ChunkIdx, sectorIdx), format.MagicDataChunk, lastNonce,
198200
Read->Owner);
199201

@@ -211,7 +213,7 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
211213
<< " for owner# " << Read->Owner
212214
<< " beginBadUserOffet# " << beginBadUserOffset << " endBadUserOffset# " << endBadUserOffset
213215
<< " due to multiple sectors with incorrect hashes. Marker# BPC001");
214-
commonBuffer->AddGap(beginBadUserOffset, endBadUserOffset);
216+
CumulativeCompletion->AddGap(beginBadUserOffset, endBadUserOffset);
215217
beginBadUserOffset = 0xffffffff;
216218
endBadUserOffset = 0xffffffff;
217219
}
@@ -221,35 +223,35 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
221223

222224
// Decrypt data
223225
if (beginBadUserOffset != 0xffffffff) {
224-
memset(destination, 0, sectorPayloadSize);
226+
memset(Destination, 0, sectorPayloadSize);
225227
} else {
226228
TDataSectorFooter *footer = (TDataSectorFooter*) (source + format.SectorSize - sizeof(TDataSectorFooter));
227-
if (footer->Nonce != chunkNonce + sectorIdx) {
229+
if (footer->Nonce != ChunkNonce + sectorIdx) {
228230
ui32 userOffset = sectorIdx * userSectorSize;
229231
LOG_INFO_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PCtx->PDiskId
230232
<< " ReqId# " << Read->ReqId
231233
<< " Can't read chunk chunkIdx# " << Read->ChunkIdx
232234
<< " for owner# " << Read->Owner
233-
<< " nonce mismatch: expected# " << (ui64)(chunkNonce + sectorIdx)
235+
<< " nonce mismatch: expected# " << (ui64)(ChunkNonce + sectorIdx)
234236
<< ", on-disk# " << (ui64)footer->Nonce
235237
<< " for userOffset# " << userOffset
236238
<< " ! Marker# BPC002");
237239
if (beginBadUserOffset == 0xffffffff) {
238240
beginBadUserOffset = userOffset;
239241
}
240242
endBadUserOffset = beginUserOffset + userSectorSize;
241-
memset(destination, 0, sectorPayloadSize);
243+
memset(Destination, 0, sectorPayloadSize);
242244
} else {
243245
cypher.StartMessage(footer->Nonce);
244-
if (sectorOffset > 0 || intptr_t(destination) % 32) {
246+
if (sectorOffset > 0 || intptr_t(Destination) % 32) {
245247
cypher.InplaceEncrypt(source, sectorOffset + sectorPayloadSize);
246248
if (CommonBufferOffset == 0 || !IsTheLastPart) {
247-
memcpy(destination, source + sectorOffset, sectorPayloadSize);
249+
memcpy(Destination, source + sectorOffset, sectorPayloadSize);
248250
} else {
249-
memcpy(destination, source, sectorPayloadSize);
251+
memcpy(Destination, source, sectorPayloadSize);
250252
}
251253
} else {
252-
cypher.Encrypt(destination, source, sectorPayloadSize);
254+
cypher.Encrypt(Destination, source, sectorPayloadSize);
253255
}
254256
if (CanarySize > 0) {
255257
ui32 canaryPosition = sectorOffset + sectorPayloadSize;
@@ -259,7 +261,7 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
259261
}
260262
}
261263
}
262-
destination += sectorPayloadSize;
264+
Destination += sectorPayloadSize;
263265
source += format.SectorSize;
264266
PayloadReadSize -= sectorPayloadSize;
265267
sectorPayloadSize = Min(format.SectorPayloadSize(), PayloadReadSize);
@@ -273,7 +275,7 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
273275
<< " for owner# " << Read->Owner
274276
<< " beginBadUserOffet# " << beginBadUserOffset << " endBadUserOffset# " << endBadUserOffset
275277
<< " due to multiple sectors with incorrect hashes/nonces. Marker# BPC003");
276-
commonBuffer->AddGap(beginBadUserOffset, endBadUserOffset);
278+
CumulativeCompletion->AddGap(beginBadUserOffset, endBadUserOffset);
277279
beginBadUserOffset = 0xffffffff;
278280
endBadUserOffset = 0xffffffff;
279281
}
@@ -407,4 +409,3 @@ void TChunkTrimCompletion::Exec(TActorSystem *actorSystem) {
407409

408410
} // NPDisk
409411
} // NKikimr
410-

ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ class TCompletionChunkRead : public TCompletionAction {
169169
TPDisk *PDisk;
170170
TIntrusivePtr<TChunkRead> Read;
171171
TBufferWithGaps CommonBuffer;
172+
TMutex CommonBufferMutex; // used to protect CommonBuffer when gaps are being added
172173
TAtomic PartsPending;
173174
TAtomic Deletes;
174175
std::function<void()> OnDestroy;
@@ -206,6 +207,11 @@ class TCompletionChunkRead : public TCompletionAction {
206207
return &CommonBuffer;
207208
}
208209

210+
void AddGap(ui32 start, ui32 end) {
211+
TGuard<TMutex> g(CommonBufferMutex);
212+
CommonBuffer.AddGap(start, end);
213+
}
214+
209215
ui64 GetChunkNonce() {
210216
return ChunkNonce;
211217
}
@@ -228,12 +234,14 @@ class TCompletionChunkReadPart : public TCompletionAction {
228234
ui64 PayloadReadSize;
229235
ui64 CommonBufferOffset;
230236
TCompletionChunkRead *CumulativeCompletion;
237+
ui64 ChunkNonce;
238+
ui8 *Destination = nullptr;
231239
TBuffer::TPtr Buffer;
232240
bool IsTheLastPart;
233241
NWilson::TSpan Span;
234242
public:
235243
TCompletionChunkReadPart(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, ui64 rawReadSize, ui64 payloadReadSize,
236-
ui64 commonBufferOffset, TCompletionChunkRead *cumulativeCompletion, bool isTheLastPart,
244+
ui64 commonBufferOffset, TCompletionChunkRead *cumulativeCompletion, bool isTheLastPart,
237245
NWilson::TSpan&& span);
238246

239247

ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ struct TPDiskConfig : public TThrRefBase {
156156

157157
NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN;
158158

159+
ui32 CompletionThreadsCount = 1;
160+
159161
bool MetadataOnly = false;
160162

161163
TPDiskConfig(ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory)
@@ -310,6 +312,7 @@ struct TPDiskConfig : public TThrRefBase {
310312
str << " YellowLogChunksMultiplier# " << YellowLogChunksMultiplier << x;
311313
str << " MaxMetadataMegabytes# " << MaxMetadataMegabytes << x;
312314
str << " SpaceColorBorder# " << SpaceColorBorder << x;
315+
str << " CompletionThreadsCount# " << CompletionThreadsCount << x;
313316
str << "}";
314317
return str.Str();
315318
}
@@ -394,8 +397,11 @@ struct TPDiskConfig : public TThrRefBase {
394397
limit = Max<ui32>(13, limit);
395398
ChunkBaseLimit = limit;
396399
}
400+
401+
if (cfg->HasCompletionThreadsCount()) {
402+
CompletionThreadsCount = cfg->GetCompletionThreadsCount();
403+
}
397404
}
398405
};
399406

400407
} // NKikimr
401-

ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ TDriveEstimator::TDriveEstimator(const TString filename)
237237
, ActorSystemCreator(new TActorSystemCreator)
238238
, ActorSystem(ActorSystemCreator->GetActorSystem())
239239
, QueueDepth(4)
240-
, Device(CreateRealBlockDevice(filename, PDiskMon, 50, 0, QueueDepth, TDeviceMode::LockFile, 128, nullptr, nullptr))
240+
, Device(CreateRealBlockDevice(filename, PDiskMon, 50, 0, QueueDepth, TDeviceMode::LockFile, 128, 1, nullptr, nullptr))
241241
, BufferPool(CreateBufferPool(BufferSize, 1, false, {}))
242242
, Buffer(BufferPool->Pop())
243243
{

ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ TPDisk::TPDisk(std::shared_ptr<TPDiskCtx> pCtx, const TIntrusivePtr<TPDiskConfig
4747
, BlockDevice(CreateRealBlockDevice(cfg->GetDevicePath(), Mon,
4848
HPCyclesMs(ReorderingMs), DriveModel.SeekTimeNs(), cfg->DeviceInFlight,
4949
TDeviceMode::LockFile | (cfg->UseSpdkNvmeDriver ? TDeviceMode::UseSpdk : 0),
50-
cfg->MaxQueuedCompletionActions, cfg->SectorMap, this))
50+
cfg->MaxQueuedCompletionActions, cfg->CompletionThreadsCount, cfg->SectorMap, this))
5151
, Cfg(cfg)
5252
, CreationTime(TInstant::Now())
5353
, ExpectedSlotCount(cfg->ExpectedSlotCount)

ydb/core/protos/blobstorage_pdisk_config.proto

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,5 +91,6 @@ message TPDiskConfig {
9191
optional uint64 ExpectedSlotCount = 2001; // Number of slots to calculate per-vdisk disk space limit.
9292

9393
optional uint32 ChunkBaseLimit = 2002; // Free chunk permille that triggers Cyan color (e.g. 100 is 10%). Between 130 (default) and 13.
94-
};
9594

95+
optional uint32 CompletionThreadsCount = 2003;
96+
};

0 commit comments

Comments
 (0)