Skip to content

Commit 7fc4c72

Browse files
Inefficient work with PQ Cache L2 (#15700)
1 parent 470ff0b commit 7fc4c72

File tree

6 files changed

+75
-54
lines changed

6 files changed

+75
-54
lines changed

ydb/core/persqueue/cache_eviction.h

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@
77
#include <ydb/core/persqueue/events/internal.h>
88
#include <ydb/core/persqueue/map_subrange.h>
99

10-
namespace NKikimr {
11-
namespace NPQ {
12-
10+
namespace NKikimr::NPQ {
1311
struct TBlobId {
1412
TPartitionId Partition;
1513
ui64 Offset;
@@ -26,10 +24,6 @@ namespace NPQ {
2624
{
2725
}
2826

29-
bool operator==(const TBlobId& r) const {
30-
return Partition.IsEqual(r.Partition) && Offset == r.Offset && PartNo == r.PartNo;
31-
}
32-
3327
bool operator<(const TBlobId& r) const {
3428
auto makeTuple = [](const TBlobId& v) {
3529
return std::make_tuple(v.Partition, v.Offset, v.PartNo, v.Count, v.InternalPartsCount);
@@ -38,11 +32,22 @@ namespace NPQ {
3832
return makeTuple(*this) < makeTuple(r);
3933
}
4034

35+
bool operator==(const TBlobId& r) const {
36+
auto makeTuple = [](const TBlobId& v) {
37+
return std::make_tuple(v.Partition, v.Offset, v.PartNo, v.Count, v.InternalPartsCount);
38+
};
39+
40+
return makeTuple(*this) == makeTuple(r);
41+
}
42+
4143
ui64 Hash() const {
42-
return Hash128to32((ui64(Partition.InternalPartitionId) << 17) + (Partition.IsSupportivePartition() ? 0 : (1 << 16)) + PartNo, Offset);
44+
ui64 hash = Hash128to32((ui64(Partition.InternalPartitionId) << 17) + (Partition.IsSupportivePartition() ? 0 : (1 << 16)) + PartNo, Offset);
45+
hash = Hash128to32(hash, Count);
46+
hash = Hash128to32(hash, InternalPartsCount);
47+
return hash;
4348
}
4449
};
45-
}}
50+
}
4651

4752
template <>
4853
struct THash<NKikimr::NPQ::TBlobId> {
@@ -51,8 +56,10 @@ struct THash<NKikimr::NPQ::TBlobId> {
5156
}
5257
};
5358

54-
namespace NKikimr {
55-
namespace NPQ {
59+
namespace NKikimr::NPQ {
60+
inline TBlobId MakeBlobId(const TPartitionId& partitionId, const TRequestedBlob& blob) {
61+
return {partitionId, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount};
62+
}
5663

5764
struct TKvRequest {
5865
enum ERequestType {
@@ -89,7 +96,9 @@ namespace NPQ {
8996
, MetadataWritesCount(0)
9097
{}
9198

92-
TBlobId GetBlobId(ui32 pos) const { return TBlobId(Partition, Blobs[pos].Offset, Blobs[pos].PartNo, Blobs[pos].Count, Blobs[pos].InternalPartsCount); }
99+
TBlobId GetBlobId(ui32 pos) const {
100+
return NPQ::MakeBlobId(Partition, Blobs[pos]);
101+
}
93102

94103
THolder<TEvKeyValue::TEvRequest> MakeKvRequest() const
95104
{
@@ -262,7 +271,7 @@ namespace NPQ {
262271
for (const auto& blob : kvReq.Blobs) {
263272
// Touching blobs in L2. We don't need data here
264273
auto& blobs = blob.Cached ? reqData->RequestedBlobs : reqData->MissedBlobs;
265-
blobs.emplace_back(kvReq.Partition, blob.Offset, blob.PartNo, nullptr);
274+
blobs.emplace_back(kvReq.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, nullptr);
266275
}
267276

268277
auto l2Request = MakeHolder<TEvPqCache::TEvCacheL2Request>(reqData.Release());
@@ -285,11 +294,11 @@ namespace NPQ {
285294
void SaveBlobs(const TKvRequest& kvReq, TCacheL2Request& reqData, const TActorContext& ctx)
286295
{
287296
for (const TRequestedBlob& reqBlob : kvReq.Blobs) {
288-
TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
297+
TBlobId blob = NPQ::MakeBlobId(kvReq.Partition, reqBlob);
289298

290299
// there could be a new blob with same id (for big messages)
291300
if (RemoveExists(ctx, blob)) {
292-
reqData.RemovedBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, nullptr);
301+
reqData.RemovedBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount, nullptr);
293302
}
294303

295304
auto cached = std::make_shared<TCacheValue>(reqBlob.Value, ctx.SelfID, TAppData::TimeProvider->Now());
@@ -299,15 +308,15 @@ namespace NPQ {
299308
if (L1Strategy)
300309
L1Strategy->SaveHeadBlob(blob);
301310

302-
reqData.StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached);
311+
reqData.StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, blob.Count, blob.InternalPartsCount, cached);
303312

304313
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Caching head blob in L1. Partition "
305314
<< blob.Partition << " offset " << blob.Offset << " count " << blob.Count
306315
<< " size " << reqBlob.Value.size() << " actorID " << ctx.SelfID);
307316
}
308317
}
309318

310-
TBlobId MakeBlobId(const TString& s)
319+
static TBlobId MakeBlobId(const TString& s)
311320
{
312321
if (s.length() == TKeyPrefix::MarkPosition()) {
313322
TPartitionId partitionId;
@@ -327,8 +336,8 @@ namespace NPQ {
327336
TBlobId newBlob = MakeBlobId(newKey);
328337
if (RenameExists(ctx, oldBlob, newBlob)) {
329338
reqData.RenamedBlobs.emplace_back(std::piecewise_construct,
330-
std::make_tuple(oldBlob.Partition, oldBlob.Offset, oldBlob.PartNo, nullptr),
331-
std::make_tuple(newBlob.Partition, newBlob.Offset, newBlob.PartNo, nullptr));
339+
std::make_tuple(oldBlob.Partition, oldBlob.Offset, oldBlob.PartNo, oldBlob.Count, oldBlob.InternalPartsCount, nullptr),
340+
std::make_tuple(newBlob.Partition, newBlob.Offset, newBlob.PartNo, newBlob.Count, newBlob.InternalPartsCount, nullptr));
332341

333342
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Renaming head blob in L1. Old partition "
334343
<< oldBlob.Partition << " old offset " << oldBlob.Offset << " old count " << oldBlob.Count
@@ -348,7 +357,7 @@ namespace NPQ {
348357
for (auto i = lowerBound; i != upperBound; ++i) {
349358
const auto& [blob, value] = *i;
350359

351-
reqData.RemovedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, nullptr);
360+
reqData.RemovedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, nullptr);
352361
Counters.Dec(value);
353362

354363
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Deleting head blob in L1. Partition "
@@ -372,7 +381,7 @@ namespace NPQ {
372381
continue;
373382

374383
const TRequestedBlob& reqBlob = kvReq.Blobs[i];
375-
TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
384+
TBlobId blob = NPQ::MakeBlobId(kvReq.Partition, reqBlob);
376385
{
377386
TValueL1 value;
378387
if (CheckExists(ctx, blob, value)) {
@@ -386,7 +395,7 @@ namespace NPQ {
386395
Cache[blob] = valL1; // weak
387396
Counters.Inc(valL1);
388397

389-
reqData->StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached);
398+
reqData->StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount, cached);
390399

391400
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Prefetched blob in L1. Partition "
392401
<< blob.Partition << " offset " << blob.Offset << " count " << blob.Count
@@ -441,7 +450,7 @@ namespace NPQ {
441450
void PrepareTouch(const TActorContext& ctx, THolder<TCacheL2Request>& reqData, const TDeque<TBlobId>& used)
442451
{
443452
for (auto& blob : used) {
444-
reqData->ExpectedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, nullptr);
453+
reqData->ExpectedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, nullptr);
445454

446455
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Touching blob. Partition "
447456
<< blob.Partition << " offset " << blob.Offset << " count " << blob.Count);
@@ -484,7 +493,7 @@ namespace NPQ {
484493
++numCached;
485494
continue;
486495
}
487-
TBlobId blobId(kvReq.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount);
496+
TBlobId blobId = NPQ::MakeBlobId(kvReq.Partition, blob);
488497
TCacheValue::TPtr cached = GetValue(ctx, blobId);
489498
if (cached) {
490499
++numCached;
@@ -566,5 +575,4 @@ namespace NPQ {
566575
}
567576
};
568577

569-
} //NPQ
570-
} //NKikimr
578+
} // NKikimr::NPQ

ydb/core/persqueue/pq_l2_cache.cpp

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ void TPersQueueCacheL2::SendResponses(const TActorContext& ctx, const THashMap<T
5858
}
5959

6060
Y_ABORT_UNLESS(key.TabletId == resp->TabletId, "PQ L2. Multiple topics in one PQ tablet.");
61-
resp->Removed.emplace_back(key.Partition, key.Offset, key.PartNo, evicted);
61+
resp->Removed.emplace_back(key.Partition, key.Offset, key.PartNo, key.Count, key.InternalPartsCount, evicted);
6262

6363
RetentionTime = now - evicted->GetAccessTime();
6464
if (RetentionTime < KeepTime)
@@ -91,8 +91,7 @@ void TPersQueueCacheL2::AddBlobs(const TActorContext& ctx, ui64 tabletId, const
9191
TKey key(tabletId, blob);
9292
// PQ tablet could send some data twice (if it's restored after die)
9393
if (Cache.FindWithoutPromote(key) != Cache.End()) {
94-
LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Same blob insertion. Tablet '" << tabletId
95-
<< "' partition " << key.Partition << " offset " << key.Offset << " size " << blob.Value->DataSize());
94+
LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Same blob insertion. " << key.ToString() << " size " << blob.Value->DataSize());
9695
continue;
9796
}
9897

@@ -108,19 +107,17 @@ void TPersQueueCacheL2::AddBlobs(const TActorContext& ctx, ui64 tabletId, const
108107
tabletId, Cache.Size(), CurrentSize, MaxSize, blob.Value->DataSize(), blobs.size(), outEvicted.size());
109108

110109
TCacheValue::TPtr value = oldest.Value();
111-
outEvicted.insert({oldest.Key(), value});
110+
outEvicted.emplace(oldest.Key(), value);
112111
if (value->GetAccessCount() == 0)
113112
++numUnused;
114113

115-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Evicting blob. Tablet '" << tabletId
116-
<< "' partition " << oldest.Key().Partition << " offset " << oldest.Key().Offset << " size " << value->DataSize());
114+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Evicting blob. " << oldest.Key().ToString() << " size " << value->DataSize());
117115

118116
CurrentSize -= value->DataSize();
119117
Cache.Erase(oldest);
120118
}
121119

122-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Adding blob. Tablet '" << tabletId
123-
<< "' partition " << blob.Partition << " offset " << blob.Offset << " size " << blob.Value->DataSize());
120+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Adding blob. " << key.ToString() << " size " << blob.Value->DataSize());
124121

125122
Cache.Insert(key, blob.Value);
126123
}
@@ -147,11 +144,9 @@ void TPersQueueCacheL2::RemoveBlobs(const TActorContext& ctx, ui64 tabletId, con
147144
if ((*it)->GetAccessCount() == 0)
148145
++numUnused;
149146
Cache.Erase(it);
150-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Removed. Tablet '" << tabletId
151-
<< "' partition " << blob.Partition << " offset " << blob.Offset);
147+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Removed. " << key.ToString());
152148
} else {
153-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in remove. Tablet '" << tabletId
154-
<< "' partition " << blob.Partition << " offset " << blob.Offset);
149+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in remove. " << key.ToString());
155150
}
156151
}
157152

@@ -171,6 +166,7 @@ void TPersQueueCacheL2::RenameBlobs(const TActorContext& ctx, ui64 tabletId,
171166

172167
for (const auto& [oldBlob, newBlob] : blobs) {
173168
TKey oldKey(tabletId, oldBlob);
169+
174170
auto it = Cache.FindWithoutPromote(oldKey);
175171
if (it == Cache.End()) {
176172
continue;
@@ -180,9 +176,7 @@ void TPersQueueCacheL2::RenameBlobs(const TActorContext& ctx, ui64 tabletId,
180176
Cache.Insert(newKey, *it);
181177
Cache.Erase(it);
182178

183-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Renamed. Tablet '" << tabletId
184-
<< "' old partition " << oldBlob.Partition << " old offset " << oldBlob.Offset
185-
<< " new partition " << newBlob.Partition << " new offset " << newBlob.Offset);
179+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Renamed. old " << oldKey.ToString() << ", new " << newKey.ToString());
186180
}
187181
}
188182

@@ -195,11 +189,9 @@ void TPersQueueCacheL2::TouchBlobs(const TActorContext& ctx, ui64 tabletId, cons
195189
auto it = Cache.Find(key);
196190
if (it != Cache.End()) {
197191
(*it)->Touch(now);
198-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Touched. Tablet '" << tabletId
199-
<< "' partition " << blob.Partition << " offset " << blob.Offset);
192+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Touched. " << key.ToString());
200193
} else {
201-
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in touch. Tablet '" << tabletId
202-
<< "' partition " << blob.Partition << " offset " << blob.Offset);
194+
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in touch. " << key.ToString());
203195
}
204196
}
205197

ydb/core/persqueue/pq_l2_cache.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,28 +45,47 @@ class TPersQueueCacheL2 : public TActorBootstrapped<TPersQueueCacheL2> {
4545
TPartitionId Partition;
4646
ui64 Offset;
4747
ui16 PartNo;
48+
ui32 Count;
49+
ui16 InternalPartsCount;
4850

4951
TKey(ui64 tabletId, const TCacheBlobL2& blob)
5052
: TabletId(tabletId)
5153
, Partition(blob.Partition)
5254
, Offset(blob.Offset)
5355
, PartNo(blob.PartNo)
56+
, Count(blob.Count)
57+
, InternalPartsCount(blob.InternalPartsCount)
5458
{
5559
KeyHash = Hash128to32(TabletId, (static_cast<ui64>(Partition.InternalPartitionId) << 17) + PartNo + (Partition.IsSupportivePartition() ? 0 : (1 << 16)));
5660
KeyHash = Hash128to32(KeyHash, Offset);
61+
KeyHash = Hash128to32(KeyHash, Count);
62+
KeyHash = Hash128to32(KeyHash, InternalPartsCount);
5763
}
5864

59-
bool operator == (const TKey& key) const {
65+
bool operator ==(const TKey& key) const {
6066
return TabletId == key.TabletId &&
6167
Partition == key.Partition &&
6268
Offset == key.Offset &&
63-
PartNo == key.PartNo;
69+
PartNo == key.PartNo &&
70+
Count == key.Count &&
71+
InternalPartsCount == key.InternalPartsCount;
6472
}
6573

6674
ui64 Hash() const noexcept {
6775
return KeyHash;
6876
}
6977

78+
TString ToString() const {
79+
TString s;
80+
s += "Tablet '"; s += ::ToString(TabletId); s += "'";
81+
s += " partition "; s += Partition.ToString();
82+
s += " offset "; s += ::ToString(Offset);
83+
s += " partno "; s += ::ToString(PartNo);
84+
s += " count "; s += ::ToString(Count);
85+
s += " parts "; s += ::ToString(InternalPartsCount);
86+
return s;
87+
}
88+
7089
private:
7190
ui64 KeyHash;
7291
};

ydb/core/persqueue/pq_l2_service.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ struct TCacheBlobL2 {
7272
TPartitionId Partition;
7373
ui64 Offset;
7474
ui16 PartNo;
75+
ui32 Count;
76+
ui16 InternalPartsCount;
7577
TCacheValue::TPtr Value;
7678
};
7779

ydb/core/persqueue/read.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ namespace NPQ {
5656
bool CheckInProgress(const TActorContext& ctx, TKvRequest& kvRequest)
5757
{
5858
for (const TRequestedBlob& reqBlob : kvRequest.Blobs) {
59-
TBlobId blob(kvRequest.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
59+
TBlobId blob = MakeBlobId(kvRequest.Partition, reqBlob);
6060
auto it = ReadsInProgress.find(blob);
6161
if (it != ReadsInProgress.end()) {
6262
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Read request is blocked. Partition "
@@ -73,7 +73,7 @@ namespace NPQ {
7373
{
7474
TVector<TKvRequest> unblocked;
7575
for (const TRequestedBlob& reqBlob : blocker.Blobs) {
76-
TBlobId blob(blocker.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
76+
TBlobId blob = MakeBlobId(blocker.Partition, reqBlob);
7777
ReadsInProgress.erase(blob);
7878

7979
auto it = BlockedReads.find(blob);
@@ -352,8 +352,8 @@ namespace NPQ {
352352
THolder<TCacheL2Response> resp(ev->Get()->Data.Release());
353353
Y_ABORT_UNLESS(resp->TabletId == TabletId);
354354

355-
for (TCacheBlobL2& blob : resp->Removed)
356-
Cache.RemoveEvictedBlob(ctx, TBlobId(blob.Partition, blob.Offset, blob.PartNo, 0, 0), blob.Value);
355+
for (const TCacheBlobL2& blob : resp->Removed)
356+
Cache.RemoveEvictedBlob(ctx, TBlobId(blob.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount), blob.Value);
357357

358358
if (resp->Overload) {
359359
LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE,

ydb/services/persqueue_v1/persqueue_ut.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2825,9 +2825,9 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) {
28252825
Cerr << ">>>>> 2" << Endl << Flush;
28262826
auto info16 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 16, 16, "user"}, 16);
28272827

2828-
UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromCache, 3);
2829-
UNIT_ASSERT_VALUES_EQUAL(info16.BlobsFromCache, 2);
2830-
UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromDisk + info16.BlobsFromDisk, 0);
2828+
UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromCache, 2);
2829+
UNIT_ASSERT_VALUES_EQUAL(info16.BlobsFromCache, 1);
2830+
UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromDisk + info16.BlobsFromDisk, 2);
28312831

28322832
for (ui32 i = 0; i < 8; ++i)
28332833
server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", 32+i}, value);

0 commit comments

Comments
 (0)