Inefficient work with PQ Cache L2 (#15700)

Alek5andr-Kotov · web-flow · commit 7fc4c72b42f1 · 2025-03-13T20:49:39.000+03:00
diff --git a/ydb/core/persqueue/cache_eviction.h b/ydb/core/persqueue/cache_eviction.h
@@ -7,9 +7,7 @@
 #include <ydb/core/persqueue/events/internal.h>
 #include <ydb/core/persqueue/map_subrange.h>
 
-namespace NKikimr {
-namespace NPQ {
-
+namespace NKikimr::NPQ {
     struct TBlobId {
         TPartitionId Partition;
         ui64 Offset;
@@ -26,10 +24,6 @@ namespace NPQ {
         {
         }
 
-        bool operator==(const TBlobId& r) const {
-            return Partition.IsEqual(r.Partition) && Offset == r.Offset && PartNo == r.PartNo;
-        }
-
         bool operator<(const TBlobId& r) const {
             auto makeTuple = [](const TBlobId& v) {
                 return std::make_tuple(v.Partition, v.Offset, v.PartNo, v.Count, v.InternalPartsCount);
@@ -38,11 +32,22 @@ namespace NPQ {
             return makeTuple(*this) < makeTuple(r);
         }
 
+        bool operator==(const TBlobId& r) const {
+            // Equality covers the same five fields, in the same order, as operator< above.
+            auto fields = [](const TBlobId& v) {
+                return std::tie(v.Partition, v.Offset, v.PartNo, v.Count, v.InternalPartsCount);
+            };
+            return fields(*this) == fields(r);
+        }
+
         ui64 Hash() const {
-            return Hash128to32((ui64(Partition.InternalPartitionId) << 17) + (Partition.IsSupportivePartition() ? 0 : (1 << 16)) + PartNo, Offset);
+            ui64 hash = Hash128to32((ui64(Partition.InternalPartitionId) << 17) + (Partition.IsSupportivePartition() ? 0 : (1 << 16)) + PartNo, Offset); // same expression the function used to return directly
+            hash = Hash128to32(hash, Count); // fold in Count, which operator== also compares
+            hash = Hash128to32(hash, InternalPartsCount); // fold in InternalPartsCount, likewise compared by operator==
+            return hash;
         }
     };
-}}
+}
 
 template <>
 struct THash<NKikimr::NPQ::TBlobId> {
@@ -51,8 +56,10 @@ struct THash<NKikimr::NPQ::TBlobId> {
     }
 };
 
-namespace NKikimr {
-namespace NPQ {
+namespace NKikimr::NPQ {
+    inline TBlobId MakeBlobId(const TPartitionId& partitionId, const TRequestedBlob& blob) { // single place that maps a requested blob to its cache id
+        return TBlobId(partitionId, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount);
+    }
 
     struct TKvRequest {
         enum ERequestType {
@@ -89,7 +96,9 @@ namespace NPQ {
         , MetadataWritesCount(0)
         {}
 
-        TBlobId GetBlobId(ui32 pos) const { return TBlobId(Partition, Blobs[pos].Offset, Blobs[pos].PartNo, Blobs[pos].Count, Blobs[pos].InternalPartsCount); }
+        TBlobId GetBlobId(ui32 pos) const { // id of the pos-th requested blob in this request's partition
+            return NPQ::MakeBlobId(Partition, Blobs[pos]); // pos is used as-is; no bounds check is performed here
+        }
 
         THolder<TEvKeyValue::TEvRequest> MakeKvRequest() const
         {
@@ -262,7 +271,7 @@ namespace NPQ {
             for (const auto& blob : kvReq.Blobs) {
                 // Touching blobs in L2. We don't need data here
                 auto& blobs = blob.Cached ? reqData->RequestedBlobs : reqData->MissedBlobs;
-                blobs.emplace_back(kvReq.Partition, blob.Offset, blob.PartNo, nullptr);
+                blobs.emplace_back(kvReq.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, nullptr);
             }
 
             auto l2Request = MakeHolder<TEvPqCache::TEvCacheL2Request>(reqData.Release());
@@ -285,11 +294,11 @@ namespace NPQ {
         void SaveBlobs(const TKvRequest& kvReq, TCacheL2Request& reqData, const TActorContext& ctx)
         {
             for (const TRequestedBlob& reqBlob : kvReq.Blobs) {
-                TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
+                TBlobId blob = NPQ::MakeBlobId(kvReq.Partition, reqBlob);
 
                 // there could be a new blob with same id (for big messages)
                 if (RemoveExists(ctx, blob)) {
-                    reqData.RemovedBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, nullptr);
+                    reqData.RemovedBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount, nullptr);
                 }
 
                 auto cached = std::make_shared<TCacheValue>(reqBlob.Value, ctx.SelfID, TAppData::TimeProvider->Now());
@@ -299,15 +308,15 @@ namespace NPQ {
                 if (L1Strategy)
                     L1Strategy->SaveHeadBlob(blob);
 
-                reqData.StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached);
+                reqData.StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, blob.Count, blob.InternalPartsCount, cached);
 
                 LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Caching head blob in L1. Partition "
                     << blob.Partition << " offset " << blob.Offset << " count " << blob.Count
                     << " size " << reqBlob.Value.size() << " actorID " << ctx.SelfID);
             }
         }
 
-        TBlobId MakeBlobId(const TString& s)
+        static TBlobId MakeBlobId(const TString& s)
         {
             if (s.length() == TKeyPrefix::MarkPosition()) {
                 TPartitionId partitionId;
@@ -327,8 +336,8 @@ namespace NPQ {
                 TBlobId newBlob = MakeBlobId(newKey);
                 if (RenameExists(ctx, oldBlob, newBlob)) {
                     reqData.RenamedBlobs.emplace_back(std::piecewise_construct,
-                                                      std::make_tuple(oldBlob.Partition, oldBlob.Offset, oldBlob.PartNo, nullptr),
-                                                      std::make_tuple(newBlob.Partition, newBlob.Offset, newBlob.PartNo, nullptr));
+                                                      std::make_tuple(oldBlob.Partition, oldBlob.Offset, oldBlob.PartNo, oldBlob.Count, oldBlob.InternalPartsCount, nullptr),
+                                                      std::make_tuple(newBlob.Partition, newBlob.Offset, newBlob.PartNo, newBlob.Count, newBlob.InternalPartsCount, nullptr));
 
                     LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Renaming head blob in L1. Old partition "
                                 << oldBlob.Partition << " old offset " << oldBlob.Offset << " old count " << oldBlob.Count
@@ -348,7 +357,7 @@ namespace NPQ {
                 for (auto i = lowerBound; i != upperBound; ++i) {
                     const auto& [blob, value] = *i;
 
-                    reqData.RemovedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, nullptr);
+                    reqData.RemovedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, nullptr);
                     Counters.Dec(value);
 
                     LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Deleting head blob in L1. Partition "
@@ -372,7 +381,7 @@ namespace NPQ {
                     continue;
 
                 const TRequestedBlob& reqBlob = kvReq.Blobs[i];
-                TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
+                TBlobId blob = NPQ::MakeBlobId(kvReq.Partition, reqBlob);
                 {
                     TValueL1 value;
                     if (CheckExists(ctx, blob, value)) {
@@ -386,7 +395,7 @@ namespace NPQ {
                 Cache[blob] = valL1; // weak
                 Counters.Inc(valL1);
 
-                reqData->StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached);
+                reqData->StoredBlobs.emplace_back(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount, cached);
 
                 LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Prefetched blob in L1. Partition "
                     << blob.Partition << " offset " << blob.Offset << " count " << blob.Count
@@ -441,7 +450,7 @@ namespace NPQ {
         void PrepareTouch(const TActorContext& ctx, THolder<TCacheL2Request>& reqData, const TDeque<TBlobId>& used)
         {
             for (auto& blob : used) {
-                reqData->ExpectedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, nullptr);
+                reqData->ExpectedBlobs.emplace_back(blob.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, nullptr);
 
                 LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Touching blob. Partition "
                     << blob.Partition << " offset " << blob.Offset << " count " << blob.Count);
@@ -484,7 +493,7 @@ namespace NPQ {
                     ++numCached;
                     continue;
                 }
-                TBlobId blobId(kvReq.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount);
+                TBlobId blobId = NPQ::MakeBlobId(kvReq.Partition, blob);
                 TCacheValue::TPtr cached = GetValue(ctx, blobId);
                 if (cached) {
                     ++numCached;
@@ -566,5 +575,4 @@ namespace NPQ {
         }
     };
 
-} //NPQ
-} //NKikimr
+} // NKikimr::NPQ
diff --git a/ydb/core/persqueue/pq_l2_cache.cpp b/ydb/core/persqueue/pq_l2_cache.cpp
@@ -58,7 +58,7 @@ void TPersQueueCacheL2::SendResponses(const TActorContext& ctx, const THashMap<T
         }
 
         Y_ABORT_UNLESS(key.TabletId == resp->TabletId, "PQ L2. Multiple topics in one PQ tablet.");
-        resp->Removed.emplace_back(key.Partition, key.Offset, key.PartNo, evicted);
+        resp->Removed.emplace_back(key.Partition, key.Offset, key.PartNo, key.Count, key.InternalPartsCount, evicted);
 
         RetentionTime = now - evicted->GetAccessTime();
         if (RetentionTime < KeepTime)
@@ -91,8 +91,7 @@ void TPersQueueCacheL2::AddBlobs(const TActorContext& ctx, ui64 tabletId, const
         TKey key(tabletId, blob);
         // PQ tablet could send some data twice (if it's restored after die)
         if (Cache.FindWithoutPromote(key) != Cache.End()) {
-            LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Same blob insertion. Tablet '" << tabletId
-                << "' partition " << key.Partition << " offset " << key.Offset << " size " << blob.Value->DataSize());
+            LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Same blob insertion. " << key.ToString() << " size " << blob.Value->DataSize());
             continue;
         }
 
@@ -108,19 +107,17 @@ void TPersQueueCacheL2::AddBlobs(const TActorContext& ctx, ui64 tabletId, const
                 tabletId, Cache.Size(), CurrentSize, MaxSize, blob.Value->DataSize(), blobs.size(), outEvicted.size());
 
             TCacheValue::TPtr value = oldest.Value();
-            outEvicted.insert({oldest.Key(), value});
+            outEvicted.emplace(oldest.Key(), value);
             if (value->GetAccessCount() == 0)
                 ++numUnused;
 
-            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Evicting blob. Tablet '" << tabletId
-                << "' partition " << oldest.Key().Partition << " offset " << oldest.Key().Offset << " size " << value->DataSize());
+            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Evicting blob. " << oldest.Key().ToString() << " size " << value->DataSize());
 
             CurrentSize -= value->DataSize();
             Cache.Erase(oldest);
         }
 
-        LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Adding blob. Tablet '" << tabletId
-            << "' partition " << blob.Partition << " offset " << blob.Offset << " size " << blob.Value->DataSize());
+        LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Adding blob. " << key.ToString() << " size " << blob.Value->DataSize());
 
         Cache.Insert(key, blob.Value);
     }
@@ -147,11 +144,9 @@ void TPersQueueCacheL2::RemoveBlobs(const TActorContext& ctx, ui64 tabletId, con
             if ((*it)->GetAccessCount() == 0)
                 ++numUnused;
             Cache.Erase(it);
-            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Removed. Tablet '" << tabletId
-                << "' partition " << blob.Partition << " offset " << blob.Offset);
+            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Removed. " << key.ToString());
         } else {
-            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in remove. Tablet '" << tabletId
-                << "' partition " << blob.Partition << " offset " << blob.Offset);
+            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in remove. " << key.ToString());
         }
     }
 
@@ -171,6 +166,7 @@ void TPersQueueCacheL2::RenameBlobs(const TActorContext& ctx, ui64 tabletId,
 
     for (const auto& [oldBlob, newBlob] : blobs) {
         TKey oldKey(tabletId, oldBlob);
+
         auto it = Cache.FindWithoutPromote(oldKey);
         if (it == Cache.End()) {
             continue;
@@ -180,9 +176,7 @@ void TPersQueueCacheL2::RenameBlobs(const TActorContext& ctx, ui64 tabletId,
         Cache.Insert(newKey, *it);
         Cache.Erase(it);
 
-        LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Renamed. Tablet '" << tabletId
-                    << "' old partition " << oldBlob.Partition << " old offset " << oldBlob.Offset
-                    << " new partition " << newBlob.Partition << " new offset " << newBlob.Offset);
+        LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Renamed. old " << oldKey.ToString() << ", new " << newKey.ToString());
     }
 }
 
@@ -195,11 +189,9 @@ void TPersQueueCacheL2::TouchBlobs(const TActorContext& ctx, ui64 tabletId, cons
         auto it = Cache.Find(key);
         if (it != Cache.End()) {
             (*it)->Touch(now);
-            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Touched. Tablet '" << tabletId
-                << "' partition " << blob.Partition << " offset " << blob.Offset);
+            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Touched. " << key.ToString());
         } else {
-            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in touch. Tablet '" << tabletId
-                << "' partition " << blob.Partition << " offset " << blob.Offset);
+            LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "PQ Cache (L2). Miss in touch. " << key.ToString());
         }
     }
 
diff --git a/ydb/core/persqueue/pq_l2_cache.h b/ydb/core/persqueue/pq_l2_cache.h
@@ -45,28 +45,47 @@ class TPersQueueCacheL2 : public TActorBootstrapped<TPersQueueCacheL2> {
         TPartitionId Partition;
         ui64 Offset;
         ui16 PartNo;
+        ui32 Count;
+        ui16 InternalPartsCount;
 
         TKey(ui64 tabletId, const TCacheBlobL2& blob)
             : TabletId(tabletId)
             , Partition(blob.Partition)
             , Offset(blob.Offset)
             , PartNo(blob.PartNo)
+            , Count(blob.Count) // copied from the blob so it takes part in equality and hashing
+            , InternalPartsCount(blob.InternalPartsCount) // copied from the blob for the same reason
         {
             KeyHash = Hash128to32(TabletId, (static_cast<ui64>(Partition.InternalPartitionId) << 17) + PartNo + (Partition.IsSupportivePartition() ? 0 : (1 << 16)));
             KeyHash = Hash128to32(KeyHash, Offset);
+            KeyHash = Hash128to32(KeyHash, Count); // the hash is computed once here and returned by Hash()
+            KeyHash = Hash128to32(KeyHash, InternalPartsCount); // every field compared by operator== is now mixed in
         }
 
-        bool operator == (const TKey& key) const {
+        bool operator ==(const TKey& key) const { // field-wise equality; the cached KeyHash is not compared
             return TabletId == key.TabletId &&
                 Partition == key.Partition &&
                 Offset == key.Offset &&
-                PartNo == key.PartNo;
+                PartNo == key.PartNo &&
+                Count == key.Count && // keys that differ only in Count are distinct entries
+                InternalPartsCount == key.InternalPartsCount; // likewise for InternalPartsCount
         }
 
         ui64 Hash() const noexcept {
             return KeyHash;
         }
 
+        TString ToString() const { // text form of every key field: tablet, partition, offset, partno, count, parts
+            TString out = "Tablet '";
+            out += ::ToString(TabletId); out += "'";
+            out += " partition "; out += Partition.ToString();
+            out += " offset "; out += ::ToString(Offset);
+            out += " partno "; out += ::ToString(PartNo);
+            out += " count "; out += ::ToString(Count);
+            out += " parts "; out += ::ToString(InternalPartsCount);
+            return out;
+        }
+
     private:
         ui64 KeyHash;
     };
diff --git a/ydb/core/persqueue/pq_l2_service.h b/ydb/core/persqueue/pq_l2_service.h
@@ -72,6 +72,8 @@ struct TCacheBlobL2 {
     TPartitionId Partition;
     ui64 Offset;
     ui16 PartNo;
+    ui32 Count;
+    ui16 InternalPartsCount;
     TCacheValue::TPtr Value;
 };
 
diff --git a/ydb/core/persqueue/read.h b/ydb/core/persqueue/read.h
@@ -56,7 +56,7 @@ namespace NPQ {
         bool CheckInProgress(const TActorContext& ctx, TKvRequest& kvRequest)
         {
             for (const TRequestedBlob& reqBlob : kvRequest.Blobs) {
-                TBlobId blob(kvRequest.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
+                TBlobId blob = MakeBlobId(kvRequest.Partition, reqBlob);
                 auto it = ReadsInProgress.find(blob);
                 if (it != ReadsInProgress.end()) {
                     LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Read request is blocked. Partition "
@@ -73,7 +73,7 @@ namespace NPQ {
         {
             TVector<TKvRequest> unblocked;
             for (const TRequestedBlob& reqBlob : blocker.Blobs) {
-                TBlobId blob(blocker.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount);
+                TBlobId blob = MakeBlobId(blocker.Partition, reqBlob);
                 ReadsInProgress.erase(blob);
 
                 auto it = BlockedReads.find(blob);
@@ -352,8 +352,8 @@ namespace NPQ {
             THolder<TCacheL2Response> resp(ev->Get()->Data.Release());
             Y_ABORT_UNLESS(resp->TabletId == TabletId);
 
-            for (TCacheBlobL2& blob : resp->Removed)
-                Cache.RemoveEvictedBlob(ctx, TBlobId(blob.Partition, blob.Offset, blob.PartNo, 0, 0), blob.Value);
+            for (const TCacheBlobL2& blob : resp->Removed)
+                Cache.RemoveEvictedBlob(ctx, TBlobId(blob.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount), blob.Value);
 
             if (resp->Overload) {
                 LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE,
diff --git a/ydb/services/persqueue_v1/persqueue_ut.cpp b/ydb/services/persqueue_v1/persqueue_ut.cpp
@@ -2825,9 +2825,9 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) {
         Cerr << ">>>>> 2" << Endl << Flush;
         auto info16 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 16, 16, "user"}, 16);
 
-        UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromCache, 3);
-        UNIT_ASSERT_VALUES_EQUAL(info16.BlobsFromCache, 2);
-        UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromDisk + info16.BlobsFromDisk, 0);
+        UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromCache, 2);
+        UNIT_ASSERT_VALUES_EQUAL(info16.BlobsFromCache, 1);
+        UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromDisk + info16.BlobsFromDisk, 2);
 
         for (ui32 i = 0; i < 8; ++i)
             server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", 32+i}, value);