Skip to content

Commit 2416150

Browse files
authored
Shared Cache S3FIFO implementation (#8692)
1 parent b52420c commit 2416150

11 files changed

+1174
-126
lines changed

ydb/core/driver_lib/run/kikimr_services_initializers.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1093,10 +1093,7 @@ void TSharedCacheInitializer::InitializeServices(
10931093
config->TotalAsyncQueueInFlyLimit = cfg.GetAsyncQueueInFlyLimit();
10941094
config->TotalScanQueueInFlyLimit = cfg.GetScanQueueInFlyLimit();
10951095
config->ReplacementPolicy = cfg.GetReplacementPolicy();
1096-
1097-
if (cfg.HasActivePagesReservationPercent()) {
1098-
config->ActivePagesReservationPercent = cfg.GetActivePagesReservationPercent();
1099-
}
1096+
config->ActivePagesReservationPercent = cfg.GetActivePagesReservationPercent();
11001097

11011098
TIntrusivePtr<::NMonitoring::TDynamicCounters> tabletGroup = GetServiceCounters(appData->Counters, "tablets");
11021099
TIntrusivePtr<::NMonitoring::TDynamicCounters> sausageGroup = tabletGroup->GetSubgroup("type", "S_CACHE");

ydb/core/protos/shared_cache.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ option java_package = "ru.yandex.kikimr.proto";
33

44
// Replacement policy identifiers.
// ThreeLeveledLRU is the zero value; S3FIFO = 1 is the value added by this change.
enum TReplacementPolicy {
55
ThreeLeveledLRU = 0;
6+
S3FIFO = 1;
67
}
78

89
message TSharedCacheConfig {

ydb/core/tablet_flat/shared_cache_events.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "defs.h"
44
#include "flat_bio_events.h"
55
#include "shared_handle.h"
6+
#include <ydb/core/protos/shared_cache.pb.h>
67

78
#include <util/generic/map.h>
89
#include <util/generic/set.h>
@@ -24,6 +25,7 @@ namespace NSharedCache {
2425
EvRequest,
2526
EvResult,
2627
EvUpdated,
28+
EvReplacementPolicySwitch,
2729

2830
EvEnd
2931

@@ -127,6 +129,16 @@ namespace NSharedCache {
127129

128130
THashMap<TLogoBlobID, TActions> Actions;
129131
};
132+
133+
// Local event (id EvReplacementPolicySwitch) that carries a single
// NKikimrSharedCache::TReplacementPolicy value.
// NOTE(review): presumably sent to the shared cache to request a policy
// switch -- the handler is not visible here, confirm against the receiver.
struct TEvReplacementPolicySwitch : public TEventLocal<TEvReplacementPolicySwitch, EvReplacementPolicySwitch> {
    using TReplacementPolicy = NKikimrSharedCache::TReplacementPolicy;

    // The policy value transported by this event.
    TReplacementPolicy ReplacementPolicy;

    // Stores the given policy in the event; performs no validation.
    TEvReplacementPolicySwitch(TReplacementPolicy policy)
        : ReplacementPolicy{policy}
    {
    }
};
130142
}
131143
}
132144

Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
#pragma once
2+
#include "defs.h"
3+
#include <ydb/core/util/cache_cache_iface.h>
4+
#include <ydb/library/yverify_stream/yverify_stream.h>
5+
#include <library/cpp/monlib/counters/counters.h>
6+
#include <library/cpp/monlib/dynamic_counters/counters.h>
7+
8+
namespace NKikimr::NCache {
9+
10+
// TODO: remove template args and make some page base class
11+
12+
// Tells which TS3FIFOCache queue a page is currently linked into.
// None means the page is in neither queue: TS3FIFOCache sets it when a page
// is popped or erased, and treats a touched page with this location as a new insert.
enum class ES3FIFOPageLocation {
13+
None,
14+
SmallQueue,
15+
MainQueue
16+
};
17+
18+
template <typename TPageTraits>
19+
class TS3FIFOGhostPageQueue {
20+
using TPageKey = typename TPageTraits::TPageKey;
21+
22+
struct TGhostPage {
23+
TPageKey Key;
24+
ui64 Size; // zero size is tombstone
25+
26+
TGhostPage(const TPageKey& key, ui64 size)
27+
: Key(key)
28+
, Size(size)
29+
{}
30+
};
31+
32+
struct TGhostPageHash {
33+
using is_transparent = void;
34+
35+
inline size_t operator()(const TGhostPage* ghost) const {
36+
return TPageTraits::GetHash(ghost->Key);
37+
}
38+
39+
inline size_t operator()(const TPageKey& key) const {
40+
return TPageTraits::GetHash(key);
41+
}
42+
};
43+
44+
struct TGhostPageEqual {
45+
using is_transparent = void;
46+
47+
inline bool operator()(const TGhostPage* left, const TGhostPage* right) const {
48+
return TPageTraits::Equals(left->Key, right->Key);
49+
}
50+
51+
inline bool operator()(const TGhostPage* left, const TPageKey& right) const {
52+
return TPageTraits::Equals(left->Key, right);
53+
}
54+
};
55+
56+
public:
57+
TS3FIFOGhostPageQueue(ui64 limit)
58+
: Limit(limit)
59+
{}
60+
61+
void Add(const TPageKey& key, ui64 size) {
62+
if (Y_UNLIKELY(size == 0)) {
63+
Y_DEBUG_ABORT_S("Empty " << TPageTraits::ToString(key) << " page");
64+
return;
65+
}
66+
67+
TGhostPage* ghost = &GhostsQueue.emplace_back(key, size);
68+
if (Y_UNLIKELY(!GhostsSet.emplace(ghost).second)) {
69+
GhostsQueue.pop_back();
70+
Y_DEBUG_ABORT_S("Duplicated " << TPageTraits::ToString(key) << " page");
71+
return;
72+
}
73+
74+
Size += ghost->Size;
75+
76+
EvictWhileFull();
77+
}
78+
79+
bool Erase(const TPageKey& key, ui64 size) {
80+
if (auto it = GhostsSet.find(key); it != GhostsSet.end()) {
81+
TGhostPage* ghost = *it;
82+
Y_DEBUG_ABORT_UNLESS(ghost->Size == size);
83+
Y_ABORT_UNLESS(Size >= ghost->Size);
84+
Size -= ghost->Size;
85+
ghost->Size = 0; // mark as deleted
86+
GhostsSet.erase(it);
87+
return true;
88+
}
89+
return false;
90+
}
91+
92+
void UpdateLimit(ui64 limit) {
93+
Limit = limit;
94+
EvictWhileFull();
95+
}
96+
97+
TString Dump() const {
98+
TStringBuilder result;
99+
size_t count = 0;
100+
ui64 size = 0;
101+
for (auto it = GhostsQueue.begin(); it != GhostsQueue.end(); it++) {
102+
const TGhostPage* ghost = &*it;
103+
if (ghost->Size) { // isn't deleted
104+
Y_DEBUG_ABORT_UNLESS(GhostsSet.contains(ghost));
105+
if (count != 0) result << ", ";
106+
result << "{" << TPageTraits::ToString(ghost->Key) << " " << ghost->Size << "b}";
107+
count++;
108+
size += ghost->Size;
109+
}
110+
}
111+
Y_DEBUG_ABORT_UNLESS(GhostsSet.size() == count);
112+
Y_DEBUG_ABORT_UNLESS(Size == size);
113+
return result;
114+
}
115+
116+
private:
117+
void EvictWhileFull() {
118+
while (!GhostsQueue.empty() && Size > Limit) {
119+
TGhostPage* ghost = &GhostsQueue.front();
120+
if (ghost->Size) { // isn't deleted
121+
Y_ABORT_UNLESS(Size >= ghost->Size);
122+
Size -= ghost->Size;
123+
bool erased = GhostsSet.erase(ghost);
124+
Y_ABORT_UNLESS(erased);
125+
}
126+
GhostsQueue.pop_front();
127+
}
128+
}
129+
130+
ui64 Limit;
131+
ui64 Size = 0;
132+
// TODO: store ghost withing PageMap
133+
THashSet<TGhostPage*, TGhostPageHash, TGhostPageEqual> GhostsSet;
134+
TDeque<TGhostPage> GhostsQueue;
135+
};
136+
137+
template <typename TPage, typename TPageTraits>
138+
class TS3FIFOCache : public ICacheCache<TPage> {
139+
using TPageKey = typename TPageTraits::TPageKey;
140+
141+
struct TLimit {
142+
ui64 SmallQueueLimit;
143+
ui64 MainQueueLimit;
144+
145+
TLimit(ui64 limit)
146+
: SmallQueueLimit(limit / 10)
147+
, MainQueueLimit(limit - SmallQueueLimit)
148+
{}
149+
};
150+
151+
struct TQueue {
152+
TQueue(ES3FIFOPageLocation location)
153+
: Location(location)
154+
{}
155+
156+
ES3FIFOPageLocation Location;
157+
TIntrusiveList<TPage> Queue;
158+
ui64 Size = 0;
159+
};
160+
161+
public:
162+
TS3FIFOCache(ui64 limit)
163+
: Limit(limit)
164+
, SmallQueue(ES3FIFOPageLocation::SmallQueue)
165+
, MainQueue(ES3FIFOPageLocation::MainQueue)
166+
, GhostQueue(limit)
167+
{}
168+
169+
TPage* EvictNext() override {
170+
if (SmallQueue.Queue.Empty() && MainQueue.Queue.Empty()) {
171+
return nullptr;
172+
}
173+
174+
// TODO: account passive pages inside the cache
175+
TLimit savedLimit = std::exchange(Limit, TLimit(SmallQueue.Size + MainQueue.Size - 1));
176+
177+
TPage* evictedPage = EvictOneIfFull();
178+
Y_DEBUG_ABORT_UNLESS(evictedPage);
179+
180+
Limit = savedLimit;
181+
182+
return evictedPage;
183+
}
184+
185+
TIntrusiveList<TPage> Touch(TPage* page) override {
186+
const ES3FIFOPageLocation location = TPageTraits::GetLocation(page);
187+
switch (location) {
188+
case ES3FIFOPageLocation::SmallQueue:
189+
case ES3FIFOPageLocation::MainQueue: {
190+
TouchFast(page);
191+
return {};
192+
}
193+
case ES3FIFOPageLocation::None:
194+
return Insert(page);
195+
default:
196+
Y_ABORT("Unknown page location");
197+
}
198+
}
199+
200+
void Erase(TPage* page) override {
201+
const ES3FIFOPageLocation location = TPageTraits::GetLocation(page);
202+
switch (location) {
203+
case ES3FIFOPageLocation::None:
204+
EraseGhost(page);
205+
break;
206+
case ES3FIFOPageLocation::SmallQueue:
207+
Erase(SmallQueue, page);
208+
break;
209+
case ES3FIFOPageLocation::MainQueue:
210+
Erase(MainQueue, page);
211+
break;
212+
default:
213+
Y_ABORT("Unknown page location");
214+
}
215+
216+
TPageTraits::SetFrequency(page, 0);
217+
}
218+
219+
void UpdateLimit(ui64 limit) override {
220+
Limit = limit;
221+
GhostQueue.UpdateLimit(limit);
222+
}
223+
224+
TString Dump() const {
225+
TStringBuilder result;
226+
227+
auto dump = [&](const TQueue& queue) {
228+
size_t count = 0;
229+
ui64 size = 0;
230+
for (auto it = queue.Queue.begin(); it != queue.Queue.end(); it++) {
231+
const TPage* page = &*it;
232+
if (count != 0) result << ", ";
233+
result << "{" << TPageTraits::GetKeyToString(page) << " " << TPageTraits::GetFrequency(page) << "f " << TPageTraits::GetSize(page) << "b}";
234+
count++;
235+
size += TPageTraits::GetSize(page);
236+
}
237+
Y_DEBUG_ABORT_UNLESS(queue.Size == size);
238+
};
239+
240+
result << "SmallQueue: ";
241+
dump(SmallQueue);
242+
result << Endl << "MainQueue: ";
243+
dump(MainQueue);
244+
result << Endl << "GhostQueue: ";
245+
result << GhostQueue.Dump();
246+
247+
return result;
248+
}
249+
250+
private:
251+
TPage* EvictOneIfFull() {
252+
while (true) {
253+
if (!SmallQueue.Queue.Empty() && SmallQueue.Size > Limit.SmallQueueLimit) {
254+
TPage* page = Pop(SmallQueue);
255+
if (ui32 frequency = TPageTraits::GetFrequency(page); frequency > 1) { // load inserts, first read touches, second read touches
256+
Push(MainQueue, page);
257+
} else {
258+
if (frequency) TPageTraits::SetFrequency(page, 0);
259+
AddGhost(page);
260+
return page;
261+
}
262+
} else if (!MainQueue.Queue.Empty() && MainQueue.Size > Limit.MainQueueLimit) {
263+
TPage* page = Pop(MainQueue);
264+
if (ui32 frequency = TPageTraits::GetFrequency(page); frequency > 0) {
265+
TPageTraits::SetFrequency(page, frequency - 1);
266+
Push(MainQueue, page);
267+
} else {
268+
return page;
269+
}
270+
} else {
271+
break;
272+
}
273+
}
274+
275+
return nullptr;
276+
}
277+
278+
void TouchFast(TPage* page) {
279+
Y_DEBUG_ABORT_UNLESS(TPageTraits::GetLocation(page) != ES3FIFOPageLocation::None);
280+
281+
ui32 frequency = TPageTraits::GetFrequency(page);
282+
if (frequency < 3) {
283+
TPageTraits::SetFrequency(page, frequency + 1);
284+
}
285+
}
286+
287+
TIntrusiveList<TPage> Insert(TPage* page) {
288+
Y_DEBUG_ABORT_UNLESS(TPageTraits::GetLocation(page) == ES3FIFOPageLocation::None);
289+
290+
Push(EraseGhost(page) ? MainQueue : SmallQueue, page);
291+
TPageTraits::SetFrequency(page, 0);
292+
293+
TIntrusiveList<TPage> evictedList;
294+
while (TPage* evictedPage = EvictOneIfFull()) {
295+
evictedList.PushBack(evictedPage);
296+
}
297+
298+
return evictedList;
299+
}
300+
301+
TPage* Pop(TQueue& queue) {
302+
Y_DEBUG_ABORT_UNLESS(!queue.Queue.Empty());
303+
Y_ABORT_UNLESS(TPageTraits::GetLocation(queue.Queue.Front()) == queue.Location);
304+
Y_ABORT_UNLESS(queue.Size >= TPageTraits::GetSize(queue.Queue.Front()));
305+
306+
TPage* page = queue.Queue.PopFront();
307+
queue.Size -= TPageTraits::GetSize(page);
308+
TPageTraits::SetLocation(page, ES3FIFOPageLocation::None);
309+
310+
return page;
311+
}
312+
313+
void Push(TQueue& queue, TPage* page) {
314+
Y_ABORT_UNLESS(TPageTraits::GetLocation(page) == ES3FIFOPageLocation::None);
315+
316+
queue.Queue.PushBack(page);
317+
queue.Size += TPageTraits::GetSize(page);
318+
TPageTraits::SetLocation(page, queue.Location);
319+
}
320+
321+
void Erase(TQueue& queue, TPage* page) {
322+
Y_ABORT_UNLESS(TPageTraits::GetLocation(page) == queue.Location);
323+
Y_ABORT_UNLESS(queue.Size >= TPageTraits::GetSize(page));
324+
325+
page->Unlink();
326+
queue.Size -= TPageTraits::GetSize(page);
327+
TPageTraits::SetLocation(page, ES3FIFOPageLocation::None);
328+
}
329+
330+
void AddGhost(const TPage* page) {
331+
GhostQueue.Add(TPageTraits::GetKey(page), TPageTraits::GetSize(page));
332+
}
333+
334+
bool EraseGhost(const TPage* page) {
335+
return GhostQueue.Erase(TPageTraits::GetKey(page), TPageTraits::GetSize(page));
336+
}
337+
338+
private:
339+
TLimit Limit;
340+
TQueue SmallQueue;
341+
TQueue MainQueue;
342+
TS3FIFOGhostPageQueue<TPageTraits> GhostQueue;
343+
344+
};
345+
346+
}

0 commit comments

Comments
 (0)