Skip to content

Commit ffa5093

Browse files
Snapshot livetime control (#12301)
1 parent 7d9ab3e commit ffa5093

File tree

10 files changed

+84
-56
lines changed

10 files changed

+84
-56
lines changed

ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) {
100100
Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable);
101101
Controller->SetExpectedShardsCount(ShardsCount);
102102
Controller->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1));
103-
Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1));
103+
Controller->SetOverrideMaxReadStaleness(TDuration::Seconds(1));
104104

105105
Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").Initialize();
106106

@@ -117,7 +117,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) {
117117
}
118118

119119
void WaitNormalization() {
120-
Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1));
120+
Controller->SetOverrideMaxReadStaleness(TDuration::Seconds(1));
121121
Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Force);
122122
const auto start = TInstant::Now();
123123
while (!Controller->IsTrivialLinks() && TInstant::Now() - start < TDuration::Seconds(30)) {
@@ -126,11 +126,11 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) {
126126
}
127127
AFL_VERIFY(Controller->IsTrivialLinks());
128128
Controller->CheckInvariants();
129-
Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5));
129+
Controller->SetOverrideMaxReadStaleness(TDuration::Minutes(5));
130130
}
131131

132132
void Execute(const ui64 destinationIdx, const std::vector<ui64>& sourceIdxs, const bool move, const NOlap::TSnapshot& snapshot, const std::set<ui64>& pathIdxs) {
133-
Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1));
133+
Controller->SetOverrideMaxReadStaleness(TDuration::Seconds(1));
134134
AFL_VERIFY(destinationIdx < ShardIds.size());
135135
const ui64 destination = ShardIds[destinationIdx];
136136
std::vector<ui64> sources;
@@ -198,7 +198,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) {
198198
CSTransferStatus->Reset();
199199
AFL_VERIFY(!Controller->IsTrivialLinks());
200200
Controller->CheckInvariants();
201-
Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5));
201+
Controller->SetOverrideMaxReadStaleness(TDuration::Minutes(5));
202202
}
203203
};
204204
Y_UNIT_TEST(BlobsSharingSplit1_1) {

ydb/core/kqp/ut/olap/write_ut.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) {
251251
.ExtractValueSync();
252252
UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString());
253253
}
254-
csController->SetOverrideReadTimeoutClean(TDuration::Zero());
254+
csController->SetOverrideMaxReadStaleness(TDuration::Zero());
255255
csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC);
256256
{
257257
const TInstant start = TInstant::Now();

ydb/core/tx/columnshard/columnshard.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,16 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon
200200
}
201201

202202
// Handler for TEvPingSnapshotsUsage. New-side behavior: reads the staleness/livetime
// settings from the column shard controller, pings InFlightReadsTracker with them,
// executes the transaction returned by the tracker (if any), and schedules the next
// TEvPingSnapshotsUsage event.
void TColumnShard::Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& /*ev*/, const TActorContext& ctx) {
203-
if (auto writeTx =
204-
InFlightReadsTracker.Ping(this, NYDBTest::TControllers::GetColumnShardController()->GetPingCheckPeriod(), TInstant::Now())) {
203+
const TDuration stalenessLivetime = NYDBTest::TControllers::GetColumnShardController()->GetMaxReadStaleness();
204+
const TDuration stalenessInMem = NYDBTest::TControllers::GetColumnShardController()->GetMaxReadStalenessInMem();
205+
const TDuration usedLivetime = NYDBTest::TControllers::GetColumnShardController()->GetUsedSnapshotLivetime();
206+
// Invariant: usedLivetime must be strictly less than stalenessInMem; the only
// accepted equality is when both are zero.
AFL_VERIFY(usedLivetime < stalenessInMem || (stalenessInMem == usedLivetime && usedLivetime == TDuration::Zero()))("used", usedLivetime)(
207+
"staleness", stalenessInMem);
208+
// Base ping interval: 30% of the smaller of the two gaps
// (stalenessInMem - usedLivetime) and (stalenessLivetime - stalenessInMem).
// NOTE(review): if both values above are zero this interval is zero as well —
// confirm that re-scheduling with a zero delay is the intended behavior.
const TDuration ping = 0.3 * std::min(stalenessInMem - usedLivetime, stalenessLivetime - stalenessInMem);
209+
if (auto writeTx = InFlightReadsTracker.Ping(this, stalenessInMem, usedLivetime, TInstant::Now())) {
205210
Execute(writeTx.release(), ctx);
206211
}
207-
ctx.Schedule(0.3 * GetMaxReadStaleness(), new TEvPrivate::TEvPingSnapshotsUsage());
212+
// The controller gets the computed interval and returns the delay that is actually used.
ctx.Schedule(NYDBTest::TControllers::GetColumnShardController()->GetStalenessLivetimePing(ping), new TEvPrivate::TEvPingSnapshotsUsage());
208213
}
209214

210215
void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) {

ydb/core/tx/columnshard/columnshard_impl.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ ui64 TColumnShard::GetOutdatedStep() const {
194194
}
195195

196196
NOlap::TSnapshot TColumnShard::GetMinReadSnapshot() const {
197-
ui64 delayMillisec = GetMaxReadStaleness().MilliSeconds();
197+
ui64 delayMillisec = NYDBTest::TControllers::GetColumnShardController()->GetMaxReadStaleness().MilliSeconds();
198198
ui64 passedStep = GetOutdatedStep();
199199
ui64 minReadStep = (passedStep > delayMillisec ? passedStep - delayMillisec : 0);
200200

@@ -1600,8 +1600,4 @@ const NKikimr::NColumnShard::NTiers::TManager* TColumnShard::GetTierManagerPoint
16001600
return Tiers->GetManagerOptional(tierId);
16011601
}
16021602

1603-
TDuration TColumnShard::GetMaxReadStaleness() {
1604-
return NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean();
1605-
}
1606-
16071603
} // namespace NKikimr::NColumnShard

ydb/core/tx/columnshard/columnshard_impl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,6 @@ class TColumnShard: public TActor<TColumnShard>, public NTabletFlatExecutor::TTa
536536
TLimits Limits;
537537
NOlap::TNormalizationController NormalizerController;
538538
NDataShard::TSysLocks SysLocks;
539-
static TDuration GetMaxReadStaleness();
540539

541540
void TryRegisterMediatorTimeCast();
542541
void UnregisterMediatorTimeCast();

ydb/core/tx/columnshard/hooks/abstract/abstract.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ class ICSController {
6161
};
6262

6363
protected:
64+
// Overridable hook that may supply a ping interval; the base implementation
// returns an empty optional (no override).
virtual std::optional<TDuration> DoGetStalenessLivetimePing() const {
65+
return {};
66+
}
6467
// Overridable hook; the base implementation does nothing.
// NOTE(review): presumably invoked once a tablet finishes initialization — the call site is not visible here.
virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) {
6568
return;
6669
}
@@ -85,7 +88,7 @@ class ICSController {
8588
// Overridable hook; the base implementation does nothing and ignores both arguments.
// NOTE(review): presumably invoked when a data sharing session starts — the call site is not visible here.
virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& /*sessionId*/) {
8689
}
8790

88-
virtual TDuration DoGetPingCheckPeriod(const TDuration defaultValue) const {
91+
// Overridable hook for the used-snapshot livetime; the base implementation
// returns defaultValue unchanged.
virtual TDuration DoGetUsedSnapshotLivetime(const TDuration defaultValue) const {
8992
return defaultValue;
9093
}
9194
virtual TDuration DoGetOverridenGCPeriod(const TDuration defaultValue) const {
@@ -109,7 +112,7 @@ class ICSController {
109112
// Overridable hook; the base implementation returns defaultValue unchanged.
virtual ui64 DoGetSmallPortionSizeDetector(const ui64 defaultValue) const {
110113
return defaultValue;
111114
}
112-
virtual TDuration DoGetReadTimeoutClean(const TDuration defaultValue) const {
115+
// Overridable hook for the maximum read staleness; the base implementation
// returns defaultValue unchanged.
virtual TDuration DoGetMaxReadStaleness(const TDuration defaultValue) const {
113116
return defaultValue;
114117
}
115118
virtual TDuration DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const {
@@ -158,9 +161,13 @@ class ICSController {
158161
}
159162
virtual bool CheckPortionForEvict(const NOlap::TPortionInfo& portion) const;
160163

161-
TDuration GetPingCheckPeriod() const {
162-
const TDuration defaultValue = 0.6 * GetReadTimeoutClean();
163-
return DoGetPingCheckPeriod(defaultValue);
164+
// Chooses the ping interval to use.
// Returns defValue when DoGetStalenessLivetimePing() supplies no value or supplies
// one larger than defValue; otherwise returns the supplied value. In effect the
// hook can only shorten the interval, never lengthen it.
TDuration GetStalenessLivetimePing(const TDuration defValue) const {
165+
const auto val = DoGetStalenessLivetimePing();
166+
if (!val || defValue < *val) {
167+
return defValue;
168+
} else {
169+
return *val;
170+
}
164171
}
165172

166173
virtual bool IsBackgroundEnabled(const EBackground /*id*/) const {
@@ -261,9 +268,16 @@ class ICSController {
261268
}
262269
// Overridable callback; the base implementation does nothing and ignores the result.
virtual void OnIndexSelectProcessed(const std::optional<bool> /*result*/) {
263270
}
264-
TDuration GetReadTimeoutClean() const {
271+
// Maximum read staleness: the configured MaxReadStaleness_ms value converted to a
// TDuration and passed through the DoGetMaxReadStaleness hook.
TDuration GetMaxReadStaleness() const {
265272
const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetMaxReadStaleness_ms());
266-
return DoGetReadTimeoutClean(defaultValue);
273+
return DoGetMaxReadStaleness(defaultValue);
274+
}
275+
// Returns 90% of GetMaxReadStaleness().
TDuration GetMaxReadStalenessInMem() const {
276+
return 0.9 * GetMaxReadStaleness();
277+
}
278+
// Used-snapshot livetime: defaults to 60% of GetMaxReadStaleness() and is passed
// through the DoGetUsedSnapshotLivetime hook.
TDuration GetUsedSnapshotLivetime() const {
279+
const TDuration defaultValue = 0.6 * GetMaxReadStaleness();
280+
return DoGetUsedSnapshotLivetime(defaultValue);
267281
}
268282
virtual EOptimizerCompactionWeightControl GetCompactionControl() const {
269283
return EOptimizerCompactionWeightControl::Force;

ydb/core/tx/columnshard/hooks/testing/controller.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ namespace NKikimr::NYDBTest::NColumnShard {
1212
class TController: public TReadOnlyController {
1313
private:
1414
using TBase = TReadOnlyController;
15-
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideRequestsTracePingCheckPeriod);
15+
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideUsedSnapshotLivetime);
16+
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideStalenessLivetimePing);
1617
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideLagForCompactionBeforeTierings);
1718
YDB_ACCESSOR(std::optional<TDuration>, OverrideGuaranteeIndexationInterval, TDuration::Zero());
1819
YDB_ACCESSOR(std::optional<TDuration>, OverridePeriodicWakeupActivationPeriod, std::nullopt);
@@ -21,7 +22,7 @@ class TController: public TReadOnlyController {
2122
YDB_ACCESSOR(std::optional<TDuration>, OverrideOptimizerFreshnessCheckDuration, TDuration::Zero());
2223
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideCompactionActualizationLag);
2324
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideTasksActualizationLag);
24-
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideReadTimeoutClean);
25+
YDB_ACCESSOR_DEF(std::optional<TDuration>, OverrideMaxReadStaleness);
2526
YDB_ACCESSOR(std::optional<ui64>, OverrideMemoryLimitForPortionReading, 100);
2627
YDB_ACCESSOR_DEF(std::optional<NKikimrProto::EReplyStatus>, OverrideBlobPutResultOnWriteValue);
2728

@@ -142,10 +143,12 @@ class TController: public TReadOnlyController {
142143
return OverrideLagForCompactionBeforeTierings.value_or(def);
143144
}
144145

145-
virtual TDuration DoGetPingCheckPeriod(const TDuration def) const override {
146-
return OverrideRequestsTracePingCheckPeriod.value_or(def);
146+
// Returns OverrideUsedSnapshotLivetime when it is set, otherwise def.
virtual TDuration DoGetUsedSnapshotLivetime(const TDuration def) const override {
147+
return OverrideUsedSnapshotLivetime.value_or(def);
148+
}
149+
// Returns OverrideStalenessLivetimePing as-is (an empty optional when no override is set).
virtual std::optional<TDuration> DoGetStalenessLivetimePing() const override {
150+
return OverrideStalenessLivetimePing;
147151
}
148-
149152
// Returns OverrideCompactionActualizationLag when it is set, otherwise def.
virtual TDuration DoGetCompactionActualizationLag(const TDuration def) const override {
150153
return OverrideCompactionActualizationLag.value_or(def);
151154
}
@@ -180,8 +183,8 @@ class TController: public TReadOnlyController {
180183
// Returns OverrideOptimizerFreshnessCheckDuration when it is set, otherwise defaultValue.
virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override {
181184
return OverrideOptimizerFreshnessCheckDuration.value_or(defaultValue);
182185
}
183-
virtual TDuration DoGetReadTimeoutClean(const TDuration def) const override {
184-
return OverrideReadTimeoutClean.value_or(def);
186+
// Returns OverrideMaxReadStaleness when it is set, otherwise def.
virtual TDuration DoGetMaxReadStaleness(const TDuration def) const override {
187+
return OverrideMaxReadStaleness.value_or(def);
185188
}
186189
virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 def) const override {
187190
return OverrideReduceMemoryIntervalLimit.value_or(def);

ydb/core/tx/columnshard/inflight_request_tracker.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ NOlap::NReader::TReadMetadataBase::TConstPtr TInFlightReadsTracker::ExtractInFli
1919
{
2020
auto it = SnapshotsLive.find(readMetaBase->GetRequestSnapshot());
2121
AFL_VERIFY(it != SnapshotsLive.end());
22-
if (it->second.DelRequest(cookie, now)) {
23-
SnapshotsLive.erase(it);
24-
}
22+
Y_UNUSED(it->second.DelRequest(cookie, now));
2523
}
2624

2725
if (NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta =
@@ -93,27 +91,29 @@ class TTransactionSavePersistentSnapshots: public NOlap::NDataSharing::TExtended
9391
} // namespace
9492

9593
std::unique_ptr<NTabletFlatExecutor::ITransaction> TInFlightReadsTracker::Ping(
96-
TColumnShard* self, const TDuration critDuration, const TInstant now) {
94+
TColumnShard* self, const TDuration stalenessInMem, const TDuration usedSnapshotLivetime, const TInstant now) {
9795
std::set<NOlap::TSnapshot> snapshotsToSave;
98-
std::set<NOlap::TSnapshot> snapshotsToFree;
96+
std::set<NOlap::TSnapshot> snapshotsToFreeInDB;
97+
std::set<NOlap::TSnapshot> snapshotsToFreeInMem;
9998
for (auto&& i : SnapshotsLive) {
100-
if (i.second.Ping(critDuration, now)) {
99+
if (i.second.IsExpired(usedSnapshotLivetime, now)) {
101100
if (i.second.GetIsLock()) {
102-
Counters->OnSnapshotLocked();
103-
snapshotsToSave.emplace(i.first);
104-
} else {
105101
Counters->OnSnapshotUnlocked();
106-
snapshotsToFree.emplace(i.first);
102+
snapshotsToFreeInDB.emplace(i.first);
107103
}
104+
snapshotsToFreeInMem.emplace(i.first);
105+
} else if (i.second.CheckToLock(stalenessInMem, usedSnapshotLivetime, now)) {
106+
Counters->OnSnapshotLocked();
107+
snapshotsToSave.emplace(i.first);
108108
}
109109
}
110-
for (auto&& i : snapshotsToFree) {
110+
for (auto&& i : snapshotsToFreeInMem) {
111111
SnapshotsLive.erase(i);
112112
}
113113
Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean());
114-
if (snapshotsToFree.size() || snapshotsToSave.size()) {
115-
NYDBTest::TControllers::GetColumnShardController()->OnRequestTracingChanges(snapshotsToSave, snapshotsToFree);
116-
return std::make_unique<TTransactionSavePersistentSnapshots>(self, std::move(snapshotsToSave), std::move(snapshotsToFree));
114+
if (snapshotsToFreeInDB.size() || snapshotsToSave.size()) {
115+
NYDBTest::TControllers::GetColumnShardController()->OnRequestTracingChanges(snapshotsToSave, snapshotsToFreeInMem);
116+
return std::make_unique<TTransactionSavePersistentSnapshots>(self, std::move(snapshotsToSave), std::move(snapshotsToFreeInDB));
117117
} else {
118118
return nullptr;
119119
}

ydb/core/tx/columnshard/inflight_request_tracker.h

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ using NOlap::IBlobInUseTracker;
1717
class TSnapshotLiveInfo {
1818
private:
1919
const NOlap::TSnapshot Snapshot;
20-
std::optional<TInstant> LastPingInstant;
2120
std::optional<TInstant> LastRequestFinishedInstant;
2221
THashSet<ui32> Requests;
2322
YDB_READONLY(bool, IsLock, false);
@@ -48,22 +47,32 @@ class TSnapshotLiveInfo {
4847

4948
// Builds a TSnapshotLiveInfo for reqSnapshot that starts out locked (IsLock = true).
// New-side behavior: LastRequestFinishedInstant is set to the current time.
// NOTE(review): judging by the name, this is used for snapshots restored from the
// database — the caller is not visible here.
static TSnapshotLiveInfo BuildFromDatabase(const NOlap::TSnapshot& reqSnapshot) {
5049
TSnapshotLiveInfo result(reqSnapshot);
51-
result.LastPingInstant = TInstant::Now();
52-
result.LastRequestFinishedInstant = result.LastPingInstant;
50+
result.LastRequestFinishedInstant = TInstant::Now();
5351
result.IsLock = true;
5452
return result;
5553
}
5654

57-
bool Ping(const TDuration critDuration, const TInstant now) {
58-
LastPingInstant = now;
59-
if (Requests.empty()) {
60-
AFL_VERIFY(LastRequestFinishedInstant);
61-
if (critDuration < *LastPingInstant - *LastRequestFinishedInstant && IsLock) {
62-
IsLock = false;
55+
// Reports whether the snapshot has been idle for longer than critDuration.
// Returns false while Requests is non-empty. Otherwise LastRequestFinishedInstant
// must be set (verified), and the result is true when the time elapsed between it
// and now strictly exceeds critDuration.
bool IsExpired(const TDuration critDuration, const TInstant now) const {
56+
if (Requests.size()) {
57+
return false;
58+
}
59+
AFL_VERIFY(LastRequestFinishedInstant);
60+
return critDuration < now - *LastRequestFinishedInstant;
61+
}
62+
63+
bool CheckToLock(const TDuration snapshotLivetime, const TDuration usedSnapshotGuaranteeLivetime, const TInstant now) {
64+
if (IsLock) {
65+
return false;
66+
}
67+
68+
if (Requests.size()) {
69+
if (now + usedSnapshotGuaranteeLivetime > Snapshot.GetPlanInstant() + snapshotLivetime) {
70+
IsLock = true;
6371
return true;
6472
}
6573
} else {
66-
if (critDuration < *LastPingInstant - Snapshot.GetPlanInstant() && !IsLock) {
74+
AFL_VERIFY(LastRequestFinishedInstant);
75+
if (*LastRequestFinishedInstant + usedSnapshotGuaranteeLivetime > Snapshot.GetPlanInstant() + snapshotLivetime) {
6776
IsLock = true;
6877
return true;
6978
}
@@ -88,7 +97,8 @@ class TInFlightReadsTracker {
8897

8998
bool LoadFromDatabase(NTable::TDatabase& db);
9099

91-
[[nodiscard]] std::unique_ptr<NTabletFlatExecutor::ITransaction> Ping(TColumnShard* self, const TDuration critDuration, const TInstant now);
100+
[[nodiscard]] std::unique_ptr<NTabletFlatExecutor::ITransaction> Ping(
101+
TColumnShard* self, const TDuration stalenessInMem, const TDuration usedSnapshotLivetime, const TInstant now);
92102

93103
// Returns a unique cookie associated with this request
94104
[[nodiscard]] ui64 AddInFlightRequest(

ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ void TestWriteReadDup(const TestTableDescription& table = {}) {
552552
void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString codec = "") {
553553
auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard<TDefaultTestsController>();
554554
csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction);
555-
csControllerGuard->SetOverrideReadTimeoutClean(TDuration::Max());
555+
csControllerGuard->SetOverrideMaxReadStaleness(TDuration::Max());
556556
TTestBasicRuntime runtime;
557557
TTester::Setup(runtime);
558558

@@ -2654,7 +2654,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) {
26542654
PlanCommit(runtime, sender, planStep, txId);
26552655
}
26562656
UNIT_ASSERT_EQUAL(cleanupsHappened, 0);
2657-
csDefaultControllerGuard->SetOverrideRequestsTracePingCheckPeriod(TDuration::Zero());
2657+
csDefaultControllerGuard->SetOverrideStalenessLivetimePing(TDuration::Zero());
2658+
csDefaultControllerGuard->SetOverrideUsedSnapshotLivetime(TDuration::Zero());
26582659
{
26592660
auto read = std::make_unique<NColumnShard::TEvPrivate::TEvPingSnapshotsUsage>();
26602661
ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release());

0 commit comments

Comments
 (0)