Skip to content

Commit e0df5a9

Browse files
snauryblinkov
authored andcommitted
Avoid persistent and in-memory tx status getting out-of-sync (#14781)
1 parent 29a0b4b commit e0df5a9

15 files changed

+668
-213
lines changed

ydb/core/tablet_flat/flat_boot_stages.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,9 @@ namespace NBoot {
194194

195195
const auto was = Back->DatabaseImpl->Rewind(Back->Serial);
196196

197+
// Notify database that all merges have completed
198+
Back->DatabaseImpl->MergeDone();
199+
197200
result.Database = new NTable::TDatabase(Back->DatabaseImpl.Release());
198201

199202
if (auto logl = Env->Logger()->Log(ELnLev::Info)) {

ydb/core/tablet_flat/flat_database.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -572,9 +572,14 @@ TEpoch TDatabase::TxSnapTable(ui32 table)
572572
return DatabaseImpl->FlushTable(table);
573573
}
574574

575-
TAutoPtr<TSubset> TDatabase::Subset(ui32 table, TArrayRef<const TLogoBlobID> bundle, TEpoch before) const
575+
TAutoPtr<TSubset> TDatabase::CompactionSubset(ui32 table, TEpoch before, TArrayRef<const TLogoBlobID> bundle) const
576576
{
577-
return Require(table)->Subset(bundle, before);
577+
return Require(table)->CompactionSubset(before, bundle);
578+
}
579+
580+
TAutoPtr<TSubset> TDatabase::PartSwitchSubset(ui32 table, TEpoch before, TArrayRef<const TLogoBlobID> bundle, TArrayRef<const TLogoBlobID> txStatus) const
581+
{
582+
return Require(table)->PartSwitchSubset(before, bundle, txStatus);
578583
}
579584

580585
TAutoPtr<TSubset> TDatabase::Subset(ui32 table, TEpoch before, TRawVals from, TRawVals to) const
@@ -617,14 +622,13 @@ void TDatabase::ReplaceSlices(ui32 table, TBundleSlicesMap slices)
617622
return DatabaseImpl->ReplaceSlices(table, std::move(slices));
618623
}
619624

620-
void TDatabase::Replace(ui32 table, TArrayRef<const TPartView> partViews, const TSubset &subset)
625+
void TDatabase::Replace(
626+
ui32 table,
627+
const TSubset& subset,
628+
TArrayRef<const TPartView> newParts,
629+
TArrayRef<const TIntrusiveConstPtr<TTxStatusPart>> newTxStatus)
621630
{
622-
return DatabaseImpl->Replace(table, partViews, subset);
623-
}
624-
625-
void TDatabase::ReplaceTxStatus(ui32 table, TArrayRef<const TIntrusiveConstPtr<TTxStatusPart>> txStatus, const TSubset &subset)
626-
{
627-
return DatabaseImpl->ReplaceTxStatus(table, txStatus, subset);
631+
return DatabaseImpl->Replace(table, subset, newParts, newTxStatus);
628632
}
629633

630634
void TDatabase::Merge(ui32 table, TPartView partView)
@@ -642,6 +646,11 @@ void TDatabase::Merge(ui32 table, TIntrusiveConstPtr<TTxStatusPart> txStatus)
642646
return DatabaseImpl->Merge(table, std::move(txStatus));
643647
}
644648

649+
void TDatabase::MergeDone(ui32 table)
650+
{
651+
return DatabaseImpl->MergeDone(table);
652+
}
653+
645654
TAlter& TDatabase::Alter()
646655
{
647656
Y_ABORT_UNLESS(Redo, "Scheme change must be done within a transaction");

ydb/core/tablet_flat/flat_database.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@ class TDatabase {
226226
void UpdateApproximateFreeSharesByChannel(const THashMap<ui32, float>& approximateFreeSpaceShareByChannel);
227227
TString SnapshotToLog(ui32 table, TTxStamp);
228228

229-
TAutoPtr<TSubset> Subset(ui32 table, TArrayRef<const TLogoBlobID> bundle, TEpoch before) const;
229+
TAutoPtr<TSubset> CompactionSubset(ui32 table, TEpoch before, TArrayRef<const TLogoBlobID> bundle) const;
230+
TAutoPtr<TSubset> PartSwitchSubset(ui32 table, TEpoch before, TArrayRef<const TLogoBlobID> bundle, TArrayRef<const TLogoBlobID> txStatus) const;
230231
TAutoPtr<TSubset> Subset(ui32 table, TEpoch before, TRawVals from, TRawVals to) const;
231232
TAutoPtr<TSubset> ScanSnapshot(ui32 table, TRowVersion snapshot = TRowVersion::Max());
232233

@@ -235,11 +236,15 @@ class TDatabase {
235236
TBundleSlicesMap LookupSlices(ui32 table, TArrayRef<const TLogoBlobID> bundles) const;
236237
void ReplaceSlices(ui32 table, TBundleSlicesMap slices);
237238

238-
void Replace(ui32 table, TArrayRef<const TPartView>, const TSubset&);
239-
void ReplaceTxStatus(ui32 table, TArrayRef<const TIntrusiveConstPtr<TTxStatusPart>>, const TSubset&);
239+
void Replace(
240+
ui32 table,
241+
const TSubset&,
242+
TArrayRef<const TPartView>,
243+
TArrayRef<const TIntrusiveConstPtr<TTxStatusPart>>);
240244
void Merge(ui32 table, TPartView);
241245
void Merge(ui32 table, TIntrusiveConstPtr<TColdPart>);
242246
void Merge(ui32 table, TIntrusiveConstPtr<TTxStatusPart>);
247+
void MergeDone(ui32 table);
243248

244249
void DebugDumpTable(ui32 table, IOutputStream& str, const NScheme::TTypeRegistry& typeRegistry) const;
245250
void DebugDump(IOutputStream& str, const NScheme::TTypeRegistry& typeRegistry) const;

ydb/core/tablet_flat/flat_dbase_naked.h

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -481,51 +481,62 @@ namespace NTable {
481481
wrap.Aggr(Stats, true /* enter */);
482482
}
483483

484-
void Replace(ui32 tid, TArrayRef<const TPartView> partViews, const TSubset &subset) noexcept
484+
void Replace(
485+
ui32 tid,
486+
const TSubset &subset,
487+
TArrayRef<const TPartView> newParts,
488+
TArrayRef<const TIntrusiveConstPtr<TTxStatusPart>> newTxStatus) noexcept
485489
{
486490
auto &wrap = Get(tid, true);
487491

488492
wrap.Aggr(Stats, false /* leave */);
489-
wrap->Replace(partViews, subset);
493+
wrap->Replace(subset, newParts, newTxStatus);
490494
wrap.Aggr(Stats, true /* enter */);
491495
}
492496

493-
void ReplaceTxStatus(ui32 tid, TArrayRef<const TIntrusiveConstPtr<TTxStatusPart>> txStatus, const TSubset &subset) noexcept
497+
void Merge(ui32 tid, TPartView partView) noexcept
494498
{
495499
auto &wrap = Get(tid, true);
496500

497501
wrap.Aggr(Stats, false /* leave */);
498-
wrap->ReplaceTxStatus(txStatus, subset);
502+
wrap->Merge(std::move(partView));
499503
wrap.Aggr(Stats, true /* enter */);
500504
}
501505

502-
void Merge(ui32 tid, TPartView partView) noexcept
506+
void Merge(ui32 tid, TIntrusiveConstPtr<TColdPart> part) noexcept
503507
{
504508
auto &wrap = Get(tid, true);
505509

506510
wrap.Aggr(Stats, false /* leave */);
507-
wrap->Merge(std::move(partView));
511+
wrap->Merge(std::move(part));
508512
wrap.Aggr(Stats, true /* enter */);
509513
}
510514

511-
void Merge(ui32 tid, TIntrusiveConstPtr<TColdPart> part) noexcept
515+
void Merge(ui32 tid, TIntrusiveConstPtr<TTxStatusPart> txStatus) noexcept
512516
{
513517
auto &wrap = Get(tid, true);
514518

515519
wrap.Aggr(Stats, false /* leave */);
516-
wrap->Merge(std::move(part));
520+
wrap->Merge(std::move(txStatus));
517521
wrap.Aggr(Stats, true /* enter */);
518522
}
519523

520-
void Merge(ui32 tid, TIntrusiveConstPtr<TTxStatusPart> txStatus) noexcept
524+
void MergeDone(ui32 tid) noexcept
521525
{
522526
auto &wrap = Get(tid, true);
523527

524528
wrap.Aggr(Stats, false /* leave */);
525-
wrap->Merge(std::move(txStatus));
529+
wrap->MergeDone();
526530
wrap.Aggr(Stats, true /* enter */);
527531
}
528532

533+
void MergeDone() noexcept
534+
{
535+
for (auto &pr : Tables) {
536+
MergeDone(pr.first);
537+
}
538+
}
539+
529540
bool ApplySchema(const TSchemeChanges &delta)
530541
{
531542
TModifier modifier(*Scheme);

ydb/core/tablet_flat/flat_executor.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,7 +1479,7 @@ void TExecutor::ApplyExternalPartSwitch(TPendingPartSwitch &partSwitch) {
14791479
}
14801480

14811481
if (partSwitch.FollowerUpdateStep) {
1482-
auto subset = Database->Subset(partSwitch.TableId, partSwitch.Leaving, partSwitch.Head);
1482+
auto subset = Database->PartSwitchSubset(partSwitch.TableId, partSwitch.Head, partSwitch.Leaving, partSwitch.LeavingTxStatus);
14831483

14841484
if (partSwitch.Head != subset->Head) {
14851485
Y_ABORT("Follower table epoch head has diverged from leader");
@@ -1488,30 +1488,36 @@ void TExecutor::ApplyExternalPartSwitch(TPendingPartSwitch &partSwitch) {
14881488
}
14891489

14901490
Y_ABORT_UNLESS(newColdParts.empty(), "Unexpected cold part at a follower");
1491-
Database->Replace(partSwitch.TableId, std::move(newParts), *subset);
1492-
Database->ReplaceTxStatus(partSwitch.TableId, std::move(newTxStatus), *subset);
1491+
Database->Replace(partSwitch.TableId, *subset, std::move(newParts), std::move(newTxStatus));
14931492

14941493
for (auto &gone : subset->Flatten)
14951494
DropCachesOfBundle(*gone);
14961495

14971496
Send(Owner->Tablet(), new TEvTablet::TEvFGcAck(Owner->TabletID(), Generation(), partSwitch.FollowerUpdateStep));
14981497
} else {
1498+
bool merged = false;
14991499
for (auto &partView : newParts) {
15001500
Database->Merge(partSwitch.TableId, partView);
1501+
merged = true;
15011502

15021503
if (CompactionLogic) {
15031504
CompactionLogic->BorrowedPart(partSwitch.TableId, std::move(partView));
15041505
}
15051506
}
15061507
for (auto &part : newColdParts) {
15071508
Database->Merge(partSwitch.TableId, part);
1509+
merged = true;
15081510

15091511
if (CompactionLogic) {
15101512
CompactionLogic->BorrowedPart(partSwitch.TableId, std::move(part));
15111513
}
15121514
}
15131515
for (auto &txStatus : newTxStatus) {
15141516
Database->Merge(partSwitch.TableId, txStatus);
1517+
merged = true;
1518+
}
1519+
if (merged) {
1520+
Database->MergeDone(partSwitch.TableId);
15151521
}
15161522
}
15171523

@@ -1533,7 +1539,7 @@ void TExecutor::ApplyExternalPartSwitch(TPendingPartSwitch &partSwitch) {
15331539
// N.B. there should be a single source table per part switch
15341540
for (auto& [sourceTable, state] : perTable) {
15351541
// Rebase source parts to their respective new epochs
1536-
auto srcSubset = Database->Subset(sourceTable, state.Bundles, NTable::TEpoch::Zero());
1542+
auto srcSubset = Database->PartSwitchSubset(sourceTable, NTable::TEpoch::Zero(), state.Bundles, { });
15371543
TVector<NTable::TPartView> rebased(Reserve(srcSubset->Flatten.size()));
15381544
for (const auto& partView : srcSubset->Flatten) {
15391545
Y_ABORT_UNLESS(!partView->TxIdStats, "Cannot move parts with uncommitted deltas");
@@ -1542,7 +1548,7 @@ void TExecutor::ApplyExternalPartSwitch(TPendingPartSwitch &partSwitch) {
15421548
}
15431549

15441550
// Remove source parts from the source table
1545-
Database->Replace(sourceTable, { }, *srcSubset);
1551+
Database->Replace(sourceTable, *srcSubset, { }, { });
15461552

15471553
if (CompactionLogic) {
15481554
CompactionLogic->RemovedParts(sourceTable, state.Bundles);
@@ -1557,6 +1563,8 @@ void TExecutor::ApplyExternalPartSwitch(TPendingPartSwitch &partSwitch) {
15571563
}
15581564
}
15591565
}
1566+
1567+
Database->MergeDone(partSwitch.TableId);
15601568
}
15611569
}
15621570

@@ -2310,7 +2318,7 @@ void TExecutor::CommitTransactionLog(TAutoPtr<TSeat> seat, TPageCollectionTxEnv
23102318
}
23112319

23122320
// Remove source parts from the source table
2313-
Database->Replace(src, { }, *srcSubset);
2321+
Database->Replace(src, *srcSubset, { }, { });
23142322

23152323
const auto logicResult = CompactionLogic->RemovedParts(src, labels);
23162324

@@ -2342,6 +2350,7 @@ void TExecutor::CommitTransactionLog(TAutoPtr<TSeat> seat, TPageCollectionTxEnv
23422350
Database->Merge(dst, partView);
23432351
CompactionLogic->BorrowedPart(dst, partView);
23442352
}
2353+
Database->MergeDone(dst);
23452354

23462355
// Serialize rebased parts as moved from the source table
23472356
NKikimrExecutorFlat::TTablePartSwitch proto;
@@ -3050,7 +3059,7 @@ THolder<TScanSnapshot> TExecutor::PrepareScanSnapshot(ui32 table, const NTable::
30503059
TAutoPtr<NTable::TSubset> subset;
30513060

30523061
if (params) {
3053-
subset = Database->Subset(table, { }, params->Edge.Head);
3062+
subset = Database->CompactionSubset(table, params->Edge.Head, { });
30543063

30553064
if (params->Parts) {
30563065
subset->Flatten.insert(subset->Flatten.end(), params->Parts.begin(), params->Parts.end());
@@ -3397,8 +3406,7 @@ void TExecutor::Handle(NOps::TEvResult *ops, TProdCompact *msg, bool cancelled)
33973406
newParts.emplace_back(result.Part);
33983407
}
33993408

3400-
Database->Replace(tableId, newParts, *ops->Subset);
3401-
Database->ReplaceTxStatus(tableId, newTxStatus, *ops->Subset);
3409+
Database->Replace(tableId, *ops->Subset, newParts, newTxStatus);
34023410

34033411
TVector<TLogoBlobID> bundles(Reserve(ops->Subset->Flatten.size() + ops->Subset->ColdParts.size()));
34043412
for (auto &part: ops->Subset->Flatten) {
@@ -4525,23 +4533,28 @@ ui64 TExecutor::BeginCompaction(THolder<NTable::TCompactionParams> params)
45254533
if (!memTableSnapshot->GetCommittedTransactions().empty() || !memTableSnapshot->GetRemovedTransactions().empty()) {
45264534
// We must compact tx status when mem table has changes
45274535
compactTxStatus = true;
4536+
break;
45284537
}
45294538
}
45304539
for (const auto& txStatus : snapshot->Subset->TxStatus) {
45314540
if (txStatus->Label.TabletID() != Owner->TabletID()) {
45324541
// We want to compact borrowed tx status
45334542
compactTxStatus = true;
4543+
break;
45344544
}
45354545
}
4546+
if (snapshot->Subset->TxStatus && snapshot->Subset->GarbageTransactions) {
4547+
// We want to remove garbage transactions
4548+
compactTxStatus = true;
4549+
}
45364550

45374551
if (compactTxStatus) {
4538-
comp->CommittedTransactions = snapshot->Subset->CommittedTransactions;
4539-
comp->RemovedTransactions = snapshot->Subset->RemovedTransactions;
45404552
comp->Frozen.reserve(snapshot->Subset->Frozen.size());
45414553
for (auto& memTableSnapshot : snapshot->Subset->Frozen) {
45424554
comp->Frozen.push_back(memTableSnapshot.MemTable);
45434555
}
45444556
comp->TxStatus = snapshot->Subset->TxStatus;
4557+
comp->GarbageTransactions = snapshot->Subset->GarbageTransactions;
45454558
} else {
45464559
// We are not compacting tx status, avoid deleting current blobs
45474560
snapshot->Subset->TxStatus.clear();

ydb/core/tablet_flat/flat_executor_misc.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,11 @@ namespace NTabletFlatExecutor {
3434
THolder<NTable::TCompactionParams> Params;
3535
NTable::TRowVersionRanges::TSnapshot RemovedRowVersions;
3636

37-
// Non-empty when compaction also needs to write a tx status table part
38-
NTable::TTransactionMap CommittedTransactions;
39-
NTable::TTransactionSet RemovedTransactions;
40-
// The above may contain extra keys, these allow them to be narrowed
37+
// Non-empty when compaction also needs to produce a tx status table part
4138
TVector<TIntrusiveConstPtr<NTable::TMemTable>> Frozen;
4239
TVector<TIntrusiveConstPtr<NTable::TTxStatusPart>> TxStatus;
40+
// Non-empty for transactions that no longer need their status maintained
41+
NTable::TTransactionSet GarbageTransactions;
4342
};
4443

4544
}

0 commit comments

Comments
 (0)