Skip to content

Commit 57f6f63

Browse files
authored
Add db counters for uncommitted changes and suspicious commits (#12966)
1 parent 1049a71 commit 57f6f63

13 files changed

+148
-8
lines changed

ydb/core/protos/counters_datashard.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ enum ECumulativeCounters {
153153
COUNTER_WRITE_BYTES = 111 [(CounterOpts) = {Name: "WriteBytes"}];
154154
COUNTER_WRITE_DISK_SPACE_EXHAUSTED = 112 [(CounterOpts) = {Name: "WriteDiskSpaceExhausted"}];
155155
COUNTER_PREPARE_DISK_SPACE_EXHAUSTED = 113 [(CounterOpts) = {Name: "PrepareSpaceExhausted"}];
156+
COUNTER_REMOVED_COMMITTED_TXS = 114 [(CounterOpts) = {Name: "RemovedCommittedTxs"}];
156157
}
157158

158159
enum EPercentileCounters {

ydb/core/tablet_flat/flat_database.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,11 @@ const TDbStats& TDatabase::Counters() const noexcept
518518
return DatabaseImpl->Stats;
519519
}
520520

521+
// Returns the database runtime statistics by value, forwarding to
// DatabaseImpl->GetRuntimeStats(). Unlike Counters(), which hands out a
// reference to stored stats, the result here is a freshly built object.
TDbRuntimeStats TDatabase::RuntimeCounters() const noexcept
522+
{
523+
return DatabaseImpl->GetRuntimeStats();
524+
}
525+
521526
void TDatabase::UpdateApproximateFreeSharesByChannel(const THashMap<ui32, float>& approximateFreeSpaceShareByChannel)
522527
{
523528
for (auto& [channel, value] : approximateFreeSpaceShareByChannel) {

ydb/core/tablet_flat/flat_database.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ class TDatabase {
4242
public:
4343
using TMemGlobs = TVector<NPageCollection::TMemGlob>;
4444
using TCookieAllocator = NPageCollection::TCookieAllocator;
45-
using TCounters = TDbStats;
4645

4746
struct TProd {
4847
THolder<TChange> Change;
@@ -221,7 +220,9 @@ class TDatabase {
221220
ui64 GetTableIndexSize(ui32 table) const;
222221
ui64 GetTableSearchHeight(ui32 table) const;
223222
ui64 EstimateRowSize(ui32 table) const;
224-
const TCounters& Counters() const noexcept;
223+
const TDbStats& Counters() const noexcept;
224+
TDbRuntimeStats RuntimeCounters() const noexcept;
225+
225226
void UpdateApproximateFreeSharesByChannel(const THashMap<ui32, float>& approximateFreeSpaceShareByChannel);
226227
TString SnapshotToLog(ui32 table, TTxStamp);
227228

ydb/core/tablet_flat/flat_dbase_misc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,7 @@ namespace NTable {
3232
THashMap<ui32, float> NormalizedFreeSpaceShareByChannel;
3333
};
3434

35+
// Database-level runtime stats reuse the per-table stats type as-is.
// NOTE(review): TTableRuntimeStats is declared elsewhere; its fields are not visible here.
using TDbRuntimeStats = TTableRuntimeStats;
36+
3537
}
3638
}

ydb/core/tablet_flat/flat_dbase_naked.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,16 @@ namespace NTable {
781781
}
782782
}
783783

784+
public:
785+
// Builds database-wide runtime stats by walking every entry of Tables and
// accumulating each table's RuntimeStats() into one result via operator+=.
// The aggregate is recomputed from scratch on every call (see the TODO below).
// NOTE(review): assumes a default-constructed TDbRuntimeStats starts at zero —
// the type's member initializers are not visible here, confirm.
TDbRuntimeStats GetRuntimeStats() const {
786+
TDbRuntimeStats stats;
787+
for (auto& pr : Tables) {
788+
// TODO: use a lazy aggregate to balance many idle tables vs frequent updates
789+
stats += pr.second->RuntimeStats();
790+
}
791+
return stats;
792+
}
793+
784794
private:
785795
const TIntrusivePtr<TKeyRangeCacheNeedGCList> GCList;
786796
const TTxStamp Weak; /* db bootstrap upper stamp */

ydb/core/tablet_flat/flat_executor.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3565,6 +3565,16 @@ void TExecutor::UpdateCounters(const TActorContext &ctx) {
35653565
Counters->Simple()[TExecutorCounters::USED_TABLET_MEMORY].Set(UsedTabletMemory);
35663566
}
35673567

3568+
// Runtime stats related to uncommitted changes
3569+
auto runtimeCounters = Database->RuntimeCounters();
3570+
{
3571+
Counters->Simple()[TExecutorCounters::DB_OPEN_TX_COUNT].Set(runtimeCounters.OpenTxCount);
3572+
Counters->Simple()[TExecutorCounters::DB_TXS_WITH_DATA_COUNT].Set(runtimeCounters.TxsWithDataCount);
3573+
Counters->Simple()[TExecutorCounters::DB_COMMITTED_TX_COUNT].Set(runtimeCounters.CommittedTxCount);
3574+
Counters->Simple()[TExecutorCounters::DB_REMOVED_TX_COUNT].Set(runtimeCounters.RemovedTxCount);
3575+
Counters->Simple()[TExecutorCounters::DB_REMOVED_COMMITTED_TXS].Set(runtimeCounters.RemovedCommittedTxs);
3576+
}
3577+
35683578
if (CommitManager) /* exists only on leader, mostly storage usage data */ {
35693579
auto redo = LogicRedo->LogStats();
35703580
Counters->Simple()[TExecutorCounters::LOG_REDO_COUNT].Set(redo.Items);

ydb/core/tablet_flat/flat_executor_counters.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ namespace NTabletFlatExecutor {
6565
XX(DB_FLAT_INDEX_BYTES, "DbFlatIndexBytes") \
6666
XX(DB_B_TREE_INDEX_BYTES, "DbBTreeIndexBytes") \
6767
XX(CACHE_TOTAL_USED, "CacheTotalUsed") \
68+
XX(DB_OPEN_TX_COUNT, "DbOpenTxCount") \
69+
XX(DB_TXS_WITH_DATA_COUNT, "DbTxsWithDataCount") \
70+
XX(DB_COMMITTED_TX_COUNT, "DbCommittedTxCount") \
71+
XX(DB_REMOVED_TX_COUNT, "DbRemovedTxCount") \
72+
XX(DB_REMOVED_COMMITTED_TXS, "DbRemovedCommittedTxs") \
6873

6974
// don't change order!
7075
#define FLAT_EXECUTOR_CUMULATIVE_COUNTERS_MAP(XX) \

ydb/core/tablet_flat/flat_table.cpp

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,11 @@ void TTable::Merge(TIntrusiveConstPtr<TTxStatusPart> txStatus) noexcept
632632
if (const auto* prev = CommittedTransactions.Find(txId); Y_LIKELY(!prev) || *prev > rowVersion) {
633633
CommittedTransactions.Add(txId, rowVersion);
634634
if (!prev) {
635-
RemovedTransactions.Remove(txId);
635+
if (RemovedTransactions.Remove(txId)) {
636+
// Transaction was in a removed set and now it's committed
637+
// This is not an error in some cases, but may be suspicious
638+
RemovedCommittedTxs++;
639+
}
636640
}
637641
}
638642
if (!TxRefs.contains(txId)) {
@@ -645,6 +649,10 @@ void TTable::Merge(TIntrusiveConstPtr<TTxStatusPart> txStatus) noexcept
645649
const ui64 txId = item.GetTxId();
646650
if (const auto* prev = CommittedTransactions.Find(txId); Y_LIKELY(!prev)) {
647651
RemovedTransactions.Add(txId);
652+
} else {
653+
// Transaction is in a committed set but also removed
654+
// This is not an error in some cases, but may be suspicious
655+
RemovedCommittedTxs++;
648656
}
649657
if (!TxRefs.contains(txId)) {
650658
CheckTransactions.insert(txId);
@@ -944,7 +952,11 @@ void TTable::CommitTx(ui64 txId, TRowVersion rowVersion)
944952
if (RollbackState && RemovedTransactions.Contains(txId)) {
945953
RollbackOps.emplace_back(TRollbackAddRemovedTx{ txId });
946954
}
947-
RemovedTransactions.Remove(txId);
955+
if (RemovedTransactions.Remove(txId)) {
956+
// Transaction was in a removed set and now it's committed
957+
// This is not an error in some cases, but may be suspicious
958+
RemovedCommittedTxs++;
959+
}
948960
}
949961
if (auto it = OpenTxs.find(txId); it != OpenTxs.end()) {
950962
if (RollbackState) {
@@ -982,6 +994,10 @@ void TTable::RemoveTx(ui64 txId)
982994
}
983995
OpenTxs.erase(it);
984996
}
997+
} else {
998+
// Transaction is in a committed set but also removed
999+
// This is not an error in some cases, but may be suspicious
1000+
RemovedCommittedTxs++;
9851001
}
9861002
}
9871003

@@ -1015,6 +1031,32 @@ size_t TTable::GetOpenTxCount() const
10151031
return OpenTxs.size();
10161032
}
10171033

1034+
// Returns the current number of entries in TxRefs.
// NOTE(review): by its name this is the count of transactions that still have
// data in the table — confirm against how TxRefs is maintained.
size_t TTable::GetTxsWithDataCount() const
1035+
{
1036+
return TxRefs.size();
1037+
}
1038+
1039+
// Returns the current number of entries in CommittedTransactions.
size_t TTable::GetCommittedTxCount() const
1040+
{
1041+
return CommittedTransactions.Size();
1042+
}
1043+
1044+
// Returns the current number of entries in RemovedTransactions.
size_t TTable::GetRemovedTxCount() const
1045+
{
1046+
return RemovedTransactions.Size();
1047+
}
1048+
1049+
// Returns a by-value snapshot of this table's transaction bookkeeping:
// the sizes of OpenTxs, TxRefs, CommittedTransactions and RemovedTransactions,
// plus the RemovedCommittedTxs counter.
// RemovedCommittedTxs is incremented in Merge, CommitTx and RemoveTx whenever
// a transaction turns out to be both committed and removed; per the comments
// at those sites this is not always an error, but may be suspicious.
TTableRuntimeStats TTable::RuntimeStats() const noexcept
1050+
{
1051+
return TTableRuntimeStats{
1052+
.OpenTxCount = OpenTxs.size(),
1053+
.TxsWithDataCount = TxRefs.size(),
1054+
.CommittedTxCount = CommittedTransactions.Size(),
1055+
.RemovedTxCount = RemovedTransactions.Size(),
1056+
.RemovedCommittedTxs = RemovedCommittedTxs,
1057+
};
1058+
}
1059+
10181060
TMemTable& TTable::MemTable()
10191061
{
10201062
if (!Mutable) {

ydb/core/tablet_flat/flat_table.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ class TTable: public TAtomicRefCount<TTable> {
184184

185185
const absl::flat_hash_set<ui64>& GetOpenTxs() const;
186186
size_t GetOpenTxCount() const;
187+
size_t GetTxsWithDataCount() const;
188+
size_t GetCommittedTxCount() const;
189+
size_t GetRemovedTxCount() const;
187190

188191
TPartView GetPartView(const TLogoBlobID &bundle) const
189192
{
@@ -240,6 +243,8 @@ class TTable: public TAtomicRefCount<TTable> {
240243
return Stat_;
241244
}
242245

246+
TTableRuntimeStats RuntimeStats() const noexcept;
247+
243248
ui64 GetMemSize(TEpoch epoch = TEpoch::Max()) const noexcept
244249
{
245250
if (Y_LIKELY(epoch == TEpoch::Max())) {
@@ -364,6 +369,8 @@ class TTable: public TAtomicRefCount<TTable> {
364369
TTransactionSet DecidedTransactions;
365370
TIntrusivePtr<ITableObserver> TableObserver;
366371

372+
ui64 RemovedCommittedTxs = 0;
373+
367374
private:
368375
struct TRollbackRemoveTxRef {
369376
ui64 TxId;

ydb/core/tablet_flat/flat_table_committed.h

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,20 @@ namespace NTable {
226226
Unshare()[txId] = value;
227227
}
228228

229-
void Remove(ui64 txId) {
229+
// Erases txId from the map if it is present.
// Returns true when an entry was erased, false when State_ is empty or does
// not contain txId. Membership is checked on State_ first, so Unshare() is
// only called when there is actually something to erase.
// NOTE(review): Unshare() presumably yields a privately owned mutable state —
// its definition is not visible here, confirm.
bool Remove(ui64 txId) {
230230
if (State_ && State_->contains(txId)) {
231231
Unshare().erase(txId);
232+
return true;
233+
} else {
234+
return false;
235+
}
236+
}
237+
238+
// Returns the number of stored entries: State_->size() when State_ is set,
// otherwise 0 (an unset State_ is treated as an empty container).
size_t Size() const {
239+
if (State_) {
240+
return State_->size();
241+
} else {
242+
return 0;
232243
}
233244
}
234245

@@ -345,13 +356,24 @@ namespace NTable {
345356
State_.Reset();
346357
}
347358

348-
void Add(ui64 txId) {
349-
Unshare().insert(txId);
359+
// Inserts txId into the set obtained from Unshare() and returns the `.second`
// member of the insert() result.
// NOTE(review): with the usual container insert() convention that is true
// when txId was newly inserted and false when it was already present —
// the container type behind State_ is not visible here, confirm.
bool Add(ui64 txId) {
360+
return Unshare().insert(txId).second;
350361
}
351362

352-
void Remove(ui64 txId) {
363+
// Erases txId from the set if it is present.
// Returns true when an entry was erased, false when State_ is empty or does
// not contain txId. Callers in TTable use the result to count transactions
// that were in the removed set at the moment they became committed.
// Membership is checked on State_ first, so Unshare() is only called when
// there is actually something to erase.
bool Remove(ui64 txId) {
353364
if (State_ && State_->contains(txId)) {
354365
Unshare().erase(txId);
366+
return true;
367+
} else {
368+
return false;
369+
}
370+
}
371+
372+
// Returns the number of stored transaction ids: State_->size() when State_
// is set, otherwise 0 (an unset State_ is treated as an empty set).
size_t Size() const {
373+
if (State_) {
374+
return State_->size();
375+
} else {
376+
return 0;
355377
}
356378
}
357379

0 commit comments

Comments
 (0)