Skip to content

Commit e936c2b

Browse files
authored
24-3: Replication counters (#11081)
1 parent c85b979 commit e936c2b

File tree

4 files changed

+33
-0
lines changed

4 files changed

+33
-0
lines changed

ydb/core/protos/counters_replication.proto

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,17 @@ option (TabletTypeName) = "ReplicationController"; // Used as prefix for all cou
77

88
// Simple (gauge-style) counters of the replication controller tablet.
// Each entry is overwritten with a current value by the controller code:
//   COUNTER_SESSIONS   - Sessions.size() after a session is created or deleted.
//   COUNTER_WORKERS    - Workers.size() after a worker is created or removed.
//   COUNTER_BOOT_QUEUE - BootQueue.size(), refreshed in ScheduleProcessQueues().
//   COUNTER_STOP_QUEUE - StopQueue.size(), refreshed in ScheduleProcessQueues().
//   COUNTER_DATA_LAG   - replication lag in milliseconds, set in UpdateLag()
//                        only when the replication reports a lag value.
// The Name in CounterOpts is the exported counter name.
enum ESimpleCounters {
99
COUNTER_SIMPLE_IGNORE = 0;
10+
COUNTER_SESSIONS = 1 [(CounterOpts) = {Name: "Sessions"}];
11+
COUNTER_WORKERS = 2 [(CounterOpts) = {Name: "Workers"}];
12+
COUNTER_BOOT_QUEUE = 3 [(CounterOpts) = {Name: "BootQueue"}];
13+
COUNTER_STOP_QUEUE = 4 [(CounterOpts) = {Name: "StopQueue"}];
14+
COUNTER_DATA_LAG = 5 [(CounterOpts) = {Name: "DataLag"}];
1015
}
1116

1217
// Cumulative (monotonically incremented) counters of the replication
// controller tablet:
//   COUNTER_CREATE_SESSION - incremented by 1 on each TController::CreateSession() call.
//   COUNTER_DELETE_SESSION - incremented by 1 on each TController::DeleteSession() call.
// The Name in CounterOpts is the exported counter name.
enum ECumulativeCounters {
1318
COUNTER_CUMULATIVE_IGNORE = 0;
19+
COUNTER_CREATE_SESSION = 1 [(CounterOpts) = {Name: "CreateSession"}];
20+
COUNTER_DELETE_SESSION = 2 [(CounterOpts) = {Name: "DeleteSession"}];
1421
}
1522

1623
enum EPercentileCounters {

ydb/core/tx/replication/controller/controller.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <ydb/core/base/appdata.h>
55
#include <ydb/core/discovery/discovery.h>
66
#include <ydb/core/engine/minikql/flat_local_tx_factory.h>
7+
#include <ydb/core/tablet/tablet_counters_protobuf.h>
78

89
namespace NKikimr::NReplication {
910

@@ -13,6 +14,13 @@ TController::TController(const TActorId& tablet, TTabletStorageInfo* info)
1314
: TActor(&TThis::StateInit)
1415
, TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory)
1516
, LogPrefix(this)
17+
, TabletCountersPtr(new TProtobufTabletCounters<
18+
ESimpleCounters_descriptor,
19+
ECumulativeCounters_descriptor,
20+
EPercentileCounters_descriptor,
21+
ETxTypes_descriptor
22+
>())
23+
, TabletCounters(TabletCountersPtr.Get())
1624
{
1725
}
1826

@@ -30,6 +38,7 @@ void TController::OnTabletDead(TEvTablet::TEvTabletDead::TPtr&, const TActorCont
3038

3139
// Called when the tablet executor is activated: logs the event, registers the
// tablet counters with the executor, then starts schema initialization.
void TController::OnActivateExecutor(const TActorContext& ctx) {
3240
CLOG_T(ctx, "OnActivateExecutor");
41+
// Release() gives up the holder's pointer and passes it to the executor.
// The controller keeps updating counters through the raw TabletCounters
// pointer captured in the constructor.
// NOTE(review): presumably the executor takes ownership and outlives every
// use of TabletCounters; after Release() the holder is empty, so this
// relies on being called once per tablet instance. Confirm.
Executor()->RegisterExternalTabletCounters(TabletCountersPtr.Release());
3342
RunTxInitSchema(ctx);
3443
}
3544

@@ -292,9 +301,11 @@ void TController::Handle(TEvDiscovery::TEvError::TPtr& ev, const TActorContext&
292301
void TController::CreateSession(ui32 nodeId, const TActorContext& ctx) {
293302
CLOG_D(ctx, "Create session"
294303
<< ": nodeId# " << nodeId);
304+
TabletCounters->Cumulative()[COUNTER_CREATE_SESSION] += 1;
295305

296306
Y_ABORT_UNLESS(!Sessions.contains(nodeId));
297307
Sessions.emplace(nodeId, TSessionInfo());
308+
TabletCounters->Simple()[COUNTER_SESSIONS] = Sessions.size();
298309

299310
auto ev = MakeHolder<TEvService::TEvHandshake>(TabletID(), Executor()->Generation());
300311
ui32 flags = 0;
@@ -308,6 +319,7 @@ void TController::CreateSession(ui32 nodeId, const TActorContext& ctx) {
308319
void TController::DeleteSession(ui32 nodeId, const TActorContext& ctx) {
309320
CLOG_D(ctx, "Delete session"
310321
<< ": nodeId# " << nodeId);
322+
TabletCounters->Cumulative()[COUNTER_DELETE_SESSION] += 1;
311323

312324
Y_ABORT_UNLESS(Sessions.contains(nodeId));
313325
auto& session = Sessions[nodeId];
@@ -327,6 +339,8 @@ void TController::DeleteSession(ui32 nodeId, const TActorContext& ctx) {
327339
}
328340

329341
Sessions.erase(nodeId);
342+
TabletCounters->Simple()[COUNTER_SESSIONS] = Sessions.size();
343+
330344
CloseSession(nodeId, ctx);
331345
ScheduleProcessQueues();
332346
}
@@ -431,6 +445,9 @@ void TController::UpdateLag(const TWorkerId& id, TDuration lag) {
431445
}
432446

433447
target->UpdateLag(id.WorkerId(), lag);
448+
if (const auto lag = replication->GetLag()) {
449+
TabletCounters->Simple()[COUNTER_DATA_LAG] = lag->MilliSeconds();
450+
}
434451
}
435452

436453
void TController::Handle(TEvService::TEvRunWorker::TPtr& ev, const TActorContext& ctx) {
@@ -486,6 +503,7 @@ TWorkerInfo* TController::GetOrCreateWorker(const TWorkerId& id, NKikimrReplicat
486503
auto it = Workers.find(id);
487504
if (it == Workers.end()) {
488505
it = Workers.emplace(id, cmd).first;
506+
TabletCounters->Simple()[COUNTER_WORKERS] = Workers.size();
489507
}
490508

491509
auto replication = Find(id.ReplicationId());
@@ -499,6 +517,9 @@ TWorkerInfo* TController::GetOrCreateWorker(const TWorkerId& id, NKikimrReplicat
499517
}
500518

501519
void TController::ScheduleProcessQueues() {
520+
TabletCounters->Simple()[COUNTER_BOOT_QUEUE] = BootQueue.size();
521+
TabletCounters->Simple()[COUNTER_STOP_QUEUE] = StopQueue.size();
522+
502523
if (ProcessQueuesScheduled || (!BootQueue && !StopQueue)) {
503524
return;
504525
}
@@ -652,6 +673,7 @@ void TController::RemoveWorker(const TWorkerId& id, const TActorContext& ctx) {
652673

653674
RemoveQueue.erase(id);
654675
Workers.erase(id);
676+
TabletCounters->Simple()[COUNTER_WORKERS] = Workers.size();
655677

656678
auto replication = Find(id.ReplicationId());
657679
if (!replication) {

ydb/core/tx/replication/controller/controller_impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <ydb/core/base/blobstorage.h>
1313
#include <ydb/core/base/defs.h>
1414
#include <ydb/core/protos/counters_replication.pb.h>
15+
#include <ydb/core/tablet/tablet_counters.h>
1516
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
1617
#include <ydb/core/tx/replication/service/service.h>
1718
#include <ydb/library/actors/core/interconnect.h>
@@ -167,6 +168,8 @@ class TController
167168

168169
private:
169170
const TTabletLogPrefix LogPrefix;
171+
THolder<TTabletCountersBase> TabletCountersPtr;
172+
TTabletCountersBase* TabletCounters;
170173

171174
TSysParams SysParams;
172175
THashMap<ui64, TReplication::TPtr> Replications;

ydb/core/tx/replication/controller/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ PEERDIR(
55
ydb/core/discovery
66
ydb/core/engine/minikql
77
ydb/core/protos
8+
ydb/core/tablet
89
ydb/core/tablet_flat
910
ydb/core/tx/replication/common
1011
ydb/core/tx/replication/ydb_proxy

0 commit comments

Comments
 (0)