9
9
#include < ydb/core/node_whiteboard/node_whiteboard.h>
10
10
#include < ydb/core/base/nameservice.h>
11
11
#include < ydb/core/base/counters.h>
12
+ #include < ydb/core/util/cpuinfo.h>
12
13
#include < ydb/core/util/tuples.h>
13
14
14
15
#include < util/string/split.h>
@@ -46,6 +47,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
46
47
SystemStateInfo.SetNodeName (nodeName);
47
48
}
48
49
SystemStateInfo.SetNumberOfCpus (NSystemInfo::NumberOfCpus ());
50
+ SystemStateInfo.SetRealNumberOfCpus (NKikimr::RealNumberOfCpus ());
49
51
auto version = GetProgramRevision ();
50
52
if (!version.empty ()) {
51
53
SystemStateInfo.SetVersion (version);
@@ -56,8 +58,13 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
56
58
SystemStateInfo.SetStartTime (ctx.Now ().MilliSeconds ());
57
59
ctx.Send (ctx.SelfID , new TEvPrivate::TEvUpdateRuntimeStats ());
58
60
59
- auto group = NKikimr::GetServiceCounters (NKikimr::AppData ()->Counters , " utils" )
60
- ->GetSubgroup (" subsystem" , " whiteboard" );
61
+ auto utils = NKikimr::GetServiceCounters (NKikimr::AppData ()->Counters , " utils" );
62
+ UserTime = utils->GetCounter (" Process/UserTime" , true );
63
+ SysTime = utils->GetCounter (" Process/SystemTime" , true );
64
+ MinorPageFaults = utils->GetCounter (" Process/MinorPageFaults" , true );
65
+ MajorPageFaults = utils->GetCounter (" Process/MajorPageFaults" , true );
66
+ NumThreads = utils->GetCounter (" Process/NumThreads" , false );
67
+ auto group = utils->GetSubgroup (" subsystem" , " whiteboard" );
61
68
MaxClockSkewWithPeerUsCounter = group->GetCounter (" MaxClockSkewWithPeerUs" );
62
69
MaxClockSkewPeerIdCounter = group->GetCounter (" MaxClockSkewPeerId" );
63
70
@@ -78,8 +85,19 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
78
85
NKikimrWhiteboard::TSystemStateInfo SystemStateInfo;
79
86
THolder<NTracing::ITraceCollection> TabletIntrospectionData;
80
87
81
- ::NMonitoring::TDynamicCounters::TCounterPtr MaxClockSkewWithPeerUsCounter;
82
- ::NMonitoring::TDynamicCounters::TCounterPtr MaxClockSkewPeerIdCounter;
88
+ NMonitoring::TDynamicCounters::TCounterPtr MaxClockSkewWithPeerUsCounter;
89
+ NMonitoring::TDynamicCounters::TCounterPtr MaxClockSkewPeerIdCounter;
90
+ NMonitoring::TDynamicCounters::TCounterPtr UserTime;
91
+ ui64 SavedUserTime = 0 ;
92
+ NMonitoring::TDynamicCounters::TCounterPtr SysTime;
93
+ ui64 SavedSysTime = 0 ;
94
+ NMonitoring::TDynamicCounters::TCounterPtr MinorPageFaults;
95
+ ui64 SavedMinorPageFaults = 0 ;
96
+ NMonitoring::TDynamicCounters::TCounterPtr MajorPageFaults;
97
+ ui64 SavedMajorPageFaults = 0 ;
98
+ NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
99
+
100
+ TSystemThreadsMonitor ThreadsMonitor;
83
101
84
102
template <typename PropertyType>
85
103
static ui64 GetDifference (PropertyType a, PropertyType b) {
@@ -721,6 +739,9 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
721
739
auto & endpoint = *SystemStateInfo.AddEndpoints ();
722
740
endpoint.SetName (ev->Get ()->Name );
723
741
endpoint.SetAddress (ev->Get ()->Address );
742
+ std::sort (SystemStateInfo.MutableEndpoints ()->begin (), SystemStateInfo.MutableEndpoints ()->end (), [](const auto & a, const auto & b) {
743
+ return a.GetName () < b.GetName ();
744
+ });
724
745
SystemStateInfo.SetChangeTime (ctx.Now ().MilliSeconds ());
725
746
}
726
747
@@ -838,14 +859,25 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
838
859
}
839
860
}
840
861
862
+ static std::unordered_set<TTabletId> BuildIndex (const ::google::protobuf::RepeatedField<::NProtoBuf::uint64>& array) {
863
+ std::unordered_set<TTabletId> result;
864
+ result.reserve (array.size ());
865
+ for (auto id : array) {
866
+ result.insert (id);
867
+ }
868
+ return result;
869
+ }
870
+
841
871
void Handle (TEvWhiteboard::TEvTabletStateRequest::TPtr &ev, const TActorContext &ctx) {
842
872
auto now = TMonotonic::Now ();
843
873
const auto & request = ev->Get ()->Record ;
844
874
auto matchesFilter = [
845
875
changedSince = request.has_changedsince () ? request.changedsince () : 0 ,
876
+ filterTabletId = BuildIndex (request.filtertabletid ()),
846
877
filterTenantId = request.has_filtertenantid () ? NKikimr::TSubDomainKey (request.filtertenantid ()) : NKikimr::TSubDomainKey ()
847
878
](const NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo) {
848
879
return tabletStateInfo.changetime () >= changedSince
880
+ && (filterTabletId.empty () || filterTabletId.count (tabletStateInfo.tabletid ()))
849
881
&& (!filterTenantId || filterTenantId == NKikimr::TSubDomainKey (tabletStateInfo.tenantid ()));
850
882
};
851
883
std::unique_ptr<TEvWhiteboard::TEvTabletStateResponse> response = std::make_unique<TEvWhiteboard::TEvTabletStateResponse>();
@@ -868,22 +900,10 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
868
900
}
869
901
} else {
870
902
if (request.groupby ().empty ()) {
871
- if (request.filtertabletid_size () == 0 ) {
872
- for (const auto & pr : TabletStateInfo) {
873
- if (matchesFilter (pr.second )) {
874
- NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo ();
875
- Copy (tabletStateInfo, pr.second , request);
876
- }
877
- }
878
- } else {
879
- for (auto tabletId : request.filtertabletid ()) {
880
- auto it = TabletStateInfo.find ({tabletId, 0 });
881
- if (it != TabletStateInfo.end ()) {
882
- if (matchesFilter (it->second )) {
883
- NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo ();
884
- Copy (tabletStateInfo, it->second , request);
885
- }
886
- }
903
+ for (const auto & pr : TabletStateInfo) {
904
+ if (matchesFilter (pr.second )) {
905
+ NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo ();
906
+ Copy (tabletStateInfo, pr.second , request);
887
907
}
888
908
}
889
909
} else if (request.groupby () == " Type,State" || request.groupby () == " NodeId,Type,State" ) { // the only supported group-by for now
@@ -1097,15 +1117,18 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
1097
1117
}
1098
1118
1099
1119
void Handle (TEvPrivate::TEvUpdateRuntimeStats::TPtr &, const TActorContext &ctx) {
1100
- static constexpr TDuration UPDATE_PERIOD = TDuration::Seconds (15 );
1120
+ static constexpr int UPDATE_PERIOD_SECONDS = 15 ;
1121
+ static constexpr TDuration UPDATE_PERIOD = TDuration::Seconds (UPDATE_PERIOD_SECONDS);
1122
+ auto now = TActivationContext::Now ();
1123
+
1101
1124
{
1102
1125
NKikimrWhiteboard::TSystemStateInfo systemStatsUpdate;
1103
1126
TVector<double > loadAverage = GetLoadAverage ();
1104
1127
for (double d : loadAverage) {
1105
1128
systemStatsUpdate.AddLoadAverage (d);
1106
1129
}
1107
1130
if (CheckedMerge (SystemStateInfo, systemStatsUpdate)) {
1108
- SystemStateInfo.SetChangeTime (ctx. Now () .MilliSeconds ());
1131
+ SystemStateInfo.SetChangeTime (now .MilliSeconds ());
1109
1132
}
1110
1133
}
1111
1134
@@ -1122,12 +1145,24 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
1122
1145
SystemStateInfo.SetNetworkUtilization (MaxNetworkUtilization);
1123
1146
MaxNetworkUtilization = 0 ;
1124
1147
}
1125
-
1126
1148
{
1127
- SystemStateInfo.SetNetworkWriteThroughput (SumNetworkWriteThroughput / UPDATE_PERIOD. Seconds () );
1149
+ SystemStateInfo.SetNetworkWriteThroughput (SumNetworkWriteThroughput / UPDATE_PERIOD_SECONDS );
1128
1150
SumNetworkWriteThroughput = 0 ;
1129
1151
}
1130
-
1152
+ auto threadPools = ThreadsMonitor.GetThreadPools (now);
1153
+ SystemStateInfo.ClearThreads ();
1154
+ for (const auto & threadPool : threadPools) {
1155
+ auto * threadInfo = SystemStateInfo.AddThreads ();
1156
+ threadInfo->SetName (threadPool.Name );
1157
+ threadInfo->SetThreads (threadPool.Threads );
1158
+ threadInfo->SetSystemUsage (threadPool.SystemUsage );
1159
+ threadInfo->SetUserUsage (threadPool.UserUsage );
1160
+ threadInfo->SetMajorPageFaults (threadPool.MajorPageFaults );
1161
+ threadInfo->SetMinorPageFaults (threadPool.MinorPageFaults );
1162
+ for (const auto & state : threadPool.States ) {
1163
+ threadInfo->MutableStates ()->emplace (state.first , state.second );
1164
+ }
1165
+ }
1131
1166
UpdateSystemState (ctx);
1132
1167
ctx.Schedule (UPDATE_PERIOD, new TEvPrivate::TEvUpdateRuntimeStats ());
1133
1168
}
0 commit comments