Skip to content

Commit fc33a19

Browse files
authored
correct counting of resources in cluster and tenantinfo handlers (#10819)
1 parent 95e4ddf commit fc33a19

File tree

10 files changed

+141
-57
lines changed

10 files changed

+141
-57
lines changed

ydb/core/base/pool_stats_collector.cpp

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,12 +53,24 @@ class TStatsCollectingActor : public NActors::TStatsCollectingActor {
5353
void OnWakeup(const TActorContext &ctx) override {
5454
MiniKQLPoolStats.Update();
5555

56-
TVector<std::tuple<TString, double, ui32, ui32>> pools;
56+
auto systemUpdate = std::make_unique<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate>();
57+
ui32 coresTotal = 0;
58+
double coresUsed = 0;
5759
for (const auto& pool : PoolCounters) {
58-
pools.emplace_back(pool.Name, pool.Usage, pool.Threads, pool.LimitThreads);
60+
auto& pb = *systemUpdate->Record.AddPoolStats();
61+
pb.SetName(pool.Name);
62+
pb.SetUsage(pool.Usage);
63+
pb.SetThreads(static_cast<ui32>(pool.Threads));
64+
pb.SetLimit(static_cast<ui32>(pool.LimitThreads));
65+
if (pool.Name != "IO") {
66+
coresTotal += static_cast<ui32>(pool.DefaultThreads);
67+
}
68+
coresUsed += pool.Usage * pool.LimitThreads;
5969
}
70+
systemUpdate->Record.SetCoresTotal(coresTotal);
71+
systemUpdate->Record.SetCoresUsed(coresUsed);
6072

61-
ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate(pools));
73+
ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), systemUpdate.release());
6274
}
6375

6476
private:

ydb/core/node_whiteboard/node_whiteboard.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -531,5 +531,8 @@ struct WhiteboardResponse<TEvWhiteboard::TEvNodeStateRequest> {
531531
using Type = TEvWhiteboard::TEvNodeStateResponse;
532532
};
533533

534+
template<typename TResponseType>
535+
::google::protobuf::RepeatedField<int> GetDefaultWhiteboardFields();
536+
534537
} // NNodeWhiteboard
535538
} // NKikimr

ydb/core/protos/node_whiteboard.proto

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,8 @@ message TSystemStateInfo {
341341
optional uint32 TotalSessions = 36 [(DefaultField) = true];
342342
optional string NodeName = 37 [(DefaultField) = true];
343343
optional NKikimrMemory.TMemoryStats MemoryStats = 38;
344+
optional double CoresUsed = 39;
345+
optional uint32 CoresTotal = 40;
344346
}
345347

346348
message TEvSystemStateRequest {

ydb/core/tablet/node_whiteboard.cpp

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -470,7 +470,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
470470
}
471471
}
472472

473-
static void SelectiveCopy(::google::protobuf::Message& protoTo, const ::google::protobuf::Message& protoFrom, const ::google::protobuf::RepeatedField<arc_i32>& fields) {
473+
static void SelectiveCopy(::google::protobuf::Message& protoTo, const ::google::protobuf::Message& protoFrom, const ::google::protobuf::RepeatedField<int>& fields) {
474474
using namespace ::google::protobuf;
475475
const Descriptor& descriptor = *protoTo.GetDescriptor();
476476
const Reflection& reflectionTo = *protoTo.GetReflection();
@@ -483,24 +483,6 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
483483
}
484484
}
485485

486-
template<typename TMessage>
487-
static ::google::protobuf::RepeatedField<arc_i32> GetDefaultFields(const TMessage& message) {
488-
using namespace ::google::protobuf;
489-
const Descriptor& descriptor = *message.GetDescriptor();
490-
::google::protobuf::RepeatedField<arc_i32> defaultFields;
491-
int fieldCount = descriptor.field_count();
492-
for (int index = 0; index < fieldCount; ++index) {
493-
const FieldDescriptor* field = descriptor.field(index);
494-
const auto& options(field->options());
495-
if (options.HasExtension(NKikimrWhiteboard::DefaultField)) {
496-
if (options.GetExtension(NKikimrWhiteboard::DefaultField)) {
497-
defaultFields.Add(field->number());
498-
}
499-
}
500-
}
501-
return defaultFields;
502-
}
503-
504486
template<typename TMessage, typename TRequest>
505487
static void Copy(TMessage& to, const TMessage& from, const TRequest& request) {
506488
if (request.FieldsRequiredSize() > 0) {
@@ -510,8 +492,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
510492
SelectiveCopy(to, from, request.GetFieldsRequired());
511493
}
512494
} else {
513-
static auto defaultFields = GetDefaultFields(to);
514-
SelectiveCopy(to, from, defaultFields);
495+
SelectiveCopy(to, from, GetDefaultWhiteboardFields<TMessage>());
515496
}
516497
}
517498

@@ -1145,6 +1126,30 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
11451126
}
11461127
};
11471128

1129+
template<typename TMessage>
1130+
::google::protobuf::RepeatedField<int> InitDefaultWhiteboardFields() {
1131+
using namespace ::google::protobuf;
1132+
const Descriptor& descriptor = *TMessage::GetDescriptor();
1133+
::google::protobuf::RepeatedField<int> defaultFields;
1134+
int fieldCount = descriptor.field_count();
1135+
for (int index = 0; index < fieldCount; ++index) {
1136+
const FieldDescriptor* field = descriptor.field(index);
1137+
const auto& options(field->options());
1138+
if (options.HasExtension(NKikimrWhiteboard::DefaultField)) {
1139+
if (options.GetExtension(NKikimrWhiteboard::DefaultField)) {
1140+
defaultFields.Add(field->number());
1141+
}
1142+
}
1143+
}
1144+
return defaultFields;
1145+
}
1146+
1147+
template<typename TMessage>
1148+
::google::protobuf::RepeatedField<int> GetDefaultWhiteboardFields() {
1149+
static ::google::protobuf::RepeatedField<int> defaultFields = InitDefaultWhiteboardFields<TMessage>();
1150+
return defaultFields;
1151+
}
1152+
11481153
IActor* CreateNodeWhiteboardService() {
11491154
return new TNodeWhiteboardService();
11501155
}

ydb/core/viewer/json_handlers_viewer.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ void InitViewerStorageUsageJsonHandler(TJsonHandlers &handlers) {
204204
}
205205

206206
void InitViewerClusterJsonHandler(TJsonHandlers& handlers) {
207-
handlers.AddHandler("/viewer/cluster", new TJsonHandler<TJsonCluster>(TJsonCluster::GetSwagger()), 4);
207+
handlers.AddHandler("/viewer/cluster", new TJsonHandler<TJsonCluster>(TJsonCluster::GetSwagger()), 5);
208208
}
209209

210210
void InitViewerLabeledCountersJsonHandler(TJsonHandlers &handlers) {
@@ -220,7 +220,7 @@ void InitViewerHiveStatsJsonHandler(TJsonHandlers& handlers) {
220220
}
221221

222222
void InitViewerTenantInfoJsonHandler(TJsonHandlers &handlers) {
223-
handlers.AddHandler("/viewer/tenantinfo", new TJsonHandler<TJsonTenantInfo>(TJsonTenantInfo::GetSwagger()), 2);
223+
handlers.AddHandler("/viewer/tenantinfo", new TJsonHandler<TJsonTenantInfo>(TJsonTenantInfo::GetSwagger()), 3);
224224
}
225225

226226
void InitViewerWhoAmIJsonHandler(TJsonHandlers& handlers) {
@@ -244,7 +244,7 @@ void InitViewerHealthCheckJsonHandler(TJsonHandlers& handlers) {
244244
}
245245

246246
void InitViewerNodesJsonHandler(TJsonHandlers& handlers) {
247-
handlers.AddHandler("/viewer/nodes", new TJsonHandler<TJsonNodes>(TJsonNodes::GetSwagger()), 10);
247+
handlers.AddHandler("/viewer/nodes", new TJsonHandler<TJsonNodes>(TJsonNodes::GetSwagger()), 11);
248248
}
249249

250250
void InitViewerACLJsonHandler(TJsonHandlers &jsonHandlers) {

ydb/core/viewer/protos/viewer.proto

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -322,6 +322,7 @@ message TClusterInfo {
322322
uint32 NodesTotal = 10;
323323
uint32 NodesAlive = 11;
324324
uint32 NumberOfCpus = 20;
325+
uint32 CoresTotal = 24;
325326
double CoresUsed = 21;
326327
double LoadAverage = 22;
327328
repeated NKikimrWhiteboard.TSystemStateInfo.TPoolStats PoolStats = 23;
@@ -358,7 +359,7 @@ message TStorageUsage {
358359
SSD = 2;
359360
}
360361
EType Type = 1;
361-
uint64 Size = 2;
362+
optional uint64 Size = 2;
362363
uint64 Limit = 3;
363364
uint64 SoftQuota = 4;
364365
uint64 HardQuota = 5;
@@ -394,6 +395,7 @@ message TTenant {
394395
Ydb.Cms.DatabaseQuotas DatabaseQuotas = 42;
395396
repeated TStorageUsage TablesStorage = 44;
396397
repeated TStorageUsage DatabaseStorage = 45;
398+
uint32 CoresTotal = 50;
397399
}
398400

399401
message TTenants {

ydb/core/viewer/viewer_cluster.h

Lines changed: 46 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -342,8 +342,10 @@ class TJsonCluster : public TViewerPipeClient {
342342
}
343343

344344
void InitSystemWhiteboardRequest(NKikimrWhiteboard::TEvSystemStateRequest* request) {
345-
//request->AddFieldsRequired(-1);
346-
Y_UNUSED(request);
345+
request->MutableFieldsRequired()->CopyFrom(GetDefaultWhiteboardFields<NKikimrWhiteboard::TSystemStateInfo>());
346+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kMemoryStatsFieldNumber);
347+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresUsedFieldNumber);
348+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresTotalFieldNumber);
347349
}
348350

349351
void InitTabletWhiteboardRequest(NKikimrWhiteboard::TEvTabletStateRequest* request) {
@@ -462,23 +464,40 @@ class TJsonCluster : public TViewerPipeClient {
462464
}
463465
}
464466

467+
struct TMemoryStats {
468+
ui64 Total = 0;
469+
ui64 Limit = 0;
470+
};
471+
472+
std::unordered_set<TString> hostPassed;
473+
std::unordered_map<TString, TMemoryStats> memoryStats;
474+
465475
for (TNode& node : NodeData) {
466476
const NKikimrWhiteboard::TSystemStateInfo& systemState = node.SystemState;
467477
(*ClusterInfo.MutableMapDataCenters())[node.DataCenter]++;
468-
if (systemState.HasNumberOfCpus()) {
478+
if (hostPassed.insert(systemState.GetHost()).second) {
469479
ClusterInfo.SetNumberOfCpus(ClusterInfo.GetNumberOfCpus() + systemState.GetNumberOfCpus());
470-
}
471-
if (systemState.LoadAverageSize() > 0) {
472-
ClusterInfo.SetLoadAverage(ClusterInfo.GetLoadAverage() + systemState.GetLoadAverage(0));
480+
if (systemState.LoadAverageSize() > 0) {
481+
ClusterInfo.SetLoadAverage(ClusterInfo.GetLoadAverage() + systemState.GetLoadAverage(0));
482+
}
473483
}
474484
if (systemState.HasVersion()) {
475485
(*ClusterInfo.MutableMapVersions())[systemState.GetVersion()]++;
476486
}
477487
if (systemState.HasClusterName() && !ClusterInfo.GetName()) {
478488
ClusterInfo.SetName(systemState.GetClusterName());
479489
}
480-
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + systemState.GetMemoryLimit());
481490
ClusterInfo.SetMemoryUsed(ClusterInfo.GetMemoryUsed() + systemState.GetMemoryUsed());
491+
if (systemState.HasMemoryStats()) {
492+
TMemoryStats& stats = memoryStats[systemState.GetHost()];
493+
if (systemState.GetMemoryLimit() > 0) {
494+
stats.Limit += systemState.GetMemoryLimit();
495+
} else {
496+
stats.Total = systemState.GetMemoryStats().GetMemTotal();
497+
}
498+
} else {
499+
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + systemState.GetMemoryLimit());
500+
}
482501
if (!node.Disconnected && node.SystemState.HasSystemState()) {
483502
ClusterInfo.SetNodesAlive(ClusterInfo.GetNodesAlive() + 1);
484503
}
@@ -500,14 +519,32 @@ class TJsonCluster : public TViewerPipeClient {
500519
targetPoolStat->SetName(poolName);
501520
}
502521
double poolUsage = targetPoolStat->GetUsage() * targetPoolStat->GetThreads();
503-
poolUsage += poolStat.GetUsage() * poolStat.GetThreads();
522+
ui32 usageThreads = poolStat.GetLimit() ? poolStat.GetLimit() : poolStat.GetThreads();
523+
poolUsage += poolStat.GetUsage() * usageThreads;
504524
ui32 poolThreads = targetPoolStat->GetThreads() + poolStat.GetThreads();
505525
if (poolThreads != 0) {
506526
double threadUsage = poolUsage / poolThreads;
507527
targetPoolStat->SetUsage(threadUsage);
508528
targetPoolStat->SetThreads(poolThreads);
509529
}
510-
ClusterInfo.SetCoresUsed(ClusterInfo.GetCoresUsed() + poolStat.GetUsage() * poolStat.GetThreads());
530+
if (systemState.GetCoresTotal() == 0) {
531+
ClusterInfo.SetCoresUsed(ClusterInfo.GetCoresUsed() + poolStat.GetUsage() * usageThreads);
532+
if (poolStat.GetName() != "IO") {
533+
ClusterInfo.SetCoresTotal(ClusterInfo.GetCoresTotal() + poolStat.GetThreads());
534+
}
535+
}
536+
}
537+
if (systemState.GetCoresTotal() != 0) {
538+
ClusterInfo.SetCoresUsed(ClusterInfo.GetCoresUsed() + systemState.GetCoresUsed());
539+
ClusterInfo.SetCoresTotal(ClusterInfo.GetCoresTotal() + systemState.GetCoresTotal());
540+
}
541+
}
542+
543+
for (const auto& memStats : memoryStats) {
544+
if (memStats.second.Total > 0) {
545+
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + memStats.second.Total);
546+
} else {
547+
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + memStats.second.Limit);
511548
}
512549
}
513550

ydb/core/viewer/viewer_nodes.h

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -261,11 +261,18 @@ class TJsonNodes : public TViewerPipeClient {
261261
}
262262

263263
void CalcCpuUsage() {
264-
float usage = 0;
265-
int threads = 0;
266-
for (const auto& pool : SystemState.GetPoolStats()) {
267-
usage += pool.GetUsage() * pool.GetThreads();
268-
threads += pool.GetThreads();
264+
float usage = SystemState.GetCoresUsed();
265+
int threads = SystemState.GetCoresTotal();
266+
if (threads == 0) {
267+
for (const auto& pool : SystemState.GetPoolStats()) {
268+
ui32 usageThreads = pool.GetLimit() ? pool.GetLimit() : pool.GetThreads();
269+
usage += pool.GetUsage() * usageThreads;
270+
if (pool.GetName() != "IO") {
271+
threads += pool.GetThreads();
272+
}
273+
}
274+
SystemState.SetCoresUsed(usage);
275+
SystemState.SetCoresTotal(threads);
269276
}
270277
CpuUsage = usage / threads;
271278
}
@@ -1720,11 +1727,11 @@ class TJsonNodes : public TViewerPipeClient {
17201727
if (AllWhiteboardFields) {
17211728
request->AddFieldsRequired(-1);
17221729
} else {
1723-
for (auto field : {1, 2, 4, 5, 6, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 36, 37}) { // node_whiteboard.proto
1724-
request->AddFieldsRequired(field);
1725-
}
1730+
request->MutableFieldsRequired()->CopyFrom(GetDefaultWhiteboardFields<NKikimrWhiteboard::TSystemStateInfo>());
1731+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresUsedFieldNumber);
1732+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresTotalFieldNumber);
17261733
if (FieldsRequired.test(+ENodeFields::MemoryDetailed)) {
1727-
request->AddFieldsRequired(38);
1734+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kMemoryStatsFieldNumber);
17281735
}
17291736
}
17301737
}

ydb/core/viewer/viewer_tenantinfo.h

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -262,7 +262,11 @@ class TJsonTenantInfo : public TViewerPipeClient {
262262
void SendWhiteboardSystemStateRequest(const TNodeId nodeId) {
263263
Subscribers.insert(nodeId);
264264
if (SystemStateResponse.count(nodeId) == 0) {
265-
SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, new TEvWhiteboard::TEvSystemStateRequest()));
265+
auto request = std::make_unique<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest>();
266+
request->Record.MutableFieldsRequired()->CopyFrom(GetDefaultWhiteboardFields<NKikimrWhiteboard::TSystemStateInfo>());
267+
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresUsedFieldNumber);
268+
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresTotalFieldNumber);
269+
SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release()));
266270
}
267271
}
268272

@@ -731,7 +735,8 @@ class TJsonTenantInfo : public TViewerPipeClient {
731735

732736
if (tablesStorageByType.empty() && entry.DomainDescription->Description.HasDiskSpaceUsage()) {
733737
tablesStorageByType[GuessStorageType(entry.DomainDescription->Description)] =
734-
entry.DomainDescription->Description.GetDiskSpaceUsage().GetTables().GetTotalSize();
738+
entry.DomainDescription->Description.GetDiskSpaceUsage().GetTables().GetTotalSize()
739+
+ entry.DomainDescription->Description.GetDiskSpaceUsage().GetTopics().GetDataSize();
735740
}
736741

737742
if (storageQuotasByType.empty()) {
@@ -784,14 +789,24 @@ class TJsonTenantInfo : public TViewerPipeClient {
784789
targetPoolStat->SetName(poolName);
785790
}
786791
double poolUsage = targetPoolStat->GetUsage() * targetPoolStat->GetThreads();
787-
poolUsage += poolStat.GetUsage() * poolStat.GetThreads();
792+
ui32 usageThreads = poolStat.GetLimit() ? poolStat.GetLimit() : poolStat.GetThreads();
793+
poolUsage += poolStat.GetUsage() * usageThreads;
788794
ui32 poolThreads = targetPoolStat->GetThreads() + poolStat.GetThreads();
789795
if (poolThreads != 0) {
790796
double threadUsage = poolUsage / poolThreads;
791797
targetPoolStat->SetUsage(threadUsage);
792798
targetPoolStat->SetThreads(poolThreads);
793799
}
794-
tenant.SetCoresUsed(tenant.GetCoresUsed() + poolStat.GetUsage() * poolStat.GetThreads());
800+
if (nodeInfo.GetCoresTotal() == 0) {
801+
tenant.SetCoresUsed(tenant.GetCoresUsed() + poolStat.GetUsage() * usageThreads);
802+
if (poolStat.GetName() != "IO") {
803+
tenant.SetCoresTotal(tenant.GetCoresTotal() + poolStat.GetThreads());
804+
}
805+
}
806+
}
807+
if (nodeInfo.GetCoresTotal() > 0) {
808+
tenant.SetCoresUsed(tenant.GetCoresUsed() + nodeInfo.GetCoresUsed());
809+
tenant.SetCoresTotal(tenant.GetCoresTotal() + nodeInfo.GetCoresTotal());
795810
}
796811
if (nodeInfo.HasMemoryUsed()) {
797812
tenant.SetMemoryUsed(tenant.GetMemoryUsed() + nodeInfo.GetMemoryUsed());

0 commit comments

Comments
 (0)