@@ -32,7 +32,8 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
32
32
SVLOG_D (" NSysView::TPartitionStatsCollector bootstrapped" );
33
33
34
34
if (AppData ()->UsePartitionStatsCollectorForTests ) {
35
- OverloadedPartitionBound = 0.0 ;
35
+ OverloadedByCpuPartitionBound = 0.0 ;
36
+ OverloadedByTliPartitionBound = 0 ;
36
37
ProcessOverloadedInterval = TDuration::Seconds (1 );
37
38
}
38
39
@@ -88,25 +89,32 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
88
89
89
90
auto & oldPartitions = table.Partitions ;
90
91
std::unordered_map<TShardIdx, TPartitionStats> newPartitions;
91
- std::set<TOverloadedFollower> overloaded ;
92
+ std::set<TFollowerStats> overloadedByCpu, overloadedByTli ;
92
93
93
94
for (auto shardIdx : ev->Get ()->ShardIndices ) {
94
95
auto old = oldPartitions.find (shardIdx);
95
96
if (old != oldPartitions.end ()) {
96
97
newPartitions[shardIdx] = old->second ;
97
98
98
99
for (const auto & followerStat: old->second .FollowerStats ) {
99
- if (IsPartitionOverloaded (followerStat.second ))
100
- overloaded.insert ({shardIdx, followerStat.first });
100
+ if (IsPartitionOverloadedByCpu (followerStat.second ))
101
+ overloadedByCpu.insert ({shardIdx, followerStat.first });
102
+ if (IsPartitionOverloadedByTli (followerStat.second ))
103
+ overloadedByTli.insert ({shardIdx, followerStat.first });
101
104
}
102
105
}
103
106
}
104
107
105
- if (!overloaded .empty ()) {
106
- tables.Overloaded [pathId].swap (overloaded );
108
+ if (!overloadedByCpu .empty ()) {
109
+ tables.OverloadedByCpu [pathId].swap (overloadedByCpu );
107
110
} else {
108
- tables.Overloaded .erase (pathId);
111
+ tables.OverloadedByCpu .erase (pathId);
109
112
}
113
+ if (!overloadedByTli.empty ()) {
114
+ tables.OverloadedByTli [pathId].swap (overloadedByTli);
115
+ } else {
116
+ tables.OverloadedByTli .erase (pathId);
117
+ }
110
118
111
119
oldPartitions.swap (newPartitions);
112
120
table.ShardIndices .swap (ev->Get ()->ShardIndices );
@@ -125,7 +133,8 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
125
133
126
134
auto & tables = DomainTables[domainKey];
127
135
tables.Stats .erase (pathId);
128
- tables.Overloaded .erase (pathId);
136
+ tables.OverloadedByCpu .erase (pathId);
137
+ tables.OverloadedByTli .erase (pathId);
129
138
}
130
139
131
140
void Handle (TEvSysView::TEvSendPartitionStats::TPtr& ev) {
@@ -153,18 +162,29 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
153
162
154
163
auto & followerStats = partitionStats.FollowerStats [followerId];
155
164
156
- TOverloadedFollower overloadedFollower = {shardIdx, followerId};
157
- if (IsPartitionOverloaded (newStats)) {
158
- tables.Overloaded [pathId].insert (overloadedFollower);
165
+ TFollowerStats overloadedFollower = {shardIdx, followerId};
166
+ if (IsPartitionOverloadedByCpu (newStats)) {
167
+ tables.OverloadedByCpu [pathId].insert (overloadedFollower);
159
168
} else {
160
- auto overloadedFound = tables.Overloaded .find (pathId);
161
- if (overloadedFound != tables.Overloaded .end ()) {
169
+ auto overloadedFound = tables.OverloadedByCpu .find (pathId);
170
+ if (overloadedFound != tables.OverloadedByCpu .end ()) {
162
171
overloadedFound->second .erase (overloadedFollower);
163
172
if (overloadedFound->second .empty ()) {
164
- tables.Overloaded .erase (pathId);
173
+ tables.OverloadedByCpu .erase (pathId);
165
174
}
166
175
}
167
176
}
177
+ if (IsPartitionOverloadedByTli (newStats)) {
178
+ tables.OverloadedByTli [pathId].insert (overloadedFollower);
179
+ } else {
180
+ auto overloadedFound = tables.OverloadedByTli .find (pathId);
181
+ if (overloadedFound != tables.OverloadedByTli .end ()) {
182
+ overloadedFound->second .erase (overloadedFollower);
183
+ if (overloadedFound->second .empty ()) {
184
+ tables.OverloadedByTli .erase (pathId);
185
+ }
186
+ }
187
+ }
168
188
169
189
if (followerStats.HasTtlStats ()) {
170
190
newStats.MutableTtlStats ()->Swap (followerStats.MutableTtlStats ());
@@ -376,37 +396,54 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
376
396
}
377
397
auto & domainTables = domainFound->second ;
378
398
379
- struct TPartition {
399
+ struct TPartitionByCpu {
380
400
TPathId PathId;
381
401
TShardIdx ShardIdx;
382
402
ui32 FollowerId;
383
403
double CPUCores;
384
404
};
385
- std::vector<TPartition> sorted ;
405
+ std::vector<TPartitionByCpu> sortedByCpu ;
386
406
387
- for (const auto & [pathId, overloadedFollowers] : domainTables.Overloaded ) {
388
- for (const TOverloadedFollower& overloadedFollower : overloadedFollowers) {
407
+ struct TPartitionByTli {
408
+ TPathId PathId;
409
+ TShardIdx ShardIdx;
410
+ ui32 FollowerId;
411
+ ui64 LocksBroken;
412
+ };
413
+ std::vector<TPartitionByTli> sortedByTli;
414
+
415
+ for (const auto & [pathId, overloadedFollowers] : domainTables.OverloadedByCpu ) {
416
+ for (const TFollowerStats& overloadedFollower : overloadedFollowers) {
389
417
const auto & table = domainTables.Stats [pathId];
390
418
const auto & partition = table.Partitions .at (overloadedFollower.ShardIdx ).FollowerStats .at (overloadedFollower.FollowerId );
391
- sorted .emplace_back (TPartition {pathId, overloadedFollower.ShardIdx , overloadedFollower.FollowerId , partition.GetCPUCores ()});
419
+ sortedByCpu .emplace_back (TPartitionByCpu {pathId, overloadedFollower.ShardIdx , overloadedFollower.FollowerId , partition.GetCPUCores ()});
392
420
}
393
421
}
422
+ for (const auto & [pathId, overloadedFollowers] : domainTables.OverloadedByTli ) {
423
+ for (const TFollowerStats& overloadedFollower : overloadedFollowers) {
424
+ const auto & table = domainTables.Stats [pathId];
425
+ const auto & partition = table.Partitions .at (overloadedFollower.ShardIdx ).FollowerStats .at (overloadedFollower.FollowerId );
426
+ sortedByTli.emplace_back (TPartitionByTli{pathId, overloadedFollower.ShardIdx , overloadedFollower.FollowerId , partition.GetLocksBroken ()});
427
+ }
428
+ }
394
429
395
- std::sort (sorted .begin (), sorted .end (),
430
+ std::sort (sortedByCpu .begin (), sortedByCpu .end (),
396
431
[] (const auto & l, const auto & r) { return l.CPUCores > r.CPUCores ; });
432
+ std::sort (sortedByTli.begin (), sortedByTli.end (),
433
+ [] (const auto & l, const auto & r) { return l.LocksBroken > r.LocksBroken ; });
397
434
398
435
auto now = TActivationContext::Now ();
399
436
auto nowUs = now.MicroSeconds ();
400
437
401
438
size_t count = 0 ;
402
439
auto sendEvent = MakeHolder<TEvSysView::TEvSendTopPartitions>();
403
- for (const auto & entry : sorted ) {
440
+ for (const auto & entry : sortedByCpu ) {
404
441
const auto & table = domainTables.Stats [entry.PathId ];
405
442
const auto & followerStats = table.Partitions .at (entry.ShardIdx ).FollowerStats ;
406
443
const auto & partition = followerStats.at (entry.FollowerId );
407
444
const auto & leaderPartition = followerStats.at (0 );
408
445
409
- auto * result = sendEvent->Record .AddPartitions ();
446
+ auto * result = sendEvent->Record .AddPartitionsByCpu ();
410
447
result->SetTabletId (partition.GetTabletId ());
411
448
result->SetPath (table.Path );
412
449
result->SetPeakTimeUs (nowUs);
@@ -422,11 +459,34 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
422
459
break ;
423
460
}
424
461
}
462
+ for (const auto & entry : sortedByTli) {
463
+ const auto & table = domainTables.Stats [entry.PathId ];
464
+ const auto & followerStats = table.Partitions .at (entry.ShardIdx ).FollowerStats ;
465
+ const auto & partition = followerStats.at (entry.FollowerId );
466
+ const auto & leaderPartition = followerStats.at (0 );
467
+
468
+ auto * result = sendEvent->Record .AddPartitionsByTli ();
469
+ result->SetTabletId (partition.GetTabletId ());
470
+ result->SetPath (table.Path );
471
+ result->SetLocksAcquired (partition.GetLocksAcquired ());
472
+ result->SetLocksWholeShard (partition.GetLocksWholeShard ());
473
+ result->SetLocksBroken (partition.GetLocksBroken ());
474
+ result->SetNodeId (partition.GetNodeId ());
475
+ result->SetDataSize (leaderPartition.GetDataSize ());
476
+ result->SetRowCount (leaderPartition.GetRowCount ());
477
+ result->SetIndexSize (leaderPartition.GetIndexSize ());
478
+ result->SetFollowerId (partition.GetFollowerId ());
479
+
480
+ if (++count == TOP_PARTITIONS_COUNT) {
481
+ break ;
482
+ }
483
+ }
425
484
426
485
sendEvent->Record .SetTimeUs (nowUs);
427
486
428
487
SVLOG_D (" NSysView::TPartitionStatsCollector: TEvProcessOverloaded "
429
- << " top size# " << sorted.size ()
488
+ << " , top size by CPU # " << sortedByCpu.size ()
489
+ << " , top size by TLI # " << sortedByTli.size ()
430
490
<< " , time# " << now);
431
491
432
492
Send (MakePipePerNodeCacheID (false ),
@@ -447,8 +507,11 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
447
507
TBase::PassAway ();
448
508
}
449
509
450
- bool IsPartitionOverloaded (const NKikimrSysView::TPartitionStats& stats) const {
451
- return stats.GetCPUCores () >= OverloadedPartitionBound;
510
+ bool IsPartitionOverloadedByCpu (const NKikimrSysView::TPartitionStats& stats) const {
511
+ return stats.GetCPUCores () >= OverloadedByCpuPartitionBound;
512
+ }
513
+ bool IsPartitionOverloadedByTli (const NKikimrSysView::TPartitionStats& stats) const {
514
+ return stats.GetLocksBroken () >= OverloadedByTliPartitionBound;
452
515
}
453
516
454
517
private:
@@ -458,7 +521,8 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
458
521
TPathId DomainKey;
459
522
ui64 SysViewProcessorId = 0 ;
460
523
461
- double OverloadedPartitionBound = 0.7 ;
524
+ double OverloadedByCpuPartitionBound = 0.7 ;
525
+ ui64 OverloadedByTliPartitionBound = 1 ;
462
526
TDuration ProcessOverloadedInterval = TDuration::Seconds(15 );
463
527
464
528
typedef ui32 TFollowerId;
@@ -473,22 +537,23 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
473
537
TString Path;
474
538
};
475
539
476
- struct TOverloadedFollower {
540
+ struct TFollowerStats {
477
541
TShardIdx ShardIdx;
478
542
TFollowerId FollowerId;
479
543
480
- bool operator <(const TOverloadedFollower &other) const {
544
+ bool operator <(const TFollowerStats &other) const {
481
545
return std::tie (ShardIdx, FollowerId) < std::tie (other.ShardIdx , other.FollowerId );
482
546
}
483
547
484
- bool operator ==(const TOverloadedFollower &other) const {
548
+ bool operator ==(const TFollowerStats &other) const {
485
549
return std::tie (ShardIdx, FollowerId) == std::tie (other.ShardIdx , other.FollowerId );
486
550
}
487
551
};
488
552
489
553
struct TDomainTables {
490
554
std::map<TPathId, TTableStats> Stats;
491
- std::unordered_map<TPathId, std::set<TOverloadedFollower>> Overloaded;
555
+ std::unordered_map<TPathId, std::set<TFollowerStats>> OverloadedByCpu;
556
+ std::unordered_map<TPathId, std::set<TFollowerStats>> OverloadedByTli;
492
557
};
493
558
std::unordered_map<TPathId, TDomainTables> DomainTables;
494
559
@@ -694,6 +759,15 @@ class TPartitionStatsScan : public TScanActorBase<TPartitionStatsScan> {
694
759
insert ({TSchema::FollowerId::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
695
760
return TCell::Make<ui32>(stats.GetFollowerId ());
696
761
}});
762
+ insert ({TSchema::LocksAcquired::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
763
+ return TCell::Make<ui64>(stats.GetLocksAcquired ());
764
+ }});
765
+ insert ({TSchema::LocksWholeShard::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
766
+ return TCell::Make<ui64>(stats.GetLocksWholeShard ());
767
+ }});
768
+ insert ({TSchema::LocksBroken::ColumnId, [] (const TPartitionStatsResult&, const TPartitionStats&, const TPartitionStats& stats) {
769
+ return TCell::Make<ui64>(stats.GetLocksBroken ());
770
+ }});
697
771
}
698
772
};
699
773
static TExtractorsMap extractors;
0 commit comments