@@ -4334,9 +4334,9 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4334
4334
// - change BalancerPolicy to BALANCER_BALANCE for all remaining tablets
4335
4335
// - test that balancer also moved out former BALANCER_IGNORE tablets
4336
4336
//
4337
- static const int NUM_NODES = 4 ;
4338
- static const int NUM_TABLETS = 3 ;
4339
- static const ui64 SINGLE_TABLET_NETWORK_USAGE = 15 '000'000 ;
4337
+ static const int NUM_NODES = 6 ;
4338
+ static const int NUM_TABLETS = 6 ;
4339
+ static const ui64 SINGLE_TABLET_NETWORK_USAGE = 5 '000'000 ;
4340
4340
4341
4341
TTestBasicRuntime runtime (NUM_NODES, false );
4342
4342
@@ -4350,6 +4350,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4350
4350
app.HiveConfig .SetMaxNodeUsageToKick (0.01 );
4351
4351
app.HiveConfig .SetNodeUsageRangeToKick (0 );
4352
4352
app.HiveConfig .SetEmergencyBalancerInflight (1 ); // to ensure fair distribution
4353
+ app.HiveConfig .SetResourceOvercommitment (1 );
4353
4354
});
4354
4355
4355
4356
TActorId senderA = runtime.AllocateEdgeActor ();
@@ -4413,7 +4414,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4413
4414
for (int i = 0 ; i < NUM_TABLETS; ++i) {
4414
4415
THolder<TEvHive::TEvCreateTablet> ev (new TEvHive::TEvCreateTablet (testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
4415
4416
ev->Record .SetObjectId (i);
4416
- switch (i % NUM_TABLETS ) {
4417
+ switch (i % 3 ) {
4417
4418
case 0 : // policy not explicitly set
4418
4419
break ;
4419
4420
case 1 : // policy explicitly set to default value
@@ -4433,7 +4434,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4433
4434
// check that tablets retain their BalancerPolicy flags...
4434
4435
for (const auto & i : tabletInfos_A) {
4435
4436
Ctest << " Step A: tablet index " << i.ObjectId << " , tablet id " << i.TabletId << " , node index " << i.NodeIndex << " , balancer policy " << NKikimrHive::EBalancerPolicy_Name (i.BalancerPolicy ) << Endl;
4436
- switch (i.ObjectId % NUM_TABLETS ) {
4437
+ switch (i.ObjectId % 3 ) {
4437
4438
case 0 :
4438
4439
case 1 :
4439
4440
UNIT_ASSERT_EQUAL_C (i.BalancerPolicy , NKikimrHive::EBalancerPolicy::POLICY_BALANCE, " objectId# " << i.ObjectId << " value# " << (ui64)i.BalancerPolicy << " name# " << NKikimrHive::EBalancerPolicy_Name (i.BalancerPolicy ));
@@ -4547,15 +4548,15 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4547
4548
Ctest << Endl;
4548
4549
auto minmax = std::minmax_element (nodeTablets.begin (), nodeTablets.end ());
4549
4550
UNIT_ASSERT_VALUES_EQUAL (*minmax.first , 0 );
4550
- UNIT_ASSERT_VALUES_EQUAL (*minmax.second , 1 );
4551
- UNIT_ASSERT_VALUES_EQUAL (nodeTablets[0 ], 1 );
4551
+ UNIT_ASSERT_VALUES_EQUAL (*minmax.second , NUM_TABLETS / 3 );
4552
+ UNIT_ASSERT_VALUES_EQUAL (nodeTablets[0 ], NUM_TABLETS / 3 );
4552
4553
}
4553
4554
4554
4555
Ctest << " Step D: change tablets BalancerPolicy" << Endl;
4555
4556
4556
4557
// set all tablets with BalancerPolicy "ignore" back to "balance"
4557
4558
for (int i = 0 ; i < NUM_TABLETS; ++i) {
4558
- switch (i % NUM_TABLETS ) {
4559
+ switch (i % 3 ) {
4559
4560
case 0 :
4560
4561
case 1 :
4561
4562
break ;
@@ -4580,12 +4581,12 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4580
4581
4581
4582
Ctest << " Step D: raise metrics for previously ignored tablets" << Endl;
4582
4583
for (const auto & i: tabletInfos_D) {
4583
- switch (i.ObjectId % NUM_TABLETS ) {
4584
+ switch (i.ObjectId % 3 ) {
4584
4585
case 0 :
4585
4586
case 1 :
4586
4587
break ;
4587
4588
case 2 :
4588
- reportTabletMetrics (i.TabletId , NUM_TABLETS * SINGLE_TABLET_NETWORK_USAGE, true );
4589
+ reportTabletMetrics (i.TabletId , 2 * SINGLE_TABLET_NETWORK_USAGE, true );
4589
4590
break ;
4590
4591
}
4591
4592
}
@@ -4605,7 +4606,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4605
4606
bool ignoredTabletsAreMoved = false ;
4606
4607
for (const auto & i : tabletInfos_E) {
4607
4608
Ctest << " Step E: tablet index " << i.ObjectId << " , tablet id " << i.TabletId << " , node index " << i.NodeIndex << " , balancer policy " << NKikimrHive::EBalancerPolicy_Name (i.BalancerPolicy ) << Endl;
4608
- switch (i.ObjectId % NUM_TABLETS ) {
4609
+ switch (i.ObjectId % 3 ) {
4609
4610
case 0 :
4610
4611
case 1 :
4611
4612
break ;
@@ -4620,7 +4621,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4620
4621
}
4621
4622
UNIT_ASSERT_VALUES_EQUAL (ignoredTabletsAreMoved, true );
4622
4623
}
4623
- // ...and that the original node is completely void of tablets
4624
+ // ...and that the original node has only one tablet left
4624
4625
{
4625
4626
std::array<int , NUM_NODES> nodeTablets = {};
4626
4627
for (auto & i : tabletInfos_E) {
@@ -4632,9 +4633,9 @@ Y_UNIT_TEST_SUITE(THiveTest) {
4632
4633
}
4633
4634
Ctest << Endl;
4634
4635
auto minmax = std::minmax_element (nodeTablets.begin (), nodeTablets.end ());
4635
- UNIT_ASSERT_VALUES_EQUAL (*minmax.first , 0 );
4636
+ UNIT_ASSERT_VALUES_EQUAL (*minmax.first , 1 );
4636
4637
UNIT_ASSERT_VALUES_EQUAL (*minmax.second , 1 );
4637
- UNIT_ASSERT_VALUES_EQUAL (nodeTablets[0 ], 0 );
4638
+ UNIT_ASSERT_VALUES_EQUAL (nodeTablets[0 ], 1 );
4638
4639
}
4639
4640
}
4640
4641
@@ -5456,6 +5457,126 @@ Y_UNIT_TEST_SUITE(THiveTest) {
5456
5457
UNIT_ASSERT_VALUES_EQUAL (newDistribution[0 ].size (), newDistribution[1 ].size ());
5457
5458
}
5458
5459
5460
+ Y_UNIT_TEST (TestHiveBalancerOneTabletHighUsage) {
5461
+ static constexpr ui64 NUM_NODES = 4 ;
5462
+ static constexpr ui64 NUM_TABLETS = NUM_NODES * NUM_NODES;
5463
+ TTestBasicRuntime runtime (NUM_NODES, false );
5464
+ Setup (runtime, true , 1 , [](TAppPrepare& app) {
5465
+ app.HiveConfig .SetTabletKickCooldownPeriod (3 );
5466
+ app.HiveConfig .SetResourceChangeReactionPeriod (0 );
5467
+ app.HiveConfig .SetMinPeriodBetweenEmergencyBalance (0 );
5468
+ });
5469
+ const int nodeBase = runtime.GetNodeId (0 );
5470
+ TActorId senderA = runtime.AllocateEdgeActor ();
5471
+ const ui64 hiveTablet = MakeDefaultHiveID ();
5472
+ const ui64 testerTablet = MakeTabletID (false , 1 );
5473
+
5474
+ using TDistribution = std::array<std::vector<ui64>, NUM_NODES>;
5475
+ auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> TDistribution {
5476
+ std::array<std::vector<ui64>, NUM_NODES> nodeTablets = {};
5477
+ {
5478
+ runtime.SendToPipe (hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo ());
5479
+ TAutoPtr<IEventHandle> handle;
5480
+ TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow <TEvHive::TEvResponseHiveInfo>(handle);
5481
+ for (const NKikimrHive::TTabletInfo& tablet : response->Record .GetTablets ()) {
5482
+ if (tablet.GetNodeID () == 0 ) {
5483
+ continue ;
5484
+ }
5485
+ UNIT_ASSERT_C (((int )tablet.GetNodeID () - nodeBase >= 0 ) && (tablet.GetNodeID () - nodeBase < NUM_NODES),
5486
+ " nodeId# " << tablet.GetNodeID () << " nodeBase# " << nodeBase);
5487
+ nodeTablets[tablet.GetNodeID () - nodeBase].push_back (tablet.GetTabletID ());
5488
+ }
5489
+ }
5490
+ return nodeTablets;
5491
+ };
5492
+
5493
+ auto tabletNode = [](const TDistribution& distribution, ui64 tabletId) -> std::optional<size_t > {
5494
+ auto hasTablet = [tabletId](const std::vector<ui64>& tablets) {
5495
+ return std::find (tablets.begin (), tablets.end (), tabletId) != tablets.end ();
5496
+ };
5497
+ auto it = std::find_if (distribution.begin (), distribution.end (), hasTablet);
5498
+ if (it == distribution.end ()) {
5499
+ return std::nullopt;
5500
+ }
5501
+ return it - distribution.begin ();
5502
+ };
5503
+
5504
+ CreateTestBootstrapper (runtime, CreateTestTabletInfo (hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
5505
+
5506
+ // wait for creation of nodes
5507
+ {
5508
+ TDispatchOptions options;
5509
+ options.FinalEvents .emplace_back (TEvLocal::EvStatus, NUM_NODES);
5510
+ runtime.DispatchEvents (options);
5511
+ }
5512
+
5513
+ TTabletTypes::EType tabletType = TTabletTypes::Dummy;
5514
+ std::vector<ui64> tablets;
5515
+ tablets.reserve (NUM_TABLETS);
5516
+ for (size_t i = 0 ; i < NUM_TABLETS; ++i) {
5517
+ THolder<TEvHive::TEvCreateTablet> ev (new TEvHive::TEvCreateTablet (testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
5518
+ ev->Record .SetObjectId (i);
5519
+ ui64 tabletId = SendCreateTestTablet (runtime, hiveTablet, testerTablet, std::move (ev), 0 , true );
5520
+ tablets.push_back (tabletId);
5521
+ MakeSureTabletIsUp (runtime, tabletId, 0 );
5522
+ }
5523
+
5524
+ const ui64 overloadingTablet = tablets.front ();
5525
+ auto distribution = getDistribution ();
5526
+ auto nodeWithTablet = tabletNode (distribution, overloadingTablet);
5527
+ Ctest << " picked tablet " << overloadingTablet << Endl;
5528
+ unsigned moves = 0 ;
5529
+
5530
+ for (int i = 0 ; i < 20 ; ++i) {
5531
+ for (int j = 0 ; j < 5 ; ++j) {
5532
+ for (ui32 node = 0 ; node < NUM_NODES; ++node) {
5533
+ TActorId sender = runtime.AllocateEdgeActor (node);
5534
+ THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>();
5535
+ metrics->Record .SetTotalNodeUsage (node == nodeWithTablet ? .99 : .05 );
5536
+
5537
+ runtime.SendToPipe (hiveTablet, sender, metrics.Release (), node);
5538
+ }
5539
+ }
5540
+
5541
+ TDispatchOptions options;
5542
+ options.FinalEvents .emplace_back (NHive::TEvPrivate::EvBalancerOut);
5543
+ runtime.DispatchEvents (options, TDuration::MilliSeconds (10 ));
5544
+ runtime.AdvanceCurrentTime (TDuration::MilliSeconds (500 ));
5545
+
5546
+ distribution = getDistribution ();
5547
+ auto newNodeWithTablet = tabletNode (distribution, overloadingTablet);
5548
+ if (newNodeWithTablet != nodeWithTablet) {
5549
+ nodeWithTablet = newNodeWithTablet;
5550
+ if (newNodeWithTablet) {
5551
+ ++moves;
5552
+ }
5553
+ }
5554
+
5555
+ Ctest << " distribution: " ;
5556
+ for (size_t i = 0 ; i < NUM_NODES; ++i) {
5557
+ if (i == nodeWithTablet) {
5558
+ Ctest << " *" ;
5559
+ }
5560
+ Ctest << distribution[i].size () << " " ;
5561
+ }
5562
+ Ctest << Endl;
5563
+ }
5564
+
5565
+ UNIT_ASSERT_LE (moves, 2 );
5566
+
5567
+ std::set<size_t > tabletsOnNodes;
5568
+ Ctest << " Final distribution: " ;
5569
+ for (size_t i = 0 ; i < NUM_NODES; ++i) {
5570
+ Ctest << distribution[i].size () << " " ;
5571
+ if (i != nodeWithTablet) {
5572
+ tabletsOnNodes.insert (distribution[i].size ());
5573
+ }
5574
+ }
5575
+ Ctest << Endl;
5576
+ UNIT_ASSERT_VALUES_EQUAL (distribution[*nodeWithTablet].size (), 1 );
5577
+ UNIT_ASSERT_VALUES_EQUAL (tabletsOnNodes.size (), 1 );
5578
+ }
5579
+
5459
5580
Y_UNIT_TEST (TestUpdateTabletsObjectUpdatesMetrics) {
5460
5581
TTestBasicRuntime runtime (1 , false );
5461
5582
Setup (runtime, true );
0 commit comments