Skip to content

Commit 0e700ae

Browse files
authored
Add jitter and exponential delay to configs request (#19299)
1 parent e1b310a commit 0e700ae

File tree

4 files changed

+48
-13
lines changed

4 files changed

+48
-13
lines changed

ydb/core/config/init/init.cpp

Lines changed: 43 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -399,28 +399,61 @@ class TDefaultDynConfigClient
399399
std::shared_ptr<IConfigurationResult> res;
400400
bool success = false;
401401
TString error;
402-
403402
SetRandomSeed(TInstant::Now().MicroSeconds());
404-
int minAttempts = 10;
405-
int attempts = 0;
406-
while (!success && attempts < minAttempts) {
403+
404+
const int maxRounds = 10;
405+
const TDuration baseRoundDelay = TDuration::MilliSeconds(500);
406+
const TDuration maxIntraAddrDelay = TDuration::Minutes(3);
407+
const TDuration maxDelay = TDuration::Minutes(5);
408+
const TDuration baseAddressDelay = TDuration::MilliSeconds(250);
409+
410+
auto sleepWithJitteredExponentialDelay = [&env](TDuration baseDelay, TDuration maxDelay, int exponent) {
411+
ui64 multiplier = 1ULL << exponent;
412+
TDuration delay = baseDelay * multiplier;
413+
delay = Min(delay, maxDelay);
414+
415+
ui64 maxMs = delay.MilliSeconds();
416+
ui64 jitteredMs = RandomNumber<ui64>(maxMs + 1);
417+
TDuration jitteredDelay = TDuration::MilliSeconds(jitteredMs);
418+
419+
env.Sleep(jitteredDelay);
420+
};
421+
422+
int round = 0;
423+
int totalAttempts = 0;
424+
425+
while (!success && round < maxRounds) {
426+
int addressIndex = 0;
407427
for (auto addr : addrs) {
428+
// internal timeout is 5 seconds
408429
success = TryToLoadConfigForDynamicNodeFromCMS(grpcSettings, addr, settings, env, logger, res, error);
409-
++attempts;
430+
++totalAttempts;
431+
410432
if (success) {
411433
break;
412434
}
435+
436+
// Exponential delay between individual addresses - delay grows with each address in the round
437+
if (addrs.size() > 1) {
438+
sleepWithJitteredExponentialDelay(baseAddressDelay, maxIntraAddrDelay, Max(addressIndex, round));
439+
}
440+
441+
++addressIndex;
413442
}
414-
// Randomized backoff
443+
415444
if (!success) {
416-
env.Sleep(TDuration::MilliSeconds(500 + RandomNumber<ui64>(1000)));
417-
} else {
418-
break;
445+
++round;
446+
447+
if (round < maxRounds) {
448+
sleepWithJitteredExponentialDelay(baseRoundDelay, maxDelay, round - 1);
449+
}
419450
}
420451
}
421452

422453
if (!success) {
423-
logger.Err() << "WARNING: couldn't load config from CMS: " << error << Endl;
454+
logger.Err() << "WARNING: couldn't load config from Console after "
455+
<< totalAttempts << " attempts across " << round
456+
<< " rounds: " << error << Endl;
424457
}
425458

426459
return res;

ydb/tests/functional/ydb_cli/test_ydb_backup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1319,7 +1319,7 @@ def setup_class(cls):
13191319
storage_pool_units_count={
13201320
'hdd': 1
13211321
},
1322-
timeout_seconds=100,
1322+
timeout_seconds=240,
13231323
token=cls.cluster.config.default_clusteradmin
13241324
)
13251325

@@ -1527,7 +1527,7 @@ def restore(cls, command, input, additional_args=[], token=""):
15271527
"--endpoint", f"grpc://localhost:{cls.restore_cluster.nodes[1].grpc_port}",
15281528
]
15291529
+ command
1530-
+ ["--input", backup_files_dir, "-w", "60s"]
1530+
+ ["--input", backup_files_dir, "-w", "240s"]
15311531
+ additional_args,
15321532
env={"YDB_TOKEN": token}
15331533
)

ydb/tests/functional/ydb_cli/ya.make

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,5 +37,7 @@ PEERDIR(
3737
)
3838

3939
FORK_TEST_FILES()
40+
FORK_SUBTESTS()
41+
SPLIT_FACTOR(30)
4042

4143
END()

ydb/tests/library/harness/kikimr_cluster_interface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def __wait_tenant_up(
153153
self,
154154
database_name,
155155
expected_computational_units=None,
156-
timeout_seconds=120,
156+
timeout_seconds=240,
157157
token=None
158158
):
159159
def predicate():

0 commit comments

Comments
 (0)