Skip to content

Commit c9f3084

Browse files
eivanov89CyberROFL
andauthored
ydb debug latency (#12803)
Co-authored-by: Ilnaz Nizametdinov <i.nizametdinov@gmail.com>
1 parent 2efbeed commit c9f3084

File tree

6 files changed

+416
-12
lines changed

6 files changed

+416
-12
lines changed

ydb/public/lib/ydb_cli/commands/ya.make

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SRCS(
1212
ydb_cluster.cpp
1313
ydb_debug.cpp
1414
ydb_dynamic_config.cpp
15+
ydb_latency.cpp
1516
ydb_ping.cpp
1617
ydb_profile.cpp
1718
ydb_root_common.cpp
@@ -72,6 +73,7 @@ PEERDIR(
7273
)
7374

7475
GENERATE_ENUM_SERIALIZATION(ydb_ping.h)
76+
GENERATE_ENUM_SERIALIZATION(ydb_latency.h)
7577

7678
END()
7779

ydb/public/lib/ydb_cli/commands/ydb_debug.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
#include "ydb_debug.h"
22

3+
#include "ydb_latency.h"
34
#include "ydb_ping.h"
45

56
namespace NYdb::NConsoleClient {
67

78
TCommandDebug::TCommandDebug()
89
: TClientCommandTree("debug", {}, "YDB cluster debug operations")
910
{
11+
AddCommand(std::make_unique<TCommandLatency>());
1012
AddCommand(std::make_unique<TCommandPing>());
1113
}
1214

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
#include "ydb_latency.h"
2+
3+
#include <ydb/public/sdk/cpp/client/ydb_debug/client.h>
4+
#include <ydb/public/sdk/cpp/client/ydb_query/client.h>
5+
6+
#include <library/cpp/histogram/hdr/histogram.h>
7+
8+
#include <util/generic/serialized_enum.h>
9+
#include <util/system/hp_timer.h>
10+
11+
#include <atomic>
12+
#include <functional>
13+
#include <thread>
14+
#include <vector>
15+
16+
namespace NYdb::NConsoleClient {
17+
18+
namespace {
19+
20+
constexpr int DEFAULT_WARMUP_SECONDS = 1;
21+
constexpr int DEFAULT_INTERVAL_SECONDS = 5;
22+
constexpr int DEFAULT_MAX_INFLIGHT = 128;
23+
constexpr int DEFAULT_PERCENTILE = 99.0;
24+
25+
constexpr TCommandLatency::EFormat DEFAULT_FORMAT = TCommandLatency::EFormat::Plain;
26+
constexpr TCommandPing::EPingKind DEFAULT_RUN_KIND = TCommandPing::EPingKind::AllKinds;
27+
28+
// 1-16, 32, 64, ...
29+
constexpr int INCREMENT_UNTIL_THREAD_COUNT = 16;
30+
31+
const TString QUERY = "SELECT 1;";
32+
33+
// Factory returns callable, which makes requests to the DB.
34+
// In case of QueryService, this callable contains associated session.
35+
using TRequestMaker = std::function<bool()>;
36+
using TCallableFactory = std::function<TRequestMaker()>;
37+
38+
struct TResult {
39+
TCommandPing::EPingKind Kind;
40+
int ThreadCount = 0;
41+
int LatencyUs = 0;
42+
int Throughput = 0;
43+
};
44+
45+
struct alignas(64) TEvaluateResult {
46+
TEvaluateResult()
47+
: LatencyHistogramUs(1, 1024, 5)
48+
{
49+
}
50+
51+
ui64 OkCount = 0;
52+
ui64 ErrorCount = 0;
53+
int LatencyUs = 0;
54+
55+
NHdr::THistogram LatencyHistogramUs;
56+
};
57+
58+
void Evaluate(
59+
TEvaluateResult& total,
60+
ui64 warmupSeconds,
61+
ui64 intervalSeconds,
62+
int threadCount,
63+
TCallableFactory factory,
64+
double percentile)
65+
{
66+
std::atomic<bool> startMeasure{false};
67+
std::atomic<bool> stop{false};
68+
69+
// May delay for ~50ms to compute frequency, better to delay here
70+
volatile auto clockRate = NHPTimer::GetClockRate();
71+
Y_UNUSED(clockRate);
72+
73+
auto timer = std::thread([&startMeasure, &stop, warmupSeconds, intervalSeconds]() {
74+
std::this_thread::sleep_for(std::chrono::seconds(warmupSeconds));
75+
startMeasure.store(true, std::memory_order_relaxed);
76+
77+
std::this_thread::sleep_for(std::chrono::seconds(intervalSeconds));
78+
stop.store(true, std::memory_order_relaxed);
79+
});
80+
81+
std::vector<TEvaluateResult> results(threadCount);
82+
std::vector<std::thread> threads;
83+
84+
for (int i = 0; i < threadCount; ++i) {
85+
threads.emplace_back([i, &results, &startMeasure, &stop, &factory]() {
86+
auto& result = results[i];
87+
88+
THPTimer timer;
89+
90+
TRequestMaker requester;
91+
try {
92+
requester = factory();
93+
} catch (yexception ex) {
94+
Cerr << "Failed to create request maker: " << ex.what() << Endl;
95+
return;
96+
}
97+
98+
while (!startMeasure.load(std::memory_order_relaxed)) {
99+
try {
100+
requester();
101+
} catch (...) {
102+
continue;
103+
}
104+
}
105+
106+
while (!stop.load(std::memory_order_relaxed)) {
107+
try {
108+
timer.Reset();
109+
if (requester()) {
110+
int usecPassed = static_cast<int>(timer.Passed() * 1'000'000);
111+
result.LatencyHistogramUs.RecordValue(usecPassed);
112+
++result.OkCount;
113+
} else {
114+
++result.ErrorCount;
115+
}
116+
} catch (...) {
117+
++result.ErrorCount;
118+
}
119+
}
120+
});
121+
}
122+
123+
timer.join();
124+
for (auto& thread : threads) {
125+
thread.join();
126+
}
127+
128+
for (const auto& result: results) {
129+
total.OkCount += result.OkCount;
130+
total.ErrorCount += result.ErrorCount;
131+
total.LatencyHistogramUs.Add(result.LatencyHistogramUs);
132+
}
133+
134+
total.LatencyUs = total.LatencyHistogramUs.GetValueAtPercentile(percentile);
135+
}
136+
137+
} // anonymous
138+
139+
TCommandLatency::TCommandLatency()
140+
: TYdbCommand("latency", {}, "Check basic latency with variable inflight")
141+
, IntervalSeconds(DEFAULT_INTERVAL_SECONDS)
142+
, MaxInflight(DEFAULT_MAX_INFLIGHT)
143+
, Format(DEFAULT_FORMAT)
144+
, RunKind(DEFAULT_RUN_KIND)
145+
, Percentile(DEFAULT_PERCENTILE)
146+
{}
147+
148+
void TCommandLatency::Config(TConfig& config) {
149+
TYdbCommand::Config(config);
150+
151+
const TString& availableKinds = GetEnumAllNames<TCommandPing::EPingKind>();
152+
const TString& availableFormats = GetEnumAllNames<TCommandLatency::EFormat>();
153+
154+
config.Opts->AddLongOption(
155+
'i', "interval", TStringBuilder() << "Seconds for each latency kind")
156+
.RequiredArgument("INT").StoreResult(&IntervalSeconds).DefaultValue(DEFAULT_INTERVAL_SECONDS);
157+
config.Opts->AddLongOption(
158+
'm', "max-inflight", TStringBuilder() << "Max inflight")
159+
.RequiredArgument("INT").StoreResult(&MaxInflight).DefaultValue(DEFAULT_MAX_INFLIGHT);
160+
config.Opts->AddLongOption(
161+
'p', "percentile", TStringBuilder() << "Latency percentile")
162+
.RequiredArgument("DOUBLE").StoreResult(&Percentile).DefaultValue(DEFAULT_PERCENTILE);
163+
config.Opts->AddLongOption(
164+
'f', "format", TStringBuilder() << "Output format. Available options: " << availableFormats)
165+
.OptionalArgument("STRING").StoreResult(&Format).DefaultValue(DEFAULT_FORMAT);
166+
config.Opts->AddLongOption(
167+
'k', "kind", TStringBuilder() << "Use only specified ping kind. Available options: "<< availableKinds)
168+
.OptionalArgument("STRING").StoreResult(&RunKind).DefaultValue(DEFAULT_RUN_KIND);
169+
}
170+
171+
void TCommandLatency::Parse(TConfig& config) {
172+
TClientCommand::Parse(config);
173+
}
174+
175+
int TCommandLatency::Run(TConfig& config) {
176+
TDriver driver = CreateDriver(config);
177+
178+
SetInterruptHandlers();
179+
180+
auto debugClient = std::make_shared<NDebug::TDebugClient>(driver);
181+
auto queryClient = std::make_shared<NQuery::TQueryClient>(driver);
182+
183+
auto plainGrpcPingFactory = [debugClient] () {
184+
return [debugClient] () {
185+
return TCommandPing::PingPlainGrpc(*debugClient);
186+
};
187+
};
188+
189+
auto grpcPingFactory = [debugClient] () {
190+
return [debugClient] () {
191+
return TCommandPing::PingGrpcProxy(*debugClient);
192+
};
193+
};
194+
195+
auto plainKqpPingFactory = [debugClient] () {
196+
return [debugClient] () {
197+
return TCommandPing::PingPlainKqp(*debugClient);
198+
};
199+
};
200+
201+
auto schemeCachePingFactory = [debugClient] () {
202+
return [debugClient] () {
203+
return TCommandPing::PingSchemeCache(*debugClient);
204+
};
205+
};
206+
207+
auto txProxyPingFactory = [debugClient] () {
208+
return [debugClient] () {
209+
return TCommandPing::PingTxProxy(*debugClient);
210+
};
211+
};
212+
213+
auto select1Factory = [queryClient] () {
214+
// note, that each thread has own session
215+
auto session = std::make_shared<NQuery::TSession>(queryClient->GetSession().GetValueSync().GetSession());
216+
return [session] () {
217+
return TCommandPing::PingKqpSelect1(*session, QUERY);
218+
};
219+
};
220+
221+
using TTaskPair = std::pair<TCommandPing::EPingKind, TCallableFactory>;
222+
const std::vector<TTaskPair> allTasks = {
223+
{ TCommandPing::EPingKind::PlainGrpc, plainGrpcPingFactory },
224+
{ TCommandPing::EPingKind::GrpcProxy, grpcPingFactory },
225+
{ TCommandPing::EPingKind::PlainKqp, plainKqpPingFactory },
226+
{ TCommandPing::EPingKind::Select1, select1Factory },
227+
{ TCommandPing::EPingKind::SchemeCache, schemeCachePingFactory },
228+
{ TCommandPing::EPingKind::TxProxy, txProxyPingFactory },
229+
};
230+
231+
std::vector<TTaskPair> runTasks;
232+
if (RunKind == TCommandPing::EPingKind::AllKinds) {
233+
runTasks = allTasks;
234+
} else {
235+
for (size_t i = 0; i < allTasks.size(); ++i) {
236+
if (allTasks[i].first == RunKind) {
237+
runTasks = { allTasks[i] };
238+
break;
239+
}
240+
}
241+
}
242+
243+
if (runTasks.empty()) {
244+
return -1; // sanity check, never happens
245+
}
246+
247+
std::vector<TResult> results;
248+
for (const auto& [taskKind, factory]: runTasks) {
249+
for (int threadCount = 1; threadCount <= MaxInflight && !IsInterrupted(); ) {
250+
TEvaluateResult result;
251+
Evaluate(result, DEFAULT_WARMUP_SECONDS, IntervalSeconds, threadCount, factory, Percentile);
252+
253+
bool skip = false;
254+
if (result.ErrorCount) {
255+
auto totalRequests = result.ErrorCount + result.OkCount;
256+
double errorsPercent = 100.0 * result.ErrorCount / totalRequests;
257+
if (errorsPercent >= 1) {
258+
Cerr << "Skipping " << taskKind << ", threads=" << threadCount
259+
<< ": error rate=" << errorsPercent << "%" << Endl;
260+
skip = true;
261+
}
262+
}
263+
264+
if (!skip) {
265+
ui64 throughput = result.OkCount / IntervalSeconds;
266+
ui64 throughputPerThread = throughput / threadCount;
267+
ui64 latencyUsec = result.LatencyUs;
268+
269+
results.emplace_back(taskKind, threadCount, latencyUsec, throughput);
270+
271+
if (Format == EFormat::Plain) {
272+
Cout << taskKind << " threads=" << threadCount
273+
<< ", throughput: " << throughput
274+
<< ", per thread: " << throughputPerThread
275+
<< ", latency p" << Percentile << " usec: " << latencyUsec
276+
<< ", ok: " << result.OkCount
277+
<< ", error: " << result.ErrorCount << Endl;
278+
}
279+
}
280+
281+
if (threadCount < INCREMENT_UNTIL_THREAD_COUNT) {
282+
++threadCount;
283+
} else {
284+
threadCount *= 2;
285+
}
286+
}
287+
}
288+
289+
if (Format == EFormat::Plain) {
290+
return 0;
291+
}
292+
293+
TMap<TCommandPing::EPingKind, std::vector<int>> latencies;
294+
TMap<TCommandPing::EPingKind, std::vector<int>> throughputs;
295+
for (const auto& result: results) {
296+
latencies[result.Kind].push_back(result.LatencyUs);
297+
throughputs[result.Kind].push_back(result.Throughput);
298+
}
299+
300+
if (Format == EFormat::CSV) {
301+
const int maxThreadsMeasured = results.back().ThreadCount;
302+
303+
Cout << Endl;
304+
Cout << "Latencies" << Endl;
305+
306+
TStringStream ss;
307+
ss << "Kind";
308+
for (int i = 1; i <= maxThreadsMeasured;) {
309+
ss << "," << i;
310+
if (i < INCREMENT_UNTIL_THREAD_COUNT) {
311+
++i;
312+
} else {
313+
i *= 2;
314+
}
315+
}
316+
ss << Endl;
317+
TString header = ss.Str();
318+
319+
Cout << header;
320+
for (const auto& [kind, vec]: latencies) {
321+
Cout << kind;
322+
for (auto value: vec) {
323+
Cout << "," << value;
324+
}
325+
Cout << Endl;
326+
}
327+
Cout << Endl;
328+
329+
Cout << "Througputs" << Endl;
330+
Cout << header;
331+
for (const auto& [kind, vec]: throughputs) {
332+
Cout << kind;
333+
for (auto value: vec) {
334+
Cout << "," << value;
335+
}
336+
Cout << Endl;
337+
}
338+
}
339+
340+
return 0;
341+
}
342+
343+
} // NYdb::NConsoleClient

0 commit comments

Comments
 (0)