Skip to content

Commit eebeb70

Browse files
authored
persqueue: add user-agent counters (#10603)
1 parent 78f6425 commit eebeb70

File tree

14 files changed

+475
-10
lines changed

14 files changed

+475
-10
lines changed

ydb/core/base/counters.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ static const THashSet<TString> DATABASE_SERVICES
4040
TString("pqproxy|readSession"),
4141
TString("pqproxy|schemecache"),
4242
TString("pqproxy|mirrorWriteTimeLag"),
43+
TString("pqproxy|userAgents"),
4344
TString("datastreams"),
4445
}};
4546

ydb/public/sdk/cpp/client/ydb_persqueue_public/ut/ut_utils/data_plane_helpers.cpp

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "data_plane_helpers.h"
2+
#include <ydb/public/sdk/cpp/client/resources/ydb_resources.h>
3+
#include <ydb/public/sdk/cpp/client/ydb_topic/topic.h>
24

35
namespace NKikimr::NPersQueueTests {
46

@@ -51,7 +53,8 @@ namespace NKikimr::NPersQueueTests {
5153
std::optional<ui32> partitionGroup,
5254
std::optional<TString> codec,
5355
std::optional<bool> reconnectOnFailure,
54-
THashMap<TString, TString> sessionMeta
56+
THashMap<TString, TString> sessionMeta,
57+
const TString& userAgent
5558
) {
5659
auto settings = TWriteSessionSettings().Path(topic).MessageGroupId(sourceId);
5760
if (partitionGroup) settings.PartitionGroupId(*partitionGroup);
@@ -66,6 +69,9 @@ namespace NKikimr::NPersQueueTests {
6669
}
6770
settings.MaxMemoryUsage(1024*1024*1024*1024ll);
6871
settings.Meta_.Fields = sessionMeta;
72+
if (!userAgent.empty()) {
73+
settings.Header({{NYdb::YDB_APPLICATION_NAME, userAgent}});
74+
}
6975
return CreateSimpleWriter(driver, settings);
7076
}
7177

@@ -79,6 +85,21 @@ namespace NKikimr::NPersQueueTests {
7985
return TPersQueueClient(driver, clientSettings).CreateReadSession(TReadSessionSettings(settings).DisableClusterDiscovery(true));
8086
}
8187

88+
// Creates a Topic API read session for tests.
//
// driver    - YDB driver the topic client is built on.
// settings  - read session settings; they are copied, the caller's object is not modified.
// creds     - optional credentials provider factory; applied to the client settings only when non-null.
// userAgent - when non-empty, it is passed as the NYdb::YDB_APPLICATION_NAME header of the read session.
std::shared_ptr<NYdb::NTopic::IReadSession> CreateReader(
    NYdb::TDriver& driver,
    const NYdb::NTopic::TReadSessionSettings& settings,
    std::shared_ptr<NYdb::ICredentialsProviderFactory> creds,
    const TString& userAgent
) {
    NYdb::NTopic::TTopicClientSettings topicClientSettings;
    if (creds) {
        topicClientSettings.CredentialsProviderFactory(creds);
    }

    // Work on a private copy so the header can be attached without touching the caller's settings.
    NYdb::NTopic::TReadSessionSettings sessionSettings = settings;
    if (!userAgent.empty()) {
        sessionSettings.Header({{NYdb::YDB_APPLICATION_NAME, userAgent}});
    }

    NYdb::NTopic::TTopicClient topicClient(driver, topicClientSettings);
    return topicClient.CreateReadSession(sessionSettings);
}
102+
82103
TMaybe<TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<IReadSession>& reader, TDuration timeout) {
83104
while (true) {
84105
auto future = reader->WaitEvent();
@@ -99,4 +120,25 @@ namespace NKikimr::NPersQueueTests {
99120
}
100121
return {};
101122
}
123+
124+
// Returns the next data event from a Topic API read session, confirming partition
// start/stop events along the way.
//
// Each iteration waits up to `timeout` for an event to become available and then polls
// the session without blocking. Returns an empty TMaybe when no event is available
// after the wait or when the session reports that it was closed.
TMaybe<NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<NYdb::NTopic::IReadSession>& reader, TDuration timeout) {
    using TEvents = NYdb::NTopic::TReadSessionEvent;

    for (;;) {
        auto eventReady = reader->WaitEvent();
        eventReady.Wait(timeout);

        TMaybe<TEvents::TEvent> nextEvent = reader->GetEvent(false, 1);
        if (!nextEvent) {
            return {};
        }

        if (auto* data = std::get_if<TEvents::TDataReceivedEvent>(&*nextEvent)) {
            return *data;
        }
        if (auto* start = std::get_if<TEvents::TStartPartitionSessionEvent>(&*nextEvent)) {
            start->Confirm();
            continue;
        }
        if (auto* stop = std::get_if<TEvents::TStopPartitionSessionEvent>(&*nextEvent)) {
            stop->Confirm();
            continue;
        }
        if (std::get_if<NYdb::NTopic::TSessionClosedEvent>(&*nextEvent)) {
            return {};
        }
        // Any other event kind is ignored; keep waiting for data.
    }
}
102144
}

ydb/public/sdk/cpp/client/ydb_persqueue_public/ut/ut_utils/data_plane_helpers.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ydb/public/sdk/cpp/client/ydb_driver/driver.h>
44
#include <ydb/public/sdk/cpp/client/ydb_table/table.h>
55
#include <ydb/public/sdk/cpp/client/ydb_persqueue_public/persqueue.h>
6+
#include <ydb/public/sdk/cpp/client/ydb_topic/topic.h>
67

78
namespace NKikimr::NPersQueueTests {
89

@@ -34,16 +35,24 @@ namespace NKikimr::NPersQueueTests {
3435
std::optional<ui32> partitionGroup = {},
3536
std::optional<TString> codec = {},
3637
std::optional<bool> reconnectOnFailure = {},
37-
THashMap<TString, TString> sessionMeta = {}
38+
THashMap<TString, TString> sessionMeta = {},
39+
const TString& userAgent = {}
3840
);
3941

4042
std::shared_ptr<NYdb::NPersQueue::IReadSession> CreateReader(
4143
NYdb::TDriver& driver,
4244
const NYdb::NPersQueue::TReadSessionSettings& settings,
4345
std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr
46+
);
4447

48+
std::shared_ptr<NYdb::NTopic::IReadSession> CreateReader(
49+
NYdb::TDriver& driver,
50+
const NYdb::NTopic::TReadSessionSettings& settings,
51+
std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr,
52+
const TString& userAgent = ""
4553
);
4654

4755
TMaybe<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<NYdb::NPersQueue::IReadSession>& reader, TDuration timeout = TDuration::Max());
56+
TMaybe<NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<NYdb::NTopic::IReadSession>& reader, TDuration timeout = TDuration::Max());
4857

4958
}

ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ class TWriteSessionActor : public NActors::TActorBootstrapped<TWriteSessionActor
465465

466466
void GenerateNextWriteRequest(const NActors::TActorContext& ctx);
467467

468+
void SetupBytesWrittenByUserAgentCounter();
468469
void SetupCounters();
469470
void SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath,
470471
bool isServerless, const TString& folderId);
@@ -570,6 +571,7 @@ class TWriteSessionActor : public NActors::TActorBootstrapped<TWriteSessionActor
570571
TInstant StartTime;
571572
NKikimr::NPQ::TPercentileCounter InitLatency;
572573
NKikimr::NPQ::TMultiCounter SLIBigLatency;
574+
NYdb::NPersQueue::TCounterPtr BytesWrittenByUserAgent;
573575

574576
THolder<NPersQueue::TTopicNamesConverterFactory> ConverterFactory;
575577
NPersQueue::TDiscoveryConverterPtr DiscoveryConverter;
@@ -699,6 +701,7 @@ class TReadSessionActor : public TActorBootstrapped<TReadSessionActor> {
699701
void SendAuthRequest(const TActorContext& ctx);
700702
void CreateInitAndAuthActor(const TActorContext& ctx);
701703

704+
void SetupBytesReadByUserAgentCounter();
702705
void SetupCounters();
703706
void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic);
704707
void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic, const TString& cloudId, const TString& dbId,
@@ -746,6 +749,7 @@ class TReadSessionActor : public TActorBootstrapped<TReadSessionActor> {
746749
TString Session;
747750
TString PeerName;
748751
TString Database;
752+
TString UserAgent;
749753

750754
bool ClientsideLocksAllowed;
751755
bool BalanceRightNow;
@@ -929,6 +933,7 @@ class TReadSessionActor : public TActorBootstrapped<TReadSessionActor> {
929933
NKikimr::NPQ::TPercentileCounter InitLatency;
930934
NKikimr::NPQ::TPercentileCounter CommitLatency;
931935
NKikimr::NPQ::TMultiCounter SLIBigLatency;
936+
NYdb::NPersQueue::TCounterPtr BytesReadByUserAgent;
932937

933938
NKikimr::NPQ::TPercentileCounter ReadLatency;
934939
NKikimr::NPQ::TPercentileCounter ReadLatencyFromDisk;

ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#include <ydb/library/actors/core/log.h>
1616
#include <ydb/library/actors/interconnect/interconnect.h>
17+
#include <ydb/services/persqueue_v1/actors/helpers.h>
18+
1719
#include <library/cpp/protobuf/util/repeated_field_utils.h>
1820

1921
#include <util/string/strip.h>
@@ -655,6 +657,7 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const TActorCo
655657
Session = session;
656658
ProtocolVersion = init.GetProtocolVersion();
657659
CommitsDisabled = init.GetCommitsDisabled();
660+
UserAgent = init.GetVersion();
658661

659662
if (ProtocolVersion >= NPersQueue::TReadRequest::ReadParamsInInit) {
660663
ReadSettingsInited = true;
@@ -835,6 +838,14 @@ void TReadSessionActor::RegisterSessions(const TActorContext& ctx) {
835838
}
836839
}
837840

841+
void TReadSessionActor::SetupBytesReadByUserAgentCounter() {
842+
BytesReadByUserAgent = GetServiceCounters(Counters, "pqproxy|userAgents")
843+
->GetSubgroup("host", "")
844+
->GetSubgroup("protocol", "pqv0")
845+
->GetSubgroup("consumer", ClientPath)
846+
->GetSubgroup("user_agent", V1::CleanupCounterValueString(UserAgent))
847+
->GetExpiringNamedCounter("sensor", "BytesReadByUserAgent", true);
848+
}
838849

839850
void TReadSessionActor::SetupCounters()
840851
{
@@ -864,6 +875,8 @@ void TReadSessionActor::SetupCounters()
864875
if (ProtocolVersion < NPersQueue::TReadRequest::Batching) {
865876
++(*SessionsWithOldBatchingVersion);
866877
}
878+
879+
SetupBytesReadByUserAgentCounter();
867880
}
868881

869882

@@ -1525,6 +1538,9 @@ bool TReadSessionActor::ProcessAnswer(const TActorContext& ctx, TFormedReadRespo
15251538

15261539
Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0);
15271540
i64 diff = formedResponse->Response.ByteSize();
1541+
1542+
BytesReadByUserAgent->Add(diff);
1543+
15281544
const bool hasMessages = HasMessages(formedResponse->Response.GetBatchedData());
15291545
if (hasMessages) {
15301546
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " assign read id " << ReadIdToResponse << " to read request " << formedResponse->Guid);

ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <ydb/library/persqueue/topic_parser/topic_parser.h>
1111
#include <ydb/library/persqueue/topic_parser/counters.h>
1212
#include <ydb/services/lib/sharding/sharding.h>
13+
#include <ydb/services/persqueue_v1/actors/helpers.h>
1314

1415
#include <ydb/library/actors/core/log.h>
1516
#include <util/string/hex.h>
@@ -256,6 +257,16 @@ void TWriteSessionActor::InitAfterDiscovery(const TActorContext& ctx) {
256257
}
257258

258259

260+
void TWriteSessionActor::SetupBytesWrittenByUserAgentCounter() {
261+
BytesWrittenByUserAgent = GetServiceCounters(Counters, "pqproxy|userAgents")
262+
->GetSubgroup("host", "")
263+
->GetSubgroup("protocol", "pqv0")
264+
->GetSubgroup("topic", FullConverter->GetFederationPath())
265+
->GetSubgroup("user_agent", V1::CleanupCounterValueString(UserAgent))
266+
->GetExpiringNamedCounter("sensor", "BytesWrittenByUserAgent", true);
267+
}
268+
269+
259270
void TWriteSessionActor::SetupCounters()
260271
{
261272
if (SessionsCreated) {
@@ -286,6 +297,8 @@ void TWriteSessionActor::SetupCounters()
286297

287298
SessionsCreated.Inc();
288299
SessionsActive.Inc();
300+
301+
SetupBytesWrittenByUserAgentCounter();
289302
}
290303

291304

@@ -307,6 +320,8 @@ void TWriteSessionActor::SetupCounters(const TString& cloudId, const TString& db
307320

308321
SessionsCreated.Inc();
309322
SessionsActive.Inc();
323+
324+
SetupBytesWrittenByUserAgentCounter();
310325
}
311326

312327

@@ -851,6 +866,8 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorCont
851866
BytesInflight.Inc(diff);
852867
BytesInflightTotal.Inc(diff);
853868

869+
BytesWrittenByUserAgent->Add(diff);
870+
854871
if (BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be readed but not sended
855872
Y_ABORT_UNLESS(NextRequestInited);
856873
Handler->ReadyForNextRead();

ydb/services/persqueue_v1/actors/helpers.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,32 @@ bool HasMessages(const Topic::StreamReadMessage::ReadResponse& data) {
2727
return false;
2828
}
2929

30+
31+
// Sanitizes a string so it can be used as a monitoring counter label value.
//
// The internal monitoring system requires label values to be no longer than
// 200 characters and prohibits some ASCII characters. Prohibited characters
// (| * ? " ' ` \) are dropped, and the result is truncated to at most 200
// characters counted after filtering.
//
// value - raw, possibly client-supplied string (for example a user agent).
// Returns the filtered and truncated copy; empty input yields an empty string.
TString CleanupCounterValueString(const TString& value) {
    constexpr size_t valueLengthLimit = 200;

    TString clean;
    for (auto c : value) {
        switch (c) {
            case '|':
            case '*':
            case '?':
            case '"':
            case '\'':
            case '`':
            case '\\':
                // Prohibited character: skip it.
                continue;
            default:
                clean.push_back(c);
        }
        // The limit is checked outside the switch on purpose: a `break` inside
        // the switch would only leave the switch and the loop would keep
        // appending characters past the limit.
        if (clean.size() >= valueLengthLimit) {
            break;
        }
    }
    return clean;
}
57+
3058
}

ydb/services/persqueue_v1/actors/helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@ bool HasMessages(const PersQueue::V1::MigrationStreamingReadServerMessage::DataB
1717

1818
bool HasMessages(const Topic::StreamReadMessage::ReadResponse& data);
1919

20+
TString CleanupCounterValueString(const TString& value);
21+
2022
}

ydb/services/persqueue_v1/actors/read_session_actor.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ TReadSessionActor<UseMigrationProtocol>::TReadSessionActor(
6262
, AutoPartitioningSupport(false)
6363
{
6464
Y_ASSERT(Request);
65+
66+
if (auto values = Request->GetStreamCtx()->GetPeerMetaValues(NYdb::YDB_APPLICATION_NAME); !values.empty()) {
67+
UserAgent = values[0];
68+
}
69+
if (auto values = Request->GetStreamCtx()->GetPeerMetaValues(NYdb::YDB_SDK_BUILD_INFO_HEADER); !values.empty()) {
70+
SdkBuildInfo = values[0];
71+
}
6572
}
6673

6774
template <bool UseMigrationProtocol>
@@ -884,6 +891,18 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr&
884891
}
885892
}
886893

894+
template<bool UseMigrationProtocol>
895+
void TReadSessionActor<UseMigrationProtocol>::SetupBytesReadByUserAgentCounter() {
896+
static constexpr auto protocol = UseMigrationProtocol ? "pqv1" : "topic";
897+
BytesReadByUserAgent = GetServiceCounters(Counters, "pqproxy|userAgents")
898+
->GetSubgroup("host", "")
899+
->GetSubgroup("protocol", protocol)
900+
->GetSubgroup("consumer", ClientPath)
901+
->GetSubgroup("sdk_build_info", CleanupCounterValueString(SdkBuildInfo))
902+
->GetSubgroup("user_agent", CleanupCounterValueString(UserAgent))
903+
->GetExpiringNamedCounter("sensor", "BytesReadByUserAgent", true);
904+
}
905+
887906
template <bool UseMigrationProtocol>
888907
void TReadSessionActor<UseMigrationProtocol>::SetupCounters() {
889908
if (SessionsCreated) {
@@ -913,6 +932,8 @@ void TReadSessionActor<UseMigrationProtocol>::SetupCounters() {
913932
++(*SessionsCreated);
914933
++(*SessionsActive);
915934
PartsPerSession.IncFor(Partitions.size(), 1); // for 0
935+
936+
SetupBytesReadByUserAgentCounter();
916937
}
917938

918939
template <bool UseMigrationProtocol>
@@ -937,6 +958,8 @@ void TReadSessionActor<UseMigrationProtocol>::SetupTopicCounters(const NPersQueu
937958
topicCounters.CommitLatency = CommitLatency;
938959
topicCounters.SLIBigLatency = SLIBigLatency;
939960
topicCounters.SLITotal = SLITotal;
961+
962+
SetupBytesReadByUserAgentCounter();
940963
}
941964

942965
template <bool UseMigrationProtocol>
@@ -960,6 +983,8 @@ void TReadSessionActor<UseMigrationProtocol>::SetupTopicCounters(const NPersQueu
960983
topicCounters.CommitLatency = CommitLatency;
961984
topicCounters.SLIBigLatency = SLIBigLatency;
962985
topicCounters.SLITotal = SLITotal;
986+
987+
SetupBytesReadByUserAgentCounter();
963988
}
964989

965990
template <bool UseMigrationProtocol>
@@ -1956,6 +1981,8 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead
19561981
formedResponse->Response.mutable_read_response()->set_bytes_size(sizeEstimation);
19571982
}
19581983

1984+
BytesReadByUserAgent->Add(sizeEstimation);
1985+
19591986
if (formedResponse->IsDirectRead) {
19601987
auto it = Partitions.find(formedResponse->AssignId);
19611988
if (it == Partitions.end()) {

ydb/services/persqueue_v1/actors/read_session_actor.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ class TReadSessionActor
321321
void CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason, const TActorContext& ctx);
322322
void SendLockPartitionToSelf(ui32 partitionId, TString topicName, TTopicHolder topic, const TActorContext& ctx);
323323

324+
void SetupBytesReadByUserAgentCounter();
324325
void SetupCounters();
325326
void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic);
326327
void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic,
@@ -343,6 +344,9 @@ class TReadSessionActor
343344
const TString ClientDC;
344345
const TInstant StartTimestamp;
345346

347+
TString SdkBuildInfo;
348+
TString UserAgent = UseMigrationProtocol ? "pqv1 server" : "topic server";
349+
346350
TActorId SchemeCache;
347351
TActorId NewSchemeCache;
348352

@@ -425,6 +429,8 @@ class TReadSessionActor
425429
::NMonitoring::TDynamicCounters::TCounterPtr Errors;
426430
::NMonitoring::TDynamicCounters::TCounterPtr PipeReconnects;
427431
::NMonitoring::TDynamicCounters::TCounterPtr BytesInflight;
432+
::NMonitoring::TDynamicCounters::TCounterPtr BytesReadByUserAgent;
433+
428434
ui64 BytesInflight_;
429435
ui64 RequestedBytes;
430436
ui32 ReadsInfly;

0 commit comments

Comments
 (0)