Skip to content

Commit 046e3ca

Browse files
authored
YQ-4111 Fix row dispatcher statistics #14574 / to stable (#14627)
1 parent 772fabd commit 046e3ca

File tree

2 files changed

+43
-34
lines changed

2 files changed

+43
-34
lines changed

ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp

Lines changed: 35 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -123,8 +123,8 @@ struct TAggQueryStat {
123123
NYql::TCounters::TEntry ReadLagMessages;
124124
bool IsWaiting = false;
125125

126-
void Add(const TTopicSessionClientStatistic& stat) {
127-
FilteredBytes.Add(NYql::TCounters::TEntry(stat.FilteredBytes));
126+
void Add(const TTopicSessionClientStatistic& stat, ui64 filteredBytes) {
127+
FilteredBytes.Add(NYql::TCounters::TEntry(filteredBytes));
128128
QueuedBytes.Add(NYql::TCounters::TEntry(stat.QueuedBytes));
129129
QueuedRows.Add(NYql::TCounters::TEntry(stat.QueuedRows));
130130
ReadLagMessages.Add(NYql::TCounters::TEntry(stat.ReadLagMessages));
@@ -266,7 +266,6 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
266266
};
267267

268268
struct TAggregatedStats{
269-
NYql::TCounters::TEntry AllSessionsReadBytes;
270269
THashMap<TQueryStatKey, TMaybe<TAggQueryStat>, TQueryStatKeyHash> LastQueryStats;
271270
TDuration LastUpdateMetricsPeriod;
272271
};
@@ -290,6 +289,7 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
290289
NYql::IPqGateway::TPtr PqGateway;
291290
NActors::TMon* Monitoring;
292291
TNodesTracker NodesTracker;
292+
NYql::TCounters::TEntry AllSessionsDateRate;
293293
TAggregatedStats AggrStats;
294294
ui64 LastCpuTime = 0;
295295

@@ -304,6 +304,7 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
304304
bool PendingNewDataArrived = false;
305305
TActorId TopicSessionId;
306306
TTopicSessionClientStatistic Stat;
307+
ui64 FilteredBytes = 0;
307308
bool StatisticsUpdated = false;
308309
};
309310

@@ -333,7 +334,6 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
333334
THashMap<ui32, TConsumerPartition> Partitions;
334335
const TString QueryId;
335336
TConsumerCounters Counters;
336-
TTopicSessionClientStatistic Stat;
337337
ui64 CpuMicrosec = 0; // Increment.
338338
ui64 Generation;
339339
};
@@ -583,25 +583,30 @@ void TRowDispatcher::UpdateMetrics() {
583583
return;
584584
}
585585

586-
AggrStats.AllSessionsReadBytes = NYql::TCounters::TEntry();
586+
AllSessionsDateRate = NYql::TCounters::TEntry();
587587
for (auto& [queryId, stat] : AggrStats.LastQueryStats) {
588588
stat = Nothing();
589589
}
590590

591591
for (auto& [key, sessionsInfo] : TopicSessions) {
592592
for (auto& [actorId, sessionInfo] : sessionsInfo.Sessions) {
593593
auto read = NYql::TCounters::TEntry(sessionInfo.Stat.ReadBytes);
594-
AggrStats.AllSessionsReadBytes.Add(read);
594+
AllSessionsDateRate.Add(read);
595595
sessionInfo.AggrReadBytes = read;
596596
sessionInfo.Stat.Clear();
597597

598598
for (auto& [readActorId, consumer] : sessionInfo.Consumers) {
599+
const auto partionIt = consumer->Partitions.find(key.PartitionId);
600+
if (partionIt == consumer->Partitions.end()) {
601+
continue;
602+
}
603+
auto& partition = partionIt->second;
599604
auto& stat = AggrStats.LastQueryStats[TQueryStatKey{consumer->QueryId, key.ReadGroup}];
600605
if (!stat) {
601606
stat = TAggQueryStat();
602607
}
603-
stat->Add(consumer->Stat);
604-
consumer->Stat.Clear();
608+
stat->Add(partition.Stat, partition.FilteredBytes);
609+
partition.FilteredBytes = 0;
605610
}
606611
}
607612
}
@@ -658,7 +663,7 @@ TString TRowDispatcher::GetInternalState() {
658663
str << "Max session buffer size: " << toHuman(MaxSessionBufferSizeBytes) << "\n";
659664
str << "CpuMicrosec: " << toHuman(LastCpuTime) << "\n";
660665
str << "DataRate (all sessions): ";
661-
printDataRate(AggrStats.AllSessionsReadBytes);
666+
printDataRate(AllSessionsDateRate);
662667
str << "\n";
663668

664669
THashMap<TQueryStatKey, TAggQueryStat, TQueryStatKeyHash> queryState;
@@ -669,9 +674,14 @@ TString TRowDispatcher::GetInternalState() {
669674
for (auto& [actorId, sessionInfo] : sessionsInfo.Sessions) {
670675
queuedBytesSum += sessionInfo.Stat.QueuedBytes;
671676
for (auto& [readActorId, consumer] : sessionInfo.Consumers) {
677+
const auto partionIt = consumer->Partitions.find(sessionKey.PartitionId);
678+
if (partionIt == consumer->Partitions.end()) {
679+
continue;
680+
}
681+
const auto& partitionStat = partionIt->second.Stat;
672682
auto key = TQueryStatKey{consumer->QueryId, sessionKey.ReadGroup};
673683
++sessionCountByQuery[key];
674-
queryState[key].Add(consumer->Stat);
684+
queryState[key].Add(partitionStat, 0);
675685
}
676686
}
677687
}
@@ -714,16 +724,17 @@ TString TRowDispatcher::GetInternalState() {
714724
continue;
715725
}
716726
const auto& partition = consumer->Partitions[key.PartitionId];
717-
str << " " << consumer->QueryId << " " << LeftPad(readActorId, 32) << " unread bytes "
718-
<< toHuman(consumer->Stat.QueuedBytes) << " (" << leftPad(consumer->Stat.QueuedRows) << " rows) "
719-
<< " offset " << leftPad(consumer->Stat.Offset) << " init offset " << leftPad(consumer->Stat.InitialOffset)
727+
const auto& stat = partition.Stat;
728+
str << " " << consumer->QueryId << " " << LeftPad(readActorId, 33) << " unread bytes "
729+
<< toHuman(stat.QueuedBytes) << " (" << leftPad(stat.QueuedRows) << " rows) "
730+
<< " offset " << leftPad(stat.Offset) << " init offset " << leftPad(stat.InitialOffset)
720731
<< " get " << leftPad(consumer->Counters.GetNextBatch)
721732
<< " arr " << leftPad(consumer->Counters.NewDataArrived) << " btc " << leftPad(consumer->Counters.MessageBatch)
722-
<< " pend get " << leftPad(partition.PendingGetNextBatch) << " pend new " << leftPad(partition.PendingNewDataArrived)
723-
<< " waiting " << consumer->Stat.IsWaiting << " read lag " << leftPad(consumer->Stat.ReadLagMessages)
733+
<< " pend get " << leftPad(partition.PendingGetNextBatch) << " pend new " << leftPad(partition.PendingNewDataArrived)
734+
<< " waiting " << stat.IsWaiting << " read lag " << leftPad(stat.ReadLagMessages)
724735
<< " conn id " << consumer->Generation << "\n";
725-
maxInitialOffset = std::max(maxInitialOffset, consumer->Stat.InitialOffset);
726-
minInitialOffset = std::min(minInitialOffset, consumer->Stat.InitialOffset);
736+
maxInitialOffset = std::max(maxInitialOffset, stat.InitialOffset);
737+
minInitialOffset = std::min(minInitialOffset, stat.InitialOffset);
727738
}
728739
str << " initial offset max " << leftPad(maxInitialOffset) << " min " << leftPad(minInitialOffset) << "\n";;
729740
}
@@ -943,10 +954,8 @@ void TRowDispatcher::DeleteConsumer(NActors::TActorId readActorId) {
943954
partitionId};
944955
TTopicSessionInfo& topicSessionInfo = TopicSessions[topicKey];
945956
TSessionInfo& sessionInfo = topicSessionInfo.Sessions[partition.TopicSessionId];
946-
if (!sessionInfo.Consumers.contains(consumer->ReadActorId)) {
957+
if (!sessionInfo.Consumers.erase(consumer->ReadActorId)) {
947958
LOG_ROW_DISPATCHER_ERROR("Wrong readActorId " << consumer->ReadActorId << ", no such consumer");
948-
} else {
949-
sessionInfo.Consumers.erase(consumer->ReadActorId);
950959
}
951960
if (sessionInfo.Consumers.empty()) {
952961
LOG_ROW_DISPATCHER_DEBUG("Session is not used, sent TEvPoisonPill to " << partition.TopicSessionId);
@@ -991,9 +1000,9 @@ void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvNewDataArrived::TPtr& ev)
9911000
LOG_ROW_DISPATCHER_WARN("Ignore (no consumer) TEvNewDataArrived from " << ev->Sender << " part id " << ev->Get()->Record.GetPartitionId());
9921001
return;
9931002
}
994-
LWPROBE(NewDataArrived, ev->Sender.ToString(), ev->Get()->ReadActorId.ToString(), it->second->QueryId, it->second->Generation, ev->Get()->Record.ByteSizeLong());
995-
LOG_ROW_DISPATCHER_TRACE("Forward TEvNewDataArrived from " << ev->Sender << " to " << ev->Get()->ReadActorId << " query id " << it->second->QueryId);
9961003
auto consumerInfoPtr = it->second;
1004+
LWPROBE(NewDataArrived, ev->Sender.ToString(), ev->Get()->ReadActorId.ToString(), consumerInfoPtr->QueryId, consumerInfoPtr->Generation, ev->Get()->Record.ByteSizeLong());
1005+
LOG_ROW_DISPATCHER_TRACE("Forward TEvNewDataArrived from " << ev->Sender << " to " << ev->Get()->ReadActorId << " query id " << consumerInfoPtr->QueryId);
9971006
auto partitionIt = consumerInfoPtr->Partitions.find(ev->Get()->Record.GetPartitionId());
9981007
if (partitionIt == consumerInfoPtr->Partitions.end()) {
9991008
// Ignore TEvNewDataArrived because read actor now read others partitions.
@@ -1010,10 +1019,10 @@ void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvMessageBatch::TPtr& ev) {
10101019
LOG_ROW_DISPATCHER_WARN("Ignore (no consumer) TEvMessageBatch from " << ev->Sender << " to " << ev->Get()->ReadActorId);
10111020
return;
10121021
}
1013-
LWPROBE(MessageBatch, ev->Sender.ToString(), ev->Get()->ReadActorId.ToString(), it->second->QueryId, it->second->Generation, ev->Get()->Record.ByteSizeLong());
1014-
LOG_ROW_DISPATCHER_TRACE("Forward TEvMessageBatch from " << ev->Sender << " to " << ev->Get()->ReadActorId << " query id " << it->second->QueryId);
1015-
Metrics.RowsSent->Add(ev->Get()->Record.MessagesSize());
10161022
auto consumerInfoPtr = it->second;
1023+
LWPROBE(MessageBatch, ev->Sender.ToString(), ev->Get()->ReadActorId.ToString(), consumerInfoPtr->QueryId, consumerInfoPtr->Generation, ev->Get()->Record.ByteSizeLong());
1024+
LOG_ROW_DISPATCHER_TRACE("Forward TEvMessageBatch from " << ev->Sender << " to " << ev->Get()->ReadActorId << " query id " << consumerInfoPtr->QueryId);
1025+
Metrics.RowsSent->Add(ev->Get()->Record.MessagesSize());
10171026
auto partitionIt = consumerInfoPtr->Partitions.find(ev->Get()->Record.GetPartitionId());
10181027
if (partitionIt == consumerInfoPtr->Partitions.end()) {
10191028
// Ignore TEvMessageBatch because read actor now read others partitions.
@@ -1152,12 +1161,12 @@ void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvSessionStatistic::TPtr& ev
11521161
continue;
11531162
}
11541163
auto consumerInfoPtr = it->second;
1155-
consumerInfoPtr->Stat.Add(clientStat);
11561164
auto partitionIt = consumerInfoPtr->Partitions.find(key.PartitionId);
11571165
if (partitionIt == consumerInfoPtr->Partitions.end()) {
11581166
continue;
11591167
}
11601168
partitionIt->second.Stat.Add(clientStat);
1169+
partitionIt->second.FilteredBytes += clientStat.FilteredBytes;
11611170
partitionIt->second.StatisticsUpdated = true;
11621171
}
11631172
}

ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ class TDqPqRdReadActor : public NActors::TActor<TDqPqRdReadActor>, public NYql::
308308
void TrySendGetNextBatch(TSession& sessionInfo);
309309
TString GetInternalState();
310310
template <class TEventPtr>
311-
TSession* FindSession(const TEventPtr& ev);
311+
TSession* FindAndUpdateSession(const TEventPtr& ev);
312312
void SendNoSession(const NActors::TActorId& recipient, ui64 cookie);
313313
void NotifyCA();
314314
void SendStartSession(TSession& sessionInfo);
@@ -565,7 +565,7 @@ void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvStartSessionAck::TPtr& e
565565
const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
566566
SRC_LOG_I("Received TEvStartSessionAck from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo() << ", generation " << ev->Cookie);
567567
Counters.StartSessionAck++;
568-
auto* session = FindSession(ev);
568+
auto* session = FindAndUpdateSession(ev);
569569
if (!session) {
570570
return;
571571
}
@@ -578,7 +578,7 @@ void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvSessionError::TPtr& ev)
578578
SRC_LOG_I("Received TEvSessionError from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
579579
Counters.SessionError++;
580580

581-
auto* session = FindSession(ev);
581+
auto* session = FindAndUpdateSession(ev);
582582
if (!session) {
583583
return;
584584
}
@@ -592,7 +592,7 @@ void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvStatistics::TPtr& ev) {
592592
SRC_LOG_T("Received TEvStatistics from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo() << " generation " << ev->Cookie);
593593
Counters.Statistics++;
594594
CpuMicrosec += ev->Get()->Record.GetCpuMicrosec();
595-
auto* session = FindSession(ev);
595+
auto* session = FindAndUpdateSession(ev);
596596
if (!session) {
597597
return;
598598
}
@@ -630,7 +630,7 @@ void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvNewDataArrived::TPtr& ev
630630
SRC_LOG_T("Received TEvNewDataArrived from " << ev->Sender << ", partition " << ev->Get()->Record.GetPartitionId() << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo() << " generation " << ev->Cookie);
631631
Counters.NewDataArrived++;
632632

633-
auto* session = FindSession(ev);
633+
auto* session = FindAndUpdateSession(ev);
634634
if (!session) {
635635
return;
636636
}
@@ -687,7 +687,7 @@ void TDqPqRdReadActor::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvEvHeartb
687687
void TDqPqRdReadActor::Handle(const NFq::TEvRowDispatcher::TEvHeartbeat::TPtr& ev) {
688688
SRC_LOG_T("Received TEvHeartbeat from " << ev->Sender << ", generation " << ev->Cookie);
689689
Counters.Heartbeat++;
690-
FindSession(ev);
690+
FindAndUpdateSession(ev);
691691
}
692692

693693
void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev) {
@@ -799,7 +799,7 @@ void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvMessageBatch::TPtr& ev)
799799
const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
800800
SRC_LOG_T("Received TEvMessageBatch from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo() << " generation " << ev->Cookie);
801801
Counters.MessageBatch++;
802-
auto* session = FindSession(ev);
802+
auto* session = FindAndUpdateSession(ev);
803803
if (!session) {
804804
return;
805805
}
@@ -934,7 +934,7 @@ void TDqPqRdReadActor::TrySendGetNextBatch(TSession& sessionInfo) {
934934
}
935935

936936
template <class TEventPtr>
937-
TDqPqRdReadActor::TSession* TDqPqRdReadActor::FindSession(const TEventPtr& ev) {
937+
TDqPqRdReadActor::TSession* TDqPqRdReadActor::FindAndUpdateSession(const TEventPtr& ev) {
938938
auto sessionIt = Sessions.find(ev->Sender);
939939
if (sessionIt == Sessions.end()) {
940940
SRC_LOG_W("Ignore " << typeid(TEventPtr).name() << " from " << ev->Sender);

0 commit comments

Comments
 (0)