@@ -50,12 +50,14 @@ struct TEvPrivate {
50
50
EvBegin = EventSpaceBegin (NActors::TEvents::ES_PRIVATE),
51
51
EvCoordinatorPing = EvBegin + 20 ,
52
52
EvUpdateMetrics,
53
+ EvPrintStateToLog,
53
54
EvEnd
54
55
};
55
56
56
57
static_assert (EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), " expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)" );
57
58
// Local (in-process) timer events of the row dispatcher. None of them carries a payload;
// the event id alone selects the handler.

// Scheduled from TRowDispatcher::Bootstrap after CoordinatorPingPeriodSec seconds.
struct TEvCoordinatorPing
    : public NActors::TEventLocal<TEvCoordinatorPing, EvCoordinatorPing>
{};

// Tick that makes the dispatcher call UpdateMetrics().
struct TEvUpdateMetrics
    : public NActors::TEventLocal<TEvUpdateMetrics, EvUpdateMetrics>
{};

// Tick that makes the dispatcher call PrintInternalState(); the handler re-schedules it.
struct TEvPrintStateToLog
    : public NActors::TEventLocal<TEvPrintStateToLog, EvPrintStateToLog>
{};
59
61
};
60
62
61
63
struct TQueryStat {
@@ -65,6 +67,7 @@ struct TQueryStat {
65
67
};
66
68
67
69
// Delay, in seconds, used when scheduling TEvUpdateMetrics.
ui64 UpdateMetricsPeriodSec{60};

// Delay, in seconds, used when scheduling TEvPrintStateToLog (the internal-state dump).
ui64 PrintStateToLogPeriodSec{300};
68
71
69
72
class TRowDispatcher : public TActorBootstrapped <TRowDispatcher> {
70
73
@@ -128,6 +131,7 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
128
131
TRowDispatcherMetrics Metrics;
129
132
NYql::IPqGateway::TPtr PqGateway;
130
133
THashSet<TActorId> InterconnectSessions;
134
+ TMap<ui32, bool > NodeConnected;
131
135
132
136
struct ConsumerCounters {
133
137
ui64 NewDataArrived = 0 ;
@@ -141,16 +145,17 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
141
145
NActors::TActorId selfId,
142
146
ui64 eventQueueId,
143
147
NFq::NRowDispatcherProto::TEvStartSession& proto,
144
- TActorId topicSessionId)
148
+ TActorId topicSessionId,
149
+ bool alreadyConnected)
145
150
: ReadActorId(readActorId)
146
151
, SourceParams(proto.GetSource())
147
152
, PartitionId(proto.GetPartitionId())
148
153
, EventQueueId(eventQueueId)
149
154
, Proto(proto)
150
155
, TopicSessionId(topicSessionId)
151
156
, QueryId(proto.GetQueryId()) {
152
- EventsQueue.Init (" txId" , selfId, selfId, eventQueueId, /* KeepAlive */ true );
153
- EventsQueue.OnNewRecipientId (readActorId);
157
+ EventsQueue.Init (" txId" , selfId, selfId, eventQueueId, /* KeepAlive */ true , /* UseConnect */ false );
158
+ EventsQueue.OnNewRecipientId (readActorId, true , alreadyConnected );
154
159
}
155
160
156
161
NActors::TActorId ReadActorId;
@@ -215,10 +220,12 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
215
220
void Handle (const NYql::NDq::TEvRetryQueuePrivate::TEvPing::TPtr&);
216
221
void Handle (const NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed::TPtr&);
217
222
void Handle (NFq::TEvPrivate::TEvUpdateMetrics::TPtr&);
218
-
223
+ void Handle (NFq::TEvPrivate::TEvPrintStateToLog::TPtr&);
224
+
219
225
void DeleteConsumer (const ConsumerSessionKey& key);
220
226
void UpdateInterconnectSessions (const NActors::TActorId& interconnectSession);
221
227
void UpdateMetrics ();
228
+ void PrintInternalState ();
222
229
223
230
STRICT_STFUNC (
224
231
StateFunc, {
@@ -242,6 +249,7 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
242
249
hFunc (NActors::TEvents::TEvPing, Handle);
243
250
hFunc (NFq::TEvRowDispatcher::TEvNewDataArrived, Handle);
244
251
hFunc (NFq::TEvPrivate::TEvUpdateMetrics, Handle);
252
+ hFunc (NFq::TEvPrivate::TEvPrintStateToLog, Handle);
245
253
})
246
254
};
247
255
@@ -275,6 +283,7 @@ void TRowDispatcher::Bootstrap() {
275
283
Register (NewLeaderElection (SelfId (), coordinatorId, config, CredentialsProviderFactory, YqSharedResources, Tenant, Counters).release ());
276
284
Schedule (TDuration::Seconds (CoordinatorPingPeriodSec), new TEvPrivate::TEvCoordinatorPing ());
277
285
Schedule (TDuration::Seconds (UpdateMetricsPeriodSec), new NFq::TEvPrivate::TEvUpdateMetrics ());
286
+ Schedule (TDuration::Seconds (PrintStateToLogPeriodSec), new NFq::TEvPrivate::TEvPrintStateToLog ());
278
287
}
279
288
280
289
void TRowDispatcher::Handle (NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev) {
@@ -292,13 +301,15 @@ void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr&
292
301
293
302
// Interconnect reported that a node is connected.
// Marks the node as connected in NodeConnected (read later when a new consumer is created)
// and forwards the notification to the events queue of every known consumer.
void TRowDispatcher::HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev) {
    const auto nodeId = ev->Get()->NodeId;
    LOG_ROW_DISPATCHER_DEBUG(" EvNodeConnected, node id " << nodeId);

    NodeConnected[nodeId] = true;

    for (auto& entry : Consumers) {
        entry.second->EventsQueue.HandleNodeConnected(nodeId);
    }
}
299
309
300
310
void TRowDispatcher::HandleDisconnected (TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
301
311
LOG_ROW_DISPATCHER_DEBUG (" TEvNodeDisconnected, node id " << ev->Get ()->NodeId );
312
+ NodeConnected[ev->Get ()->NodeId ] = false ;
302
313
for (auto & [actorId, consumer] : Consumers) {
303
314
consumer->EventsQueue .HandleNodeDisconnected (ev->Get ()->NodeId );
304
315
}
@@ -339,18 +350,34 @@ void TRowDispatcher::UpdateMetrics() {
339
350
return ;
340
351
}
341
352
TMap<TString, TQueryStat> queryStats;
342
- TStringStream str;
353
+
354
+ for (auto & [key, sessionsInfo] : TopicSessions) {
355
+ for (auto & [actorId, sessionInfo] : sessionsInfo.Sessions ) {
356
+ for (auto & [readActorId, consumer] : sessionInfo.Consumers ) {
357
+ auto & stat = queryStats[consumer->QueryId ];
358
+ stat.UnreadRows .Add (NYql::TCounters::TEntry (consumer->Stat .UnreadRows ));
359
+ stat.UnreadBytes .Add (NYql::TCounters::TEntry (consumer->Stat .UnreadBytes ));
360
+ }
361
+ }
362
+ }
363
+ for (const auto & [queryId, stat] : queryStats) {
364
+ auto queryGroup = Metrics.Counters ->GetSubgroup (" queryId" , queryId);
365
+ queryGroup->GetCounter (" MaxUnreadRows" )->Set (stat.UnreadRows .Max );
366
+ queryGroup->GetCounter (" AvgUnreadRows" )->Set (stat.UnreadRows .Avg );
367
+ queryGroup->GetCounter (" MaxUnreadBytes" )->Set (stat.UnreadBytes .Max );
368
+ queryGroup->GetCounter (" AvgUnreadBytes" )->Set (stat.UnreadBytes .Avg );
369
+ }
370
+ }
343
371
372
+ void TRowDispatcher::PrintInternalState () {
373
+ TStringStream str;
344
374
str << " Statistics:\n " ;
345
375
for (auto & [key, sessionsInfo] : TopicSessions) {
346
376
str << " " << key.Endpoint << " / " << key.Database << " / " << key.TopicPath << " / " << key.PartitionId ;
347
377
for (auto & [actorId, sessionInfo] : sessionsInfo.Sessions ) {
348
378
str << " / " << actorId << " \n " ;
349
379
str << " unread bytes " << sessionInfo.Stat .UnreadBytes << " \n " ;
350
380
for (auto & [readActorId, consumer] : sessionInfo.Consumers ) {
351
- auto & stat = queryStats[consumer->QueryId ];
352
- stat.UnreadRows .Add (NYql::TCounters::TEntry (consumer->Stat .UnreadRows ));
353
- stat.UnreadBytes .Add (NYql::TCounters::TEntry (consumer->Stat .UnreadBytes ));
354
381
str << " " << consumer->QueryId << " " << readActorId << " unread rows "
355
382
<< consumer->Stat .UnreadRows << " unread bytes " << consumer->Stat .UnreadBytes << " offset " << consumer->Stat .Offset
356
383
<< " get " << consumer->Counters .GetNextBatch
@@ -361,15 +388,6 @@ void TRowDispatcher::UpdateMetrics() {
361
388
}
362
389
}
363
390
LOG_ROW_DISPATCHER_DEBUG (str.Str ());
364
-
365
- for (const auto & [queryId, stat] : queryStats) {
366
- LOG_ROW_DISPATCHER_DEBUG (" UnreadBytes " << queryId << " " << stat.UnreadBytes .Max );
367
- auto queryGroup = Metrics.Counters ->GetSubgroup (" queryId" , queryId);
368
- queryGroup->GetCounter (" MaxUnreadRows" )->Set (stat.UnreadRows .Max );
369
- queryGroup->GetCounter (" AvgUnreadRows" )->Set (stat.UnreadRows .Avg );
370
- queryGroup->GetCounter (" MaxUnreadBytes" )->Set (stat.UnreadBytes .Max );
371
- queryGroup->GetCounter (" AvgUnreadBytes" )->Set (stat.UnreadBytes .Avg );
372
- }
373
391
}
374
392
375
393
void TRowDispatcher::Handle (NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
@@ -384,7 +402,7 @@ void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
384
402
ConsumerSessionKey key{ev->Sender , ev->Get ()->Record .GetPartitionId ()};
385
403
auto it = Consumers.find (key);
386
404
if (it != Consumers.end ()) {
387
- LOG_ROW_DISPATCHER_ERROR (" Сonsumer already exists, ignore StartSession" );
405
+ LOG_ROW_DISPATCHER_ERROR (" Consumer already exists, ignore StartSession" );
388
406
return ;
389
407
}
390
408
const auto & source = ev->Get ()->Record .GetSource ();
@@ -395,7 +413,7 @@ void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
395
413
LOG_ROW_DISPATCHER_DEBUG (" Topic session count " << topicSessionInfo.Sessions .size ());
396
414
Y_ENSURE (topicSessionInfo.Sessions .size () <= 1 );
397
415
398
- auto consumerInfo = MakeAtomicShared<ConsumerInfo>(ev->Sender , SelfId (), NextEventQueueId++, ev->Get ()->Record , TActorId ());
416
+ auto consumerInfo = MakeAtomicShared<ConsumerInfo>(ev->Sender , SelfId (), NextEventQueueId++, ev->Get ()->Record , TActorId (), NodeConnected[ev-> Sender . NodeId ()] );
399
417
Consumers[key] = consumerInfo;
400
418
ConsumersByEventQueueId[consumerInfo->EventQueueId ] = consumerInfo;
401
419
if (!consumerInfo->EventsQueue .OnEventReceived (ev)) {
@@ -609,6 +627,11 @@ void TRowDispatcher::Handle(NFq::TEvPrivate::TEvUpdateMetrics::TPtr&) {
609
627
UpdateMetrics ();
610
628
}
611
629
630
+ void TRowDispatcher::Handle (NFq::TEvPrivate::TEvPrintStateToLog::TPtr&) {
631
+ PrintInternalState ();
632
+ Schedule (TDuration::Seconds (PrintStateToLogPeriodSec), new NFq::TEvPrivate::TEvPrintStateToLog ());
633
+ }
634
+
612
635
void TRowDispatcher::Handle (NFq::TEvRowDispatcher::TEvSessionStatistic::TPtr& ev) {
613
636
LOG_ROW_DISPATCHER_TRACE (" TEvSessionStatistic from " << ev->Sender );
614
637
const auto & key = ev->Get ()->Stat .SessionKey ;
0 commit comments