@@ -99,14 +99,15 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
99
99
}
100
100
}
101
101
102
+ auto affectedShards = Reads.AffectedShards ();
102
103
// TODO: use evread statistics after KIKIMR-16924
103
104
tableStats->SetReadRows (tableStats->GetReadRows () + rowsReadEstimate);
104
105
tableStats->SetReadBytes (tableStats->GetReadBytes () + bytesReadEstimate);
105
- tableStats->SetAffectedPartitions (tableStats->GetAffectedPartitions () + ReadsPerShard .size ());
106
+ tableStats->SetAffectedPartitions (tableStats->GetAffectedPartitions () + affectedShards .size ());
106
107
107
108
NKqpProto::TKqpTableExtraStats tableExtraStats;
108
109
auto readActorTableAggrExtraStats = tableExtraStats.MutableReadActorTableAggrExtraStats ();
109
- for (const auto & [ shardId, _] : ReadsPerShard ) {
110
+ for (const auto & shardId : affectedShards ) {
110
111
readActorTableAggrExtraStats->AddAffectedShards (shardId);
111
112
}
112
113
@@ -137,10 +138,6 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
137
138
, ShardId(shardId)
138
139
, State(EReadState::Initial) {}
139
140
140
- void SetFinished () {
141
- State = EReadState::Finished;
142
- }
143
-
144
141
bool Finished () const {
145
142
return (State == EReadState::Finished);
146
143
}
@@ -159,7 +156,68 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
159
156
160
157
struct TShardState {
161
158
ui64 RetryAttempts = 0 ;
162
- std::vector<TReadState*> Reads;
159
+ std::unordered_set<ui64> Reads;
160
+ };
161
+
162
+ struct TReads {
163
+ std::unordered_map<ui64, TReadState> Reads;
164
+ std::unordered_map<ui64, TShardState> ReadsPerShard;
165
+
166
+ std::unordered_map<ui64, TReadState>::iterator begin () { return Reads.begin (); }
167
+
168
+ std::unordered_map<ui64, TReadState>::iterator end () { return Reads.end (); }
169
+
170
+ std::unordered_map<ui64, TReadState>::iterator find (ui64 readId) {
171
+ return Reads.find (readId);
172
+ }
173
+
174
+ void insert (TReadState&& read) {
175
+ const auto [readIt, succeeded] = Reads.insert ({read.Id , std::move (read)});
176
+ YQL_ENSURE (succeeded);
177
+ ReadsPerShard[readIt->second .ShardId ].Reads .emplace (readIt->second .Id );
178
+ }
179
+
180
+ size_t InFlightReads () const {
181
+ return Reads.size ();
182
+ }
183
+
184
+ std::vector<ui64> AffectedShards () const {
185
+ std::vector<ui64> result;
186
+ result.reserve (ReadsPerShard.size ());
187
+ for (const auto & [shard, _]: ReadsPerShard) {
188
+ result.push_back (shard);
189
+ }
190
+ return result;
191
+ }
192
+
193
+ bool CheckShardRetriesExeeded (TReadState& failedRead) {
194
+ const auto & shardState = ReadsPerShard[failedRead.ShardId ];
195
+ return shardState.RetryAttempts + 1 > MaxShardRetries ();
196
+ }
197
+
198
+ TDuration CalcDelayForShard (TReadState& failedRead, bool allowInstantRetry) {
199
+ auto & shardState = ReadsPerShard[failedRead.ShardId ];
200
+ ++shardState.RetryAttempts ;
201
+ return CalcDelay (shardState.RetryAttempts , allowInstantRetry);
202
+ }
203
+
204
+ void erase (TReadState& read) {
205
+ ReadsPerShard[read.ShardId ].Reads .erase (read.Id );
206
+ Reads.erase (read.Id );
207
+ }
208
+
209
+ std::vector<TReadState*> GetShardReads (ui64 shardId) {
210
+ auto it = ReadsPerShard.find (shardId);
211
+ YQL_ENSURE (it != ReadsPerShard.end ());
212
+ std::vector<TReadState*> result;
213
+ for (ui64 readId: it->second .Reads ) {
214
+ auto it = Reads.find (readId);
215
+ YQL_ENSURE (it != Reads.end ());
216
+ result.push_back (&it->second );
217
+ }
218
+
219
+ return result;
220
+ }
163
221
};
164
222
165
223
struct TEvPrivate {
@@ -224,13 +282,15 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
224
282
ReadRowsCount += replyResultStats.ReadRowsCount ;
225
283
ReadBytesCount += replyResultStats.ReadBytesCount ;
226
284
227
- auto status = FetchInputRows ();
285
+ if (!StreamLookupWorker->IsOverloaded ()) {
286
+ FetchInputRows ();
287
+ }
228
288
229
289
if (Partitioning) {
230
290
ProcessInputRows ();
231
291
}
232
292
233
- const bool inputRowsFinished = status == NUdf::EFetchStatus::Finish;
293
+ const bool inputRowsFinished = LastFetchStatus == NUdf::EFetchStatus::Finish;
234
294
const bool allReadsFinished = AllReadsFinished ();
235
295
const bool allRowsProcessed = StreamLookupWorker->AllRowsProcessed ();
236
296
@@ -305,14 +365,14 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
305
365
void Handle (TEvDataShard::TEvReadResult::TPtr& ev) {
306
366
const auto & record = ev->Get ()->Record ;
307
367
308
-
309
368
auto readIt = Reads.find (record.GetReadId ());
310
369
if (readIt == Reads.end () || readIt->second .State != EReadState::Running) {
311
370
CA_LOG_D (" Drop read with readId: " << record.GetReadId () << " , because it's already completed or blocked" );
312
371
return ;
313
372
}
314
373
315
374
auto & read = readIt->second ;
375
+ ui64 shardId = read.ShardId ;
316
376
317
377
CA_LOG_D (" Recv TEvReadResult (stream lookup) from ShardID=" << read.ShardId
318
378
<< " , Table = " << StreamLookupWorker->GetTablePath ()
@@ -369,13 +429,13 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
369
429
case Ydb::StatusIds::NOT_FOUND:
370
430
{
371
431
StreamLookupWorker->ResetRowsProcessing (read.Id , read.FirstUnprocessedQuery , read.LastProcessedKey );
372
- read.SetFinished ();
373
432
CA_LOG_D (" NOT_FOUND was received from tablet: " << read.ShardId << " . "
374
433
<< getIssues ().ToOneLineString ());
434
+ Reads.erase (read);
375
435
return ResolveTableShards ();
376
436
}
377
437
case Ydb::StatusIds::OVERLOADED: {
378
- if (CheckTotalRetriesExeeded () || CheckShardRetriesExeeded (read)) {
438
+ if (CheckTotalRetriesExeeded () || Reads. CheckShardRetriesExeeded (read)) {
379
439
return replyError (
380
440
TStringBuilder () << " Table '" << StreamLookupWorker->GetTablePath () << " ' retry limit exceeded." ,
381
441
NYql::NDqProto::StatusIds::OVERLOADED);
@@ -386,7 +446,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
386
446
return RetryTableRead (read, /* allowInstantRetry = */ false );
387
447
}
388
448
case Ydb::StatusIds::INTERNAL_ERROR: {
389
- if (CheckTotalRetriesExeeded () || CheckShardRetriesExeeded (read)) {
449
+ if (CheckTotalRetriesExeeded () || Reads. CheckShardRetriesExeeded (read)) {
390
450
return replyError (
391
451
TStringBuilder () << " Table '" << StreamLookupWorker->GetTablePath () << " ' retry limit exceeded." ,
392
452
NYql::NDqProto::StatusIds::INTERNAL_ERROR);
@@ -405,7 +465,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
405
465
read.LastSeqNo = record.GetSeqNo ();
406
466
407
467
if (record.GetFinished ()) {
408
- read. SetFinished ( );
468
+ Reads. erase (read );
409
469
} else {
410
470
YQL_ENSURE (record.HasContinuationToken (), " Successful TEvReadResult should contain continuation token" );
411
471
NKikimrTxDataShard::TReadContinuationToken continuationToken;
@@ -443,7 +503,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
443
503
444
504
auto guard = BindAllocator ();
445
505
StreamLookupWorker->AddResult (TKqpStreamLookupWorker::TShardReadResult{
446
- read. ShardId , THolder<TEventHandle<TEvDataShard::TEvReadResult>>(ev.Release ())
506
+ shardId , THolder<TEventHandle<TEvDataShard::TEvReadResult>>(ev.Release ())
447
507
});
448
508
Send (ComputeActorId, new TEvNewAsyncInputDataArrived (InputIndex));
449
509
}
@@ -452,11 +512,9 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
452
512
CA_LOG_D (" TEvDeliveryProblem was received from tablet: " << ev->Get ()->TabletId );
453
513
454
514
const auto & tabletId = ev->Get ()->TabletId ;
455
- auto shardIt = ReadsPerShard.find (tabletId);
456
- YQL_ENSURE (shardIt != ReadsPerShard.end ());
457
515
458
516
TVector<TReadState*> toRetry;
459
- for (auto * read : shardIt-> second . Reads ) {
517
+ for (auto * read : Reads. GetShardReads (tabletId) ) {
460
518
if (read->State == EReadState::Running) {
461
519
Counters->IteratorDeliveryProblems ->Inc ();
462
520
toRetry.push_back (read);
@@ -489,27 +547,24 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
489
547
490
548
if ((read.State == EReadState::Running && read.LastSeqNo <= ev->Get ()->LastSeqNo ) || read.State == EReadState::Blocked) {
491
549
if (ev->Get ()->InstantStart ) {
492
- read.SetFinished ();
493
550
auto requests = StreamLookupWorker->RebuildRequest (read.Id , read.FirstUnprocessedQuery , read.LastProcessedKey , ReadId);
494
551
for (auto & request : requests) {
495
552
StartTableRead (read.ShardId , std::move (request));
496
553
}
554
+ Reads.erase (read);
497
555
} else {
498
556
RetryTableRead (read);
499
557
}
500
558
}
501
559
}
502
560
503
- NUdf::EFetchStatus FetchInputRows () {
561
+ void FetchInputRows () {
504
562
auto guard = BindAllocator ();
505
563
506
- NUdf::EFetchStatus status;
507
564
NUdf::TUnboxedValue row;
508
- while ((status = Input.Fetch (row)) == NUdf::EFetchStatus::Ok) {
565
+ while ((LastFetchStatus = Input.Fetch (row)) == NUdf::EFetchStatus::Ok) {
509
566
StreamLookupWorker->AddInputRow (std::move (row));
510
567
}
511
-
512
- return status;
513
568
}
514
569
515
570
void ProcessInputRows () {
@@ -569,9 +624,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
569
624
570
625
auto readId = read.Id ;
571
626
auto lastSeqNo = read.LastSeqNo ;
572
- const auto [readIt, succeeded] = Reads.insert ({readId, std::move (read)});
573
- YQL_ENSURE (succeeded);
574
- ReadsPerShard[shardId].Reads .push_back (&readIt->second );
627
+ Reads.insert (std::move (read));
575
628
576
629
if (auto delay = ShardTimeout ()) {
577
630
TlsActivationContext->Schedule (
@@ -585,11 +638,6 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
585
638
return limit && TotalRetryAttempts + 1 > *limit;
586
639
}
587
640
588
- bool CheckShardRetriesExeeded (TReadState& failedRead) {
589
- const auto & shardState = ReadsPerShard[failedRead.ShardId ];
590
- return shardState.RetryAttempts + 1 > MaxShardRetries ();
591
- }
592
-
593
641
void RetryTableRead (TReadState& failedRead, bool allowInstantRetry = true ) {
594
642
CA_LOG_D (" Retry reading of table: " << StreamLookupWorker->GetTablePath () << " , readId: " << failedRead.Id
595
643
<< " , shardId: " << failedRead.ShardId );
@@ -600,21 +648,19 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
600
648
}
601
649
++TotalRetryAttempts;
602
650
603
- if (CheckShardRetriesExeeded (failedRead)) {
651
+ if (Reads. CheckShardRetriesExeeded (failedRead)) {
604
652
StreamLookupWorker->ResetRowsProcessing (failedRead.Id , failedRead.FirstUnprocessedQuery , failedRead.LastProcessedKey );
605
- failedRead. SetFinished ( );
653
+ Reads. erase (failedRead );
606
654
return ResolveTableShards ();
607
655
}
608
- auto & shardState = ReadsPerShard[failedRead.ShardId ];
609
- ++shardState.RetryAttempts ;
610
656
611
- auto delay = CalcDelay (shardState. RetryAttempts , allowInstantRetry);
657
+ auto delay = Reads. CalcDelayForShard (failedRead , allowInstantRetry);
612
658
if (delay == TDuration::Zero ()) {
613
- failedRead.SetFinished ();
614
659
auto requests = StreamLookupWorker->RebuildRequest (failedRead.Id , failedRead.FirstUnprocessedQuery , failedRead.LastProcessedKey , ReadId);
615
660
for (auto & request : requests) {
616
661
StartTableRead (failedRead.ShardId , std::move (request));
617
662
}
663
+ Reads.erase (failedRead);
618
664
} else {
619
665
CA_LOG_D (" Schedule retry atempt for readId: " << failedRead.Id << " after " << delay);
620
666
TlsActivationContext->Schedule (
@@ -660,13 +706,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
660
706
}
661
707
662
708
bool AllReadsFinished () const {
663
- for (const auto & [_, read] : Reads) {
664
- if (!read.Finished ()) {
665
- return false ;
666
- }
667
- }
668
-
669
- return true ;
709
+ return Reads.InFlightReads () == 0 ;
670
710
}
671
711
672
712
TGuard<NKikimr::NMiniKQL::TScopedAlloc> BindAllocator () {
@@ -702,8 +742,8 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
702
742
const TMaybe<ui64> LockTxId;
703
743
const TMaybe<ui32> NodeLockId;
704
744
const TMaybe<NKikimrDataEvents::ELockMode> LockMode;
705
- std::unordered_map<ui64, TReadState> Reads;
706
- std::unordered_map<ui64, TShardState> ReadsPerShard ;
745
+ TReads Reads;
746
+ NUdf::EFetchStatus LastFetchStatus = NUdf::EFetchStatus::Yield ;
707
747
std::shared_ptr<const TVector<TKeyDesc::TPartitionInfo>> Partitioning;
708
748
const TDuration SchemeCacheRequestTimeout;
709
749
NActors::TActorId SchemeCacheRequestTimeoutTimer;
0 commit comments