@@ -21,6 +21,9 @@ static constexpr ui64 MAX_SHARD_RETRIES = 5; // retry after: 0, 250, 500, 1000
21
21
static constexpr ui64 MAX_TOTAL_SHARD_RETRIES = 20 ;
22
22
static constexpr ui64 MAX_SHARD_RESOLVES = 3 ;
23
23
24
+ constexpr TDuration REGISTRATION_TIMEOUT = TDuration::Seconds(60 );
25
+ constexpr TDuration PING_PERIOD = TDuration::Seconds(30 );
26
+
24
27
} // anonymous namespace
25
28
26
29
TKqpScanFetcherActor::TKqpScanFetcherActor (const NKikimrKqp::TKqpSnapshot& snapshot, const TComputeRuntimeSettings& settings,
@@ -79,16 +82,18 @@ void TKqpScanFetcherActor::Bootstrap() {
79
82
auto & state = PendingShards.emplace_back (TShardState (read.GetShardId ()));
80
83
state.Ranges = BuildSerializedTableRanges (read);
81
84
}
85
+ RegistrationStartTime = Now ();
82
86
for (auto && c : ComputeActorIds) {
83
87
Sender<TEvScanExchange::TEvRegisterFetcher>().SendTo (c);
84
88
}
85
89
AFL_DEBUG (NKikimrServices::KQP_COMPUTE)(" event" , " bootstrap" )(" compute" , ComputeActorIds.size ())(" shards" , PendingShards.size ());
86
90
StartTableScan ();
87
91
Become (&TKqpScanFetcherActor::StateFunc);
88
- Schedule (TDuration::Seconds ( 30 ) , new NActors::TEvents::TEvWakeup ());
92
+ Schedule (PING_PERIOD , new NActors::TEvents::TEvWakeup ());
89
93
}
90
94
91
95
void TKqpScanFetcherActor::HandleExecute (TEvScanExchange::TEvAckData::TPtr& ev) {
96
+ RegistrationFinished = true ;
92
97
AFL_ENSURE (ev->Get ()->GetFreeSpace ());
93
98
AFL_DEBUG (NKikimrServices::KQP_COMPUTE)(" event" , " AckDataFromCompute" )(" self_id" , SelfId ())(" scan_id" , ScanId)(
94
99
" packs_to_send" , InFlightComputes.GetPacksToSendCount ())(" from" , ev->Sender )(" shards remain" , PendingShards.size ())(
@@ -697,8 +702,18 @@ void TKqpScanFetcherActor::CheckFinish() {
697
702
}
698
703
699
704
void TKqpScanFetcherActor::HandleExecute (NActors::TEvents::TEvWakeup::TPtr&) {
700
- InFlightShards.PingAllScanners ();
701
- Schedule (TDuration::Seconds (30 ), new NActors::TEvents::TEvWakeup ());
705
+ if (RegistrationFinished) {
706
+ InFlightShards.PingAllScanners ();
707
+ } else if (Now () - RegistrationStartTime > REGISTRATION_TIMEOUT) {
708
+ AFL_DEBUG (NKikimrServices::KQP_COMPUTE)(" event" , " TEvWakeup" )(" info" , " Abort fetcher due to Registration timeout" );
709
+ InFlightShards.AbortAllScanners (" Abort fetcher due to Registration timeout" );
710
+ TIssues issues;
711
+ issues.AddIssue (TIssue (" Abort fetcher due to Registration timeout" ));
712
+ SendGlobalFail (NDqProto::COMPUTE_STATE_FAILURE, NYql::NDqProto::StatusIds::INTERNAL_ERROR, issues);
713
+ PassAway ();
714
+ return ;
715
+ }
716
+ Schedule (PING_PERIOD, new NActors::TEvents::TEvWakeup ());
702
717
}
703
718
704
719
} // namespace NKikimr::NKqp::NScanPrivate
0 commit comments