Skip to content

Commit 27e2b85

Browse files
authored
add debug details to troubleshoot scan data ack failure (#9518)
1 parent 9e8c4c6 commit 27e2b85

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,18 @@ class TShardScannerInfo {
7171
new TEvPipeCache::TEvForward(ev.release(), TabletId, !subscribed), IEventHandle::FlagTrackDelivery);
7272
}
7373

74+
// Builds a one-line, human-readable description of this shard scanner for
// diagnostics (e.g. error messages about lost scan data acks).
//
// Output format:
//   "ActorId: <id>, TabletId: <tablet>, ScanId: <scan>"  when ActorId is set
//   "TabletId: <tablet>, ScanId: <scan>"                 otherwise
TString ToString() const {
    TStringBuilder builder;

    if (ActorId) {
        // The trailing ", " keeps the optional ActorId field from running
        // straight into "TabletId:" in the resulting message.
        builder << "ActorId: " << *ActorId << ", ";
    }

    builder << "TabletId: " << TabletId << ", ScanId: " << ScanId;

    return builder;
}
85+
7486
void Stop(const bool finalFlag, const TString& message) {
7587
AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "stop_scanner")("actor_id", ActorId)("message", message)("final_flag", finalFlag);
7688
if (ActorId) {

ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ void TKqpScanFetcherActor::Bootstrap() {
8484
}
8585

8686
void TKqpScanFetcherActor::HandleExecute(TEvScanExchange::TEvAckData::TPtr& ev) {
87-
Y_ABORT_UNLESS(ev->Get()->GetFreeSpace());
87+
AFL_ENSURE(ev->Get()->GetFreeSpace());
8888
AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "AckDataFromCompute")("self_id", SelfId())("scan_id", ScanId)
8989
("packs_to_send", InFlightComputes.GetPacksToSendCount())
9090
("from", ev->Sender)("shards remain", PendingShards.size())
@@ -237,7 +237,7 @@ void TKqpScanFetcherActor::HandleExecute(TEvTxProxySchemeCache::TEvResolveKeySet
237237
PendingResolveShards.pop_front();
238238
ResolveNextShard();
239239

240-
Y_ABORT_UNLESS(!InFlightShards.GetShardScanner(state.TabletId));
240+
AFL_ENSURE(!InFlightShards.GetShardScanner(state.TabletId));
241241

242242
AFL_ENSURE(state.State == EShardState::Resolving);
243243
CA_LOG_D("Received TEvResolveKeySetResult update for table '" << ScanDataMeta.TablePath << "'");
@@ -364,13 +364,19 @@ void TKqpScanFetcherActor::HandleExecute(TEvents::TEvUndelivered::TPtr& ev) {
364364
case TEvDataShard::TEvKqpScan::EventType:
365365
// Handled by TEvPipeCache::TEvDeliveryProblem event.
366366
return;
367-
case TEvKqpCompute::TEvScanDataAck::EventType:
368-
if (!!InFlightShards.GetShardScanner(ev->Cookie)) {
369-
SendGlobalFail(NDqProto::StatusIds::UNAVAILABLE, TIssuesIds::DEFAULT_ERROR, "Delivery problem: EvScanDataAck lost.");
367+
case TEvKqpCompute::TEvScanDataAck::EventType: {
368+
auto info = InFlightShards.GetShardScanner(ev->Cookie);
369+
if (!!info) {
370+
TStringBuilder builder;
371+
builder << "Delivery problem: EvScanDataAck lost, NodeId: "
372+
<< SelfId().NodeId() << ", Details: " << info->ToString() << ".";
373+
374+
SendGlobalFail(NDqProto::StatusIds::UNAVAILABLE, TIssuesIds::DEFAULT_ERROR, TString(builder));
370375
}
371376
return;
377+
}
372378
}
373-
Y_ABORT("UNEXPECTED EVENT TYPE");
379+
AFL_ENSURE("Unexpected event type ")("source_type", ev->Get()->SourceType);
374380
}
375381

376382
void TKqpScanFetcherActor::HandleExecute(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {

0 commit comments

Comments
 (0)