Skip to content

Commit cc68f9c

Browse files
authored
Return seqno & row index for duplicate rows (#7660)
1 parent e437bd0 commit cc68f9c

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

ydb/core/kqp/runtime/kqp_read_actor.cpp

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,11 +54,13 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
5454
size_t ProcessedRows = 0;
5555
size_t PackedRows = 0;
5656
ui64 ReadId;
57+
ui64 SeqNo;
5758

58-
TResult(ui64 shardId, THolder<TEventHandle<TEvDataShard::TEvReadResult>> readResult, ui64 readId)
59+
TResult(ui64 shardId, THolder<TEventHandle<TEvDataShard::TEvReadResult>> readResult, ui64 readId, ui64 seqNo)
5960
: ShardId(shardId)
6061
, ReadResult(std::move(readResult))
6162
, ReadId(readId)
63+
, SeqNo(seqNo)
6264
{
6365
}
6466
};
@@ -983,6 +985,7 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
983985
CA_LOG_D("Taken " << Locks.size() << " locks");
984986
Reads[id].SerializedContinuationToken = record.GetContinuationToken();
985987

988+
ui64 seqNo = ev->Get()->Record.GetSeqNo();
986989
Reads[id].RegisterMessage(*ev->Get());
987990

988991

@@ -992,7 +995,7 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
992995
<< " finished = " << ev->Get()->Record.GetFinished());
993996
CA_LOG_T(TStringBuilder() << "read #" << id << " pushed " << DebugPrintCells(ev->Get()) << " continuation token " << DebugPrintContionuationToken(record.GetContinuationToken()));
994997

995-
Results.push({Reads[id].Shard->TabletId, THolder<TEventHandle<TEvDataShard::TEvReadResult>>(ev.Release()), id});
998+
Results.push({Reads[id].Shard->TabletId, THolder<TEventHandle<TEvDataShard::TEvReadResult>>(ev.Release()), id, seqNo});
996999
NotifyCA();
9971000
}
9981001

@@ -1057,7 +1060,7 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
10571060
}
10581061

10591062
NMiniKQL::TBytesStatistics PackArrow(TResult& handle, i64& freeSpace) {
1060-
auto& [shardId, result, batch, _, packed, readId] = handle;
1063+
auto& [shardId, result, batch, _, packed, readId, seqNo] = handle;
10611064
NMiniKQL::TBytesStatistics stats;
10621065
bool hasResultColumns = false;
10631066
if (result->Get()->GetRowsCount() == 0) {
@@ -1144,7 +1147,7 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
11441147
}
11451148

11461149
NMiniKQL::TBytesStatistics PackCells(TResult& handle, i64& freeSpace) {
1147-
auto& [shardId, result, batch, processedRows, packed, readId] = handle;
1150+
auto& [shardId, result, batch, processedRows, packed, readId, seqNo] = handle;
11481151
NMiniKQL::TBytesStatistics stats;
11491152
batch->reserve(batch->size());
11501153
CA_LOG_D(TStringBuilder() << "enter pack cells method "
@@ -1205,13 +1208,17 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
12051208
<< " current is " << handle.ShardId
12061209
<< " previous readId is " << ptr->ReadId
12071210
<< " current is " << handle.ReadId
1211+
<< " previous seqNo is " << ptr->SeqNo
1212+
<< " current is " << handle.SeqNo
1213+
<< " previous row number is " << ptr->RowIndex
1214+
<< " current is " << rowIndex
12081215
<< " key is " << rowRepr;
12091216
CA_LOG_E(rowMessage);
12101217
Counters->RowsDuplicationsFound->Inc();
12111218
RuntimeError(rowMessage, NYql::NDqProto::StatusIds::INTERNAL_ERROR, {});
12121219
return stats;
12131220
}
1214-
DuplicateCheckStats[result] = {.ReadId = readId , .ShardId = handle.ShardId};
1221+
DuplicateCheckStats[result] = {.ReadId = readId , .ShardId = handle.ShardId, .SeqNo = seqNo, .RowIndex = rowIndex };
12151222
}
12161223

12171224
stats.DataBytes += rowSize;
@@ -1547,6 +1554,8 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
15471554
struct TDuplicationStats {
15481555
ui64 ReadId;
15491556
ui64 ShardId;
1557+
ui64 SeqNo;
1558+
ui64 RowIndex;
15501559
};
15511560
THashMap<TString, TDuplicationStats> DuplicateCheckStats;
15521561
TVector<TResultColumn> DuplicateCheckExtraColumns;

0 commit comments

Comments
 (0)