Skip to content

Commit 2e43e2a

Browse files
authored
Fix unexpected read iterator stream reset (#7697)
1 parent cc68f9c commit 2e43e2a

File tree

4 files changed

+209
-13
lines changed

4 files changed

+209
-13
lines changed

ydb/core/tx/datashard/datashard__read_iterator.cpp

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "datashard_locks_db.h"
66
#include "probes.h"
77

8+
#include <ydb/core/base/counters.h>
89
#include <ydb/core/formats/arrow/arrow_batch_builder.h>
910

1011
#include <ydb/library/actors/core/monotonic_provider.h>
@@ -315,6 +316,8 @@ class TReader {
315316
, Self(self)
316317
, TableId(state.PathId.OwnerId, state.PathId.LocalPathId, state.SchemaVersion)
317318
, FirstUnprocessedQuery(State.FirstUnprocessedQuery)
319+
, LastProcessedKey(State.LastProcessedKey)
320+
, LastProcessedKeyErasedOrMissing(State.LastProcessedKeyErasedOrMissing)
318321
{
319322
GetTimeFast(&StartTime);
320323
EndTime = StartTime;
@@ -329,10 +332,10 @@ class TReader {
329332
bool toInclusive;
330333
TSerializedCellVec keyFromCells;
331334
TSerializedCellVec keyToCells;
332-
if (Y_UNLIKELY(FirstUnprocessedQuery == State.FirstUnprocessedQuery && State.LastProcessedKey)) {
335+
if (LastProcessedKey) {
333336
if (!State.Reverse) {
334-
keyFromCells = TSerializedCellVec(State.LastProcessedKey);
335-
fromInclusive = State.LastProcessedKeyErasedOrMissing;
337+
keyFromCells = TSerializedCellVec(LastProcessedKey);
338+
fromInclusive = LastProcessedKeyErasedOrMissing;
336339

337340
keyToCells = range.To;
338341
toInclusive = range.ToInclusive;
@@ -341,8 +344,8 @@ class TReader {
341344
keyFromCells = range.From;
342345
fromInclusive = range.FromInclusive;
343346

344-
keyToCells = TSerializedCellVec(State.LastProcessedKey);
345-
toInclusive = State.LastProcessedKeyErasedOrMissing;
347+
keyToCells = TSerializedCellVec(LastProcessedKey);
348+
toInclusive = LastProcessedKeyErasedOrMissing;
346349
}
347350
} else {
348351
keyFromCells = range.From;
@@ -505,6 +508,7 @@ class TReader {
505508
while (FirstUnprocessedQuery < State.Request->Ranges.size()) {
506509
if (ReachedTotalRowsLimit()) {
507510
FirstUnprocessedQuery = -1;
511+
LastProcessedKey.clear();
508512
return true;
509513
}
510514

@@ -531,6 +535,7 @@ class TReader {
531535
FirstUnprocessedQuery++;
532536
else
533537
FirstUnprocessedQuery--;
538+
LastProcessedKey.clear();
534539
}
535540

536541
return true;
@@ -542,6 +547,7 @@ class TReader {
542547
while (FirstUnprocessedQuery < State.Request->Keys.size()) {
543548
if (ReachedTotalRowsLimit()) {
544549
FirstUnprocessedQuery = -1;
550+
LastProcessedKey.clear();
545551
return true;
546552
}
547553

@@ -567,6 +573,7 @@ class TReader {
567573
FirstUnprocessedQuery++;
568574
else
569575
FirstUnprocessedQuery--;
576+
LastProcessedKey.clear();
570577
}
571578

572579
return true;
@@ -732,6 +739,28 @@ class TReader {
732739
}
733740

734741
void UpdateState(TReadIteratorState& state, bool sentResult) {
742+
if (state.FirstUnprocessedQuery == FirstUnprocessedQuery &&
743+
state.LastProcessedKey && !LastProcessedKey)
744+
{
745+
LOG_CRIT_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD,
746+
"DataShard " << Self->TabletID() << " detected unexpected reset of LastProcessedKey:"
747+
<< " ReadId# " << State.ReadId
748+
<< " LastSeqNo# " << State.SeqNo
749+
<< " LastQuery# " << State.FirstUnprocessedQuery
750+
<< " RowsRead# " << RowsRead
751+
<< " RowsProcessed# " << RowsProcessed
752+
<< " RowsSinceLastCheck# " << RowsSinceLastCheck
753+
<< " BytesInResult# " << BytesInResult
754+
<< " DeletedRowSkips# " << DeletedRowSkips
755+
<< " InvisibleRowSkips# " << InvisibleRowSkips
756+
<< " Quota.Rows# " << State.Quota.Rows
757+
<< " Quota.Bytes# " << State.Quota.Bytes
758+
<< " State.TotalRows# " << State.TotalRows
759+
<< " State.TotalRowsLimit# " << State.TotalRowsLimit
760+
<< " State.MaxRowsInResult# " << State.MaxRowsInResult);
761+
Self->IncCounterReadIteratorLastKeyReset();
762+
}
763+
735764
state.TotalRows += RowsRead;
736765
state.FirstUnprocessedQuery = FirstUnprocessedQuery;
737766
state.LastProcessedKey = LastProcessedKey;
@@ -1683,6 +1712,7 @@ class TDataShard::TReadOperation : public TOperation, public IReadOperation {
16831712
if (Reader->HasUnreadQueries()) {
16841713
Reader->UpdateState(state, ResultSent);
16851714
if (!state.IsExhausted()) {
1715+
state.ReadContinuePending = true;
16861716
ctx.Send(
16871717
Self->SelfId(),
16881718
new TEvDataShard::TEvReadContinue(ReadId.Sender, ReadId.ReadId));
@@ -2333,6 +2363,15 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase
23332363
Y_ASSERT(it->second);
23342364
auto& state = *it->second;
23352365

2366+
if (state.IsExhausted()) {
2367+
// iterator quota reduced and exhausted while ReadContinue was inflight
2368+
LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " ReadContinue for iterator# " << ReadId
2369+
<< ", quota exhausted while rescheduling");
2370+
state.ReadContinuePending = false;
2371+
Result.reset();
2372+
return true;
2373+
}
2374+
23362375
LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " ReadContinue for iterator# " << ReadId
23372376
<< ", firstUnprocessedQuery# " << state.FirstUnprocessedQuery);
23382377

@@ -2446,6 +2485,7 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase
24462485
if (Reader->Read(txc, ctx)) {
24472486
// Retry later when dependencies are resolved
24482487
if (!Reader->GetVolatileReadDependencies().empty()) {
2488+
state.ReadContinuePending = true;
24492489
Self->WaitVolatileDependenciesThenSend(
24502490
Reader->GetVolatileReadDependencies(),
24512491
Self->SelfId(),
@@ -2532,6 +2572,8 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase
25322572
Y_ABORT_UNLESS(it->second);
25332573
auto& state = *it->second;
25342574

2575+
state.ReadContinuePending = false;
2576+
25352577
if (!Result) {
25362578
LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " read iterator# " << ReadId
25372579
<< " TTxReadContinue::Execute() finished without Result, aborting");
@@ -2579,14 +2621,14 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase
25792621
}
25802622

25812623
if (Reader->HasUnreadQueries()) {
2582-
Y_ASSERT(it->second);
2583-
auto& state = *it->second;
2624+
bool wasExhausted = state.IsExhausted();
25842625
Reader->UpdateState(state, useful);
25852626
if (!state.IsExhausted()) {
2627+
state.ReadContinuePending = true;
25862628
ctx.Send(
25872629
Self->SelfId(),
25882630
new TEvDataShard::TEvReadContinue(ReadId.Sender, ReadId.ReadId));
2589-
} else {
2631+
} else if (!wasExhausted) {
25902632
Self->IncCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT);
25912633
LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID()
25922634
<< " read iterator# " << ReadId << " exhausted");
@@ -2859,14 +2901,19 @@ void TDataShard::Handle(TEvDataShard::TEvReadAck::TPtr& ev, const TActorContext&
28592901
bool wasExhausted = state.IsExhausted();
28602902
state.UpQuota(
28612903
record.GetSeqNo(),
2862-
record.GetMaxRows(),
2863-
record.GetMaxBytes());
2904+
record.HasMaxRows() ? record.GetMaxRows() : Max<ui64>(),
2905+
record.HasMaxBytes() ? record.GetMaxBytes() : Max<ui64>());
28642906

28652907
if (wasExhausted && !state.IsExhausted()) {
28662908
DecCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT);
2867-
ctx.Send(
2868-
SelfId(),
2869-
new TEvDataShard::TEvReadContinue(ev->Sender, record.GetReadId()));
2909+
if (!state.ReadContinuePending) {
2910+
state.ReadContinuePending = true;
2911+
ctx.Send(
2912+
SelfId(),
2913+
new TEvDataShard::TEvReadContinue(ev->Sender, record.GetReadId()));
2914+
}
2915+
} else if (!wasExhausted && state.IsExhausted()) {
2916+
IncCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT);
28702917
}
28712918

28722919
LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " ReadAck for read iterator# " << readId
@@ -2995,6 +3042,16 @@ void TDataShard::UnsubscribeReadIteratorSessions(const TActorContext& ctx) {
29953042
ReadIteratorSessions.clear();
29963043
}
29973044

3045+
void TDataShard::IncCounterReadIteratorLastKeyReset() {
3046+
if (!CounterReadIteratorLastKeyReset) {
3047+
CounterReadIteratorLastKeyReset = GetServiceCounters(AppData()->Counters, "tablets")
3048+
->GetSubgroup("type", "DataShard")
3049+
->GetSubgroup("category", "app")
3050+
->GetCounter("DataShard/ReadIteratorLastKeyReset", true);
3051+
}
3052+
++*CounterReadIteratorLastKeyReset;
3053+
}
3054+
29983055
} // NKikimr::NDataShard
29993056

30003057
template<>

ydb/core/tx/datashard/datashard_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3322,6 +3322,10 @@ class TDataShard
33223322
bool AllowCancelROwithReadsets() const;
33233323

33243324
void ResolveTablePath(const TActorContext &ctx);
3325+
3326+
public:
3327+
NMonitoring::TDynamicCounters::TCounterPtr CounterReadIteratorLastKeyReset;
3328+
void IncCounterReadIteratorLastKeyReset();
33253329
};
33263330

33273331
NKikimrTxDataShard::TError::EKind ConvertErrCode(NMiniKQL::IEngineFlat::EResult code);

ydb/core/tx/datashard/datashard_ut_read_iterator.cpp

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4627,6 +4627,140 @@ Y_UNIT_TEST_SUITE(DataShardReadIteratorConsistency) {
46274627
"result2: " << result2);
46284628
}
46294629

4630+
template<class TEvType>
4631+
class TBlockEvents : public std::deque<typename TEvType::TPtr> {
4632+
public:
4633+
TBlockEvents(TTestActorRuntime& runtime, std::function<bool(typename TEvType::TPtr&)> condition = {})
4634+
: Runtime(runtime)
4635+
, Condition(std::move(condition))
4636+
, Holder(Runtime.AddObserver<TEvType>(
4637+
[this](typename TEvType::TPtr& ev) {
4638+
this->Process(ev);
4639+
}))
4640+
{}
4641+
4642+
TBlockEvents& Unblock(size_t count = -1) {
4643+
while (!this->empty() && count > 0) {
4644+
auto& ev = this->front();
4645+
IEventHandle* ptr = ev.Get();
4646+
UnblockedOnce.insert(ptr);
4647+
Runtime.Send(ev.Release(), 0, /* viaActorSystem */ true);
4648+
this->pop_front();
4649+
--count;
4650+
}
4651+
return *this;
4652+
}
4653+
4654+
void Stop() {
4655+
UnblockedOnce.clear();
4656+
Holder.Remove();
4657+
}
4658+
4659+
private:
4660+
void Process(typename TEvType::TPtr& ev) {
4661+
IEventHandle* ptr = ev.Get();
4662+
auto it = UnblockedOnce.find(ptr);
4663+
if (it != UnblockedOnce.end()) {
4664+
UnblockedOnce.erase(it);
4665+
return;
4666+
}
4667+
4668+
if (Condition && !Condition(ev)) {
4669+
return;
4670+
}
4671+
4672+
this->emplace_back(std::move(ev));
4673+
}
4674+
4675+
private:
4676+
TTestActorRuntime& Runtime;
4677+
std::function<bool(typename TEvType::TPtr&)> Condition;
4678+
TTestActorRuntime::TEventObserverHolder Holder;
4679+
THashSet<IEventHandle*> UnblockedOnce;
4680+
};
4681+
4682+
// Regression test for issue 7674: a read iterator must not return duplicate
// rows when TEvReadAck and TEvReadContinue events are processed in an
// unfortunate order. The test reads 7 of 10 rows in chunks of at most 3 rows
// while manually controlling delivery of acks, results and continue events.
Y_UNIT_TEST(Bug_7674_IteratorDuplicateRows) {
    TPortManager pm;
    TServerSettings serverSettings(pm.GetPort(2134));
    serverSettings.SetDomainName("Root")
        .SetUseRealThreads(false);
    TServer::TPtr server = new TServer(serverSettings);

    auto& runtime = *server->GetRuntime();
    auto sender = runtime.AllocateEdgeActor();

    runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);

    InitRoot(server, sender);

    TDisableDataShardLogBatching disableDataShardLogBatching;

    CreateShardedTable(server, sender, "/Root", "table-1", 1);

    // Ten rows in total, written by two separate upserts
    ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50);");
    ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (6, 60), (7, 70), (8, 80), (9, 90), (10, 100);");
    runtime.SimulateSleep(TDuration::Seconds(1));

    // Rewrite every TEvRead so that a single result carries at most 3 rows
    auto forceSmallChunks = runtime.AddObserver<TEvDataShard::TEvRead>(
        [&](TEvDataShard::TEvRead::TPtr& ev) {
            ev->Get()->Record.SetMaxRowsInResult(3);
        });

    TBlockEvents<TEvDataShard::TEvReadAck> blockedAcks(runtime);
    TBlockEvents<TEvDataShard::TEvReadResult> blockedResults(runtime);
    TBlockEvents<TEvDataShard::TEvReadContinue> blockedContinue(runtime);

    // Dispatches events until `ready()` holds; fails the test when it still
    // does not hold after `attempts` dispatch rounds.
    auto waitFor = [&](const TString& what, const auto& ready, size_t attempts = 1) {
        for (; !ready(); --attempts) {
            UNIT_ASSERT_C(attempts > 0, "... failed to wait for " << what);
            Cerr << "... waiting for " << what << Endl;
            TDispatchOptions options;
            options.CustomFinalCondition = [&]() {
                return ready();
            };
            runtime.DispatchEvents(options);
        }
    };

    auto readFuture = KqpSimpleSend(runtime, "SELECT key, value FROM `/Root/table-1` ORDER BY key LIMIT 7");
    waitFor("first TEvReadContinue", [&] { return blockedContinue.size() >= 1; });
    waitFor("first TEvReadResult", [&] { return blockedResults.size() >= 1; });

    // Let the iterator produce its second chunk
    blockedContinue.Unblock(1);
    waitFor("second TEvReadContinue", [&] { return blockedContinue.size() >= 1; });
    waitFor("second TEvReadResult", [&] { return blockedResults.size() >= 2; });

    // Both results have to reach the reader back to back
    blockedResults.Unblock();

    waitFor("both TEvReadAcks", [&] { return blockedAcks.size() >= 2; });

    // Deliver the first TEvReadAck, followed by the pending TEvReadContinue
    blockedAcks.Unblock(1);
    blockedContinue.Unblock(1);

    // A short pause so that the bug (if present) has a chance to trigger
    runtime.SimulateSleep(TDuration::MilliSeconds(1));

    // Release everything that is still held and stop intercepting
    blockedAcks.Unblock().Stop();
    blockedResults.Unblock().Stop();
    blockedContinue.Unblock().Stop();

    // Exactly keys 1..7, each once and in order
    UNIT_ASSERT_VALUES_EQUAL(
        FormatResult(AwaitResponse(runtime, std::move(readFuture))),
        "{ items { uint32_value: 1 } items { uint32_value: 10 } }, "
        "{ items { uint32_value: 2 } items { uint32_value: 20 } }, "
        "{ items { uint32_value: 3 } items { uint32_value: 30 } }, "
        "{ items { uint32_value: 4 } items { uint32_value: 40 } }, "
        "{ items { uint32_value: 5 } items { uint32_value: 50 } }, "
        "{ items { uint32_value: 6 } items { uint32_value: 60 } }, "
        "{ items { uint32_value: 7 } items { uint32_value: 70 } }");
}
4763+
46304764
}
46314765

46324766
} // namespace NKikimr

ydb/core/tx/datashard/read_iterator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ struct TReadIteratorState {
205205
TActorId SessionId;
206206
TMonotonic StartTs;
207207
bool IsFinished = false;
208+
bool ReadContinuePending = false;
208209

209210
// note that we send SeqNo's starting from 1
210211
ui64 SeqNo = 0;

0 commit comments

Comments
 (0)