diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 8937cd378bf7..3469111c67cc 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -2162,6 +2162,7 @@ class TKqpDataExecuter : public TKqpExecuterBase consumer; if (operations.HasConsumer()) { @@ -442,7 +442,6 @@ void TKqpQueryState::AddOffsetsToTransaction() { } TopicOperations = NTopic::TTopicOperations(); - for (auto& topic : operations.GetTopics()) { auto path = CanonizePath(NPersQueue::GetFullTopicPath(GetDatabase(), topic.path())); @@ -452,8 +451,7 @@ void TKqpQueryState::AddOffsetsToTransaction() { } else { for (auto& range : partition.partition_offsets()) { YQL_ENSURE(consumer.Defined()); - - TopicOperations.AddOperation(path, partition.partition_id(), *consumer, range); + TopicOperations.AddOperation(path, partition.partition_id(), *consumer, range, partition.force_commit(), partition.kill_read_session(), partition.only_check_commited_to_finish(), partition.read_session_id()); } } } @@ -474,7 +472,7 @@ std::unique_ptr TKqpQueryState::BuildSchemeC auto navigate = std::make_unique(); navigate->DatabaseName = CanonizePath(GetDatabase()); - const auto& operations = GetTopicOperations(); + const auto& operations = GetTopicOperationsFromRequest(); TMaybe consumer; if (operations.HasConsumer()) consumer = operations.GetConsumer(); diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index 57411dde5964..f8df052feda1 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -332,7 +332,7 @@ class TKqpQueryState : public TNonCopyable { return RequestEv->GetQuery(); } - const ::NKikimrKqp::TTopicOperationsRequest& GetTopicOperations() const { + const ::NKikimrKqp::TTopicOperationsRequest& GetTopicOperationsFromRequest() const { return RequestEv->GetTopicOperations(); } diff --git 
a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 137c75b4c084..26a2e05b5c39 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1358,7 +1358,6 @@ class TKqpSessionActor : public TActorBootstrapped { } } } - request.TopicOperations = std::move(txCtx.TopicOperations); } else if (QueryState->ShouldAcquireLocks(tx) && (!txCtx.HasOlapTable || Settings.TableService.GetEnableOlapSink())) { request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); diff --git a/ydb/core/kqp/topics/kqp_topics.cpp b/ydb/core/kqp/topics/kqp_topics.cpp index fcd14d18934a..7dff2f08679c 100644 --- a/ydb/core/kqp/topics/kqp_topics.cpp +++ b/ydb/core/kqp/topics/kqp_topics.cpp @@ -1,6 +1,7 @@ #include "kqp_topics.h" #include +#include #include #include @@ -26,21 +27,50 @@ static void UpdateSupportivePartition(TMaybe& lhs, const TMaybe& rhs // bool TConsumerOperations::IsValid() const { - return Offsets_.GetNumIntervals() == 1; + return Offsets_.GetNumIntervals() <= 1; } -std::pair TConsumerOperations::GetRange() const +std::pair TConsumerOperations::GetOffsetsCommitRange() const { Y_ABORT_UNLESS(IsValid()); - return {Offsets_.Min(), Offsets_.Max()}; + if (Offsets_.Empty()) { + return {0,0}; + } else { + return {Offsets_.Min(), Offsets_.Max()}; + } +} + +bool TConsumerOperations::GetForceCommit() const +{ + return ForceCommit_; +} + +bool TConsumerOperations::GetKillReadSession() const +{ + return KillReadSession_; +} + +bool TConsumerOperations::GetOnlyCheckCommitedToFinish() const +{ + return OnlyCheckCommitedToFinish_; } -void TConsumerOperations::AddOperation(const TString& consumer, const Ydb::Topic::OffsetsRange& range) +TString TConsumerOperations::GetReadSessionId() const +{ + return ReadSessionId_; +} + +void TConsumerOperations::AddOperation(const TString& consumer, + const NKikimrKqp::TTopicOperationsRequest_TopicOffsets_PartitionOffsets_OffsetsRange& range, + bool 
forceCommit, + bool killReadSession, + bool onlyCheckCommitedToFinish, + const TString& readSessionId) { Y_ABORT_UNLESS(Consumer_.Empty() || Consumer_ == consumer); - AddOperationImpl(consumer, range.start(), range.end()); + AddOperationImpl(consumer, range.start(), range.end(), forceCommit, killReadSession, onlyCheckCommitedToFinish, readSessionId); } void TConsumerOperations::Merge(const TConsumerOperations& rhs) @@ -48,13 +78,22 @@ void TConsumerOperations::Merge(const TConsumerOperations& rhs) Y_ABORT_UNLESS(rhs.Consumer_.Defined()); Y_ABORT_UNLESS(Consumer_.Empty() || Consumer_ == rhs.Consumer_); - for (auto& range : rhs.Offsets_) { - AddOperationImpl(*rhs.Consumer_, range.first, range.second); + if (!rhs.Offsets_.Empty()) { + for (auto& range : rhs.Offsets_) { + AddOperationImpl(*rhs.Consumer_, range.first, range.second, rhs.GetForceCommit(), rhs.GetKillReadSession(), rhs.GetOnlyCheckCommitedToFinish(), rhs.GetReadSessionId()); + } + } else { + AddOperationImpl(*rhs.Consumer_, 0, 0, rhs.GetForceCommit(), rhs.GetKillReadSession(), rhs.GetOnlyCheckCommitedToFinish(), rhs.GetReadSessionId()); } } void TConsumerOperations::AddOperationImpl(const TString& consumer, - ui64 begin, ui64 end) + ui64 begin, + ui64 end, + bool forceCommit, + bool killReadSession, + bool onlyCheckCommitedToFinish, + const TString& readSessionId) { if (Offsets_.Intersects(begin, end)) { ythrow TOffsetsRangeIntersectExpection() << "offset ranges intersect"; @@ -64,7 +103,14 @@ void TConsumerOperations::AddOperationImpl(const TString& consumer, Consumer_ = consumer; } - Offsets_.InsertInterval(begin, end); + if (end != 0) { + Offsets_.InsertInterval(begin, end); + } + + ForceCommit_ = forceCommit; + KillReadSession_ = killReadSession; + OnlyCheckCommitedToFinish_ = onlyCheckCommitedToFinish; + ReadSessionId_ = readSessionId; } // @@ -76,9 +122,14 @@ bool TTopicPartitionOperations::IsValid() const [](auto& x) { return x.second.IsValid(); }); } -void 
TTopicPartitionOperations::AddOperation(const TString& topic, ui32 partition, +void TTopicPartitionOperations::AddOperation(const TString& topic, + ui32 partition, const TString& consumer, - const Ydb::Topic::OffsetsRange& range) + const NKikimrKqp::TTopicOperationsRequest_TopicOffsets_PartitionOffsets_OffsetsRange& range, + bool forceCommit, + bool killReadSession, + bool onlyCheckCommitedToFinish, + const TString& readSessionId) { Y_ABORT_UNLESS(Topic_.Empty() || Topic_ == topic); Y_ABORT_UNLESS(Partition_.Empty() || Partition_ == partition); @@ -88,7 +139,7 @@ void TTopicPartitionOperations::AddOperation(const TString& topic, ui32 partitio Partition_ = partition; } - Operations_[consumer].AddOperation(consumer, range); + Operations_[consumer].AddOperation(consumer, range, forceCommit, killReadSession, onlyCheckCommitedToFinish, readSessionId); } void TTopicPartitionOperations::AddOperation(const TString& topic, ui32 partition, @@ -117,11 +168,15 @@ void TTopicPartitionOperations::BuildTopicTxs(TTopicOperationTransactions& txs) for (auto& [consumer, operations] : Operations_) { NKikimrPQ::TPartitionOperation* o = t.tx.MutableOperations()->Add(); o->SetPartitionId(*Partition_); - auto [begin, end] = operations.GetRange(); - o->SetBegin(begin); - o->SetEnd(end); + auto [begin, end] = operations.GetOffsetsCommitRange(); + o->SetCommitOffsetsBegin(begin); + o->SetCommitOffsetsEnd(end); o->SetConsumer(consumer); o->SetPath(*Topic_); + o->SetKillReadSession(operations.GetKillReadSession()); + o->SetForceCommit(operations.GetForceCommit()); + o->SetOnlyCheckCommitedToFinish(operations.GetOnlyCheckCommitedToFinish()); + o->SetReadSessionId(operations.GetReadSessionId()); } if (HasWriteOperations_) { @@ -256,14 +311,25 @@ bool TTopicOperations::TabletHasReadOperations(ui64 tabletId) const return false; } -void TTopicOperations::AddOperation(const TString& topic, ui32 partition, +void TTopicOperations::AddOperation(const TString& topic, + ui32 partition, const TString& 
consumer, - const Ydb::Topic::OffsetsRange& range) + const NKikimrKqp::TTopicOperationsRequest_TopicOffsets_PartitionOffsets_OffsetsRange& range, + bool forceCommit, + bool killReadSession, + bool onlyCheckCommitedToFinish, + const TString& readSessionId + ) { TTopicPartition key{topic, partition}; - Operations_[key].AddOperation(topic, partition, + Operations_[key].AddOperation(topic, + partition, consumer, - range); + range, + forceCommit, + killReadSession, + onlyCheckCommitedToFinish, + readSessionId); HasReadOperations_ = true; } diff --git a/ydb/core/kqp/topics/kqp_topics.h b/ydb/core/kqp/topics/kqp_topics.h index b04e7a186f5e..8eff402d1aa6 100644 --- a/ydb/core/kqp/topics/kqp_topics.h +++ b/ydb/core/kqp/topics/kqp_topics.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -26,20 +27,40 @@ class TConsumerOperations { public: bool IsValid() const; - std::pair GetRange() const; + std::pair GetOffsetsCommitRange() const; - ui64 GetBegin() const; - ui64 GetEnd() const; + ui64 GetOffsetCommitBegin() const; + ui64 GetOffsetCommitEnd() const; + + bool GetForceCommit() const; + bool GetKillReadSession() const; + bool GetOnlyCheckCommitedToFinish() const; + TString GetReadSessionId() const; + + void AddOperation(const TString& consumer, + const NKikimrKqp::TTopicOperationsRequest_TopicOffsets_PartitionOffsets_OffsetsRange& range, + bool forceCommit = false, + bool killReadSession = false, + bool onlyCheckCommitedToFinish = false, + const TString& readSessionId = {}); - void AddOperation(const TString& consumer, const Ydb::Topic::OffsetsRange& range); void Merge(const TConsumerOperations& rhs); private: void AddOperationImpl(const TString& consumer, - ui64 begin, ui64 end); + ui64 begin, + ui64 end, + bool forceCommit = false, + bool killReadSession = false, + bool onlyCheckCommitedToFinish = false, + const TString& readSessionId = {}); TMaybe Consumer_; TDisjointIntervalTree Offsets_; + bool ForceCommit_ = false; + bool KillReadSession_ = false; + bool 
OnlyCheckCommitedToFinish_ = false; + TString ReadSessionId_; }; struct TTopicOperationTransaction { @@ -53,9 +74,14 @@ class TTopicPartitionOperations { public: bool IsValid() const; - void AddOperation(const TString& topic, ui32 partition, + void AddOperation(const TString& topic, + ui32 partition, const TString& consumer, - const Ydb::Topic::OffsetsRange& range); + const NKikimrKqp::TTopicOperationsRequest_TopicOffsets_PartitionOffsets_OffsetsRange& range, + bool forceCommit = false, + bool killReadSession = false, + bool onlyCheckCommitedToFinish = false, + const TString& readSessionId = {}); void AddOperation(const TString& topic, ui32 partition, TMaybe supportivePartition); @@ -108,7 +134,11 @@ class TTopicOperations { void AddOperation(const TString& topic, ui32 partition, const TString& consumer, - const Ydb::Topic::OffsetsRange& range); + const NKikimrKqp::TTopicOperationsRequest_TopicOffsets_PartitionOffsets_OffsetsRange& range, + bool forceCommit, + bool killReadSession, + bool onlyCheckCommitedToFinish, + const TString& readSessionId); void AddOperation(const TString& topic, ui32 partition, TMaybe supportivePartition); diff --git a/ydb/core/persqueue/events/internal.h b/ydb/core/persqueue/events/internal.h index 176b12f48e66..5f8c15036a28 100644 --- a/ydb/core/persqueue/events/internal.h +++ b/ydb/core/persqueue/events/internal.h @@ -811,11 +811,15 @@ struct TEvPQ { { } - void AddOperation(TString consumer, ui64 begin, ui64 end) { + void AddOperation(TString consumer, ui64 begin, ui64 end, bool forceCommit = false, bool killReadSession = false, bool onlyCheckCommitedToFinish = false, TString readSessionId = {}) { NKikimrPQ::TPartitionOperation operation; - operation.SetBegin(begin); - operation.SetEnd(end); + operation.SetCommitOffsetsBegin(begin); + operation.SetCommitOffsetsEnd(end); operation.SetConsumer(std::move(consumer)); + operation.SetForceCommit(forceCommit); + operation.SetKillReadSession(killReadSession); + 
operation.SetOnlyCheckCommitedToFinish(onlyCheckCommitedToFinish); + operation.SetReadSessionId(readSessionId); Operations.push_back(std::move(operation)); } diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index 9a37c987e1ce..74263e2cb98c 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -167,15 +167,14 @@ void TPartition::ReplyOk(const TActorContext& ctx, const ui64 dst, NWilson::TSpa } void TPartition::ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, - const TInstant writeTimestamp, const TInstant createTimestamp) { - ctx.Send(Tablet, MakeReplyGetClientOffsetOk(dst, offset, writeTimestamp, createTimestamp).Release()); + const TInstant writeTimestamp, const TInstant createTimestamp, bool consumerHasAnyCommits) { + ctx.Send(Tablet, MakeReplyGetClientOffsetOk(dst, offset, writeTimestamp, createTimestamp, consumerHasAnyCommits).Release()); } NKikimrClient::TKeyValueRequest::EStorageChannel GetChannel(ui32 i) { return NKikimrClient::TKeyValueRequest::EStorageChannel(NKikimrClient::TKeyValueRequest::MAIN + i); } - void AddCheckDiskRequest(TEvKeyValue::TEvRequest *request, ui32 numChannels) { for (ui32 i = 0; i < numChannels; ++i) { request->Record.AddCmdGetStatus()->SetStorageChannel(GetChannel(i)); @@ -841,7 +840,7 @@ void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext } } } - result.SetScaleStatus(SplitMergeEnabled(TabletConfig) ? ScaleStatus :NKikimrPQ::EScaleStatus::NORMAL); + result.SetScaleStatus(SplitMergeEnabled(TabletConfig) ? 
ScaleStatus : NKikimrPQ::EScaleStatus::NORMAL); ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionStatusResponse(result, Partition)); } @@ -1970,23 +1969,23 @@ void TPartition::ContinueProcessTxsAndUserActs(const TActorContext&) msg->WaitPreviousWriteSpan.End(); } switch (std::visit(visitor, front.Event)) { - case EProcessResult::Continue: - MoveUserActOrTxToCommitState(); - FirstEvent = false; - break; - case EProcessResult::ContinueDrop: - UserActionAndTransactionEvents.pop_front(); - break; - case EProcessResult::Break: - MoveUserActOrTxToCommitState(); - BatchingState = ETxBatchingState::Finishing; - FirstEvent = false; - break; - case EProcessResult::Blocked: - BatchingState = ETxBatchingState::Executing; - return; - case EProcessResult::NotReady: - return; + case EProcessResult::Continue: + MoveUserActOrTxToCommitState(); + FirstEvent = false; + break; + case EProcessResult::ContinueDrop: + UserActionAndTransactionEvents.pop_front(); + break; + case EProcessResult::Break: + MoveUserActOrTxToCommitState(); + BatchingState = ETxBatchingState::Finishing; + FirstEvent = false; + break; + case EProcessResult::Blocked: + BatchingState = ETxBatchingState::Executing; + return; + case EProcessResult::NotReady: + return; } CurrentBatchSize += 1; } @@ -2297,15 +2296,15 @@ bool TPartition::ExecUserActionOrTransaction(TSimpleSharedPtr& t, return true; } -TPartition::EProcessResult TPartition::BeginTransaction(const TEvPQ::TEvTxCalcPredicate& tx, TMaybe& predicate) +TPartition::EProcessResult TPartition::BeginTransaction(const TEvPQ::TEvTxCalcPredicate& tx, TMaybe& predicateOut) { if (tx.ForcePredicateFalse) { - predicate = false; + predicateOut = false; return EProcessResult::Continue; } THashSet consumers; - bool ok = true; + bool result = true; for (auto& operation : tx.Operations) { const TString& consumer = operation.GetConsumer(); if (TxAffectedConsumers.contains(consumer)) { @@ -2318,55 +2317,69 @@ TPartition::EProcessResult TPartition::BeginTransaction(const 
TEvPQ::TEvTxCalcPr if (AffectedUsers.contains(consumer) && !GetPendingUserIfExists(consumer)) { PQ_LOG_D("Partition " << Partition << " Consumer '" << consumer << "' has been removed"); - ok = false; + result = false; break; } if (!UsersInfoStorage->GetIfExists(consumer)) { PQ_LOG_D("Partition " << Partition << " Unknown consumer '" << consumer << "'"); - ok = false; + result = false; break; } bool isAffectedConsumer = AffectedUsers.contains(consumer); TUserInfoBase& userInfo = GetOrCreatePendingUser(consumer); - if (operation.GetBegin() > operation.GetEnd()) { - PQ_LOG_D("Partition " << Partition << - " Consumer '" << consumer << "'" << - " Bad request (invalid range) " << - " Begin " << operation.GetBegin() << - " End " << operation.GetEnd()); - ok = false; - } else if (userInfo.Offset != (i64)operation.GetBegin()) { - PQ_LOG_D("Partition " << Partition << - " Consumer '" << consumer << "'" << - " Bad request (gap) " << - " Offset " << userInfo.Offset << - " Begin " << operation.GetBegin()); - ok = false; - } else if (operation.GetEnd() > EndOffset) { + if (!operation.GetReadSessionId().empty() && operation.GetReadSessionId() != userInfo.Session) { PQ_LOG_D("Partition " << Partition << - " Consumer '" << consumer << "'" << - " Bad request (behind the last offset) " << - " EndOffset " << EndOffset << - " End " << operation.GetEnd()); - ok = false; - } + " Consumer '" << consumer << "'" << + " Bad request (session already dead) " << + " RequestSessionId '" << operation.GetReadSessionId() << + " CurrentSessionId '" << userInfo.Session << + "'"); + result = false; + } else if (operation.GetOnlyCheckCommitedToFinish()) { + if (IsActive() || static_cast(userInfo.Offset) != EndOffset) { + result = false; + } + } else { + if (!operation.GetForceCommit() && operation.GetCommitOffsetsBegin() > operation.GetCommitOffsetsEnd()) { + PQ_LOG_D("Partition " << Partition << + " Consumer '" << consumer << "'" << + " Bad request (invalid range) " << + " Begin " << 
operation.GetCommitOffsetsBegin() << + " End " << operation.GetCommitOffsetsEnd()); + result = false; + } else if (!operation.GetForceCommit() && userInfo.Offset != (i64)operation.GetCommitOffsetsBegin()) { + PQ_LOG_D("Partition " << Partition << + " Consumer '" << consumer << "'" << + " Bad request (gap) " << + " Offset " << userInfo.Offset << + " Begin " << operation.GetCommitOffsetsBegin()); + result = false; + } else if (!operation.GetForceCommit() && operation.GetCommitOffsetsEnd() > EndOffset) { + PQ_LOG_D("Partition " << Partition << + " Consumer '" << consumer << "'" << + " Bad request (behind the last offset) " << + " EndOffset " << EndOffset << + " End " << operation.GetCommitOffsetsEnd()); + result = false; + } - if (!ok) { - if (!isAffectedConsumer) { - AffectedUsers.erase(consumer); + if (!result) { + if (!isAffectedConsumer) { + AffectedUsers.erase(consumer); + } + break; } - break; + consumers.insert(consumer); } - consumers.insert(consumer); } - if (ok) { + if (result) { TxAffectedConsumers.insert(consumers.begin(), consumers.end()); } - predicate = ok; + predicateOut = result; return EProcessResult::Continue; } @@ -2513,12 +2526,40 @@ void TPartition::CommitTransaction(TSimpleSharedPtr& t) Y_ABORT_UNLESS(t->Predicate.Defined() && *t->Predicate); for (auto& operation : t->Tx->Operations) { + if (operation.GetOnlyCheckCommitedToFinish()) { + continue; + } + TUserInfoBase& userInfo = GetOrCreatePendingUser(operation.GetConsumer()); - Y_ABORT_UNLESS(userInfo.Offset == (i64)operation.GetBegin()); + if (!operation.GetForceCommit()) { + Y_ABORT_UNLESS(userInfo.Offset == (i64)operation.GetCommitOffsetsBegin()); + } - userInfo.Offset = operation.GetEnd(); + if ((i64)operation.GetCommitOffsetsEnd() < userInfo.Offset && !operation.GetReadSessionId().empty()) { + continue; // this is stale request, answer ok for it + } + + if (operation.GetCommitOffsetsEnd() <= StartOffset) { + userInfo.AnyCommits = false; + userInfo.Offset = StartOffset; + } else if 
(operation.GetCommitOffsetsEnd() > EndOffset) { + userInfo.AnyCommits = true; + userInfo.Offset = EndOffset; + } else { + userInfo.AnyCommits = true; + userInfo.Offset = operation.GetCommitOffsetsEnd(); + } + + if (operation.GetKillReadSession()) { + userInfo.Session = ""; + userInfo.PartitionSessionId = 0; + userInfo.Generation = 0; + userInfo.Step = 0; + userInfo.PipeClient = {}; + } } + CommitWriteOperations(*t); ChangePlanStepAndTxId(t->Tx->Step, t->Tx->TxId); ScheduleReplyCommitDone(t->Tx->Step, t->Tx->TxId); @@ -2640,6 +2681,9 @@ void TPartition::OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx) userInfo.Generation = actual->Generation; userInfo.Step = actual->Step; userInfo.Offset = actual->Offset; + if (userInfo.Offset <= (i64)StartOffset) { + userInfo.AnyCommits = false; + } userInfo.ReadRuleGeneration = actual->ReadRuleGeneration; userInfo.ReadFromTimestamp = actual->ReadFromTimestamp; userInfo.HasReadRule = true; @@ -2784,12 +2828,12 @@ TPartition::EProcessResult TPartition::PreProcessImmediateTx(const NKikimrPQ::TE Y_ABORT_UNLESS(tx.HasData()); THashSet consumers; for (auto& operation : tx.GetData().GetOperations()) { - if (!operation.HasBegin() || !operation.HasEnd() || !operation.HasConsumer()) { + if (!operation.HasCommitOffsetsBegin() || !operation.HasCommitOffsetsEnd() || !operation.HasConsumer()) { continue; //Write operation - handled separately via WriteInfo } - Y_ABORT_UNLESS(operation.GetBegin() <= (ui64)Max(), "Unexpected begin offset: %" PRIu64, operation.GetBegin()); - Y_ABORT_UNLESS(operation.GetEnd() <= (ui64)Max(), "Unexpected end offset: %" PRIu64, operation.GetEnd()); + Y_ABORT_UNLESS(operation.GetCommitOffsetsBegin() <= (ui64)Max(), "Unexpected begin offset: %" PRIu64, operation.GetCommitOffsetsBegin()); + Y_ABORT_UNLESS(operation.GetCommitOffsetsEnd() <= (ui64)Max(), "Unexpected end offset: %" PRIu64, operation.GetCommitOffsetsEnd()); const TString& user = operation.GetConsumer(); if (TxAffectedConsumers.contains(user)) 
{ @@ -2802,7 +2846,7 @@ TPartition::EProcessResult TPartition::PreProcessImmediateTx(const NKikimrPQ::TE "the consumer has been deleted"); return EProcessResult::ContinueDrop; } - if (operation.GetBegin() > operation.GetEnd()) { + if (operation.GetCommitOffsetsBegin() > operation.GetCommitOffsetsEnd()) { ScheduleReplyPropose(tx, NKikimrPQ::TEvProposeTransactionResult::BAD_REQUEST, NKikimrPQ::TError::BAD_REQUEST, @@ -2833,12 +2877,12 @@ void TPartition::ExecImmediateTx(TTransaction& t) return; } for (const auto& operation : record.GetData().GetOperations()) { - if (!operation.HasBegin() || !operation.HasEnd() || !operation.HasConsumer()) { + if (!operation.HasCommitOffsetsBegin() || !operation.HasCommitOffsetsEnd() || !operation.HasConsumer()) { continue; //Write operation - handled separately via WriteInfo } - Y_ABORT_UNLESS(operation.GetBegin() <= (ui64)Max(), "Unexpected begin offset: %" PRIu64, operation.GetBegin()); - Y_ABORT_UNLESS(operation.GetEnd() <= (ui64)Max(), "Unexpected end offset: %" PRIu64, operation.GetEnd()); + Y_ABORT_UNLESS(operation.GetCommitOffsetsBegin() <= (ui64)Max(), "Unexpected begin offset: %" PRIu64, operation.GetCommitOffsetsBegin()); + Y_ABORT_UNLESS(operation.GetCommitOffsetsEnd() <= (ui64)Max(), "Unexpected end offset: %" PRIu64, operation.GetCommitOffsetsEnd()); const TString& user = operation.GetConsumer(); if (!PendingUsersInfo.contains(user) && AffectedUsers.contains(user)) { @@ -2848,9 +2892,9 @@ void TPartition::ExecImmediateTx(TTransaction& t) "the consumer has been deleted"); return; } - TUserInfoBase& userInfo = GetOrCreatePendingUser(user); + TUserInfoBase& pendingUserInfo = GetOrCreatePendingUser(user); - if (operation.GetBegin() > operation.GetEnd()) { + if (!operation.GetForceCommit() && operation.GetCommitOffsetsBegin() > operation.GetCommitOffsetsEnd()) { ScheduleReplyPropose(record, NKikimrPQ::TEvProposeTransactionResult::BAD_REQUEST, NKikimrPQ::TError::BAD_REQUEST, @@ -2858,7 +2902,7 @@ void 
TPartition::ExecImmediateTx(TTransaction& t) return; } - if (userInfo.Offset != (i64)operation.GetBegin()) { + if (!operation.GetForceCommit() && pendingUserInfo.Offset != (i64)operation.GetCommitOffsetsBegin()) { ScheduleReplyPropose(record, NKikimrPQ::TEvProposeTransactionResult::ABORTED, NKikimrPQ::TError::BAD_REQUEST, @@ -2866,14 +2910,14 @@ void TPartition::ExecImmediateTx(TTransaction& t) return; } - if (operation.GetEnd() > EndOffset) { + if (!operation.GetForceCommit() && operation.GetCommitOffsetsEnd() > EndOffset) { ScheduleReplyPropose(record, NKikimrPQ::TEvProposeTransactionResult::BAD_REQUEST, NKikimrPQ::TError::BAD_REQUEST, "incorrect offset range (commit to the future)"); return; } - userInfo.Offset = operation.GetEnd(); + pendingUserInfo.Offset = operation.GetCommitOffsetsEnd(); } CommitWriteOperations(t); @@ -3009,7 +3053,7 @@ void TPartition::CommitUserAct(TEvPQ::TEvSetClientInfo& act) { userInfo.PipeClient = act.PipeClient; ScheduleReplyGetClientOffsetOk(act.Cookie, userInfo.Offset, - ts.first, ts.second); + ts.first, ts.second, ui->AnyCommits); return; } @@ -3105,15 +3149,16 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, ui32 step = act.Step; const ui64 readRuleGeneration = act.ReadRuleGeneration; - bool setSession = act.Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION; + bool createSession = act.Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION; bool dropSession = act.Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION; - bool strictCommitOffset = (act.Type == TEvPQ::TEvSetClientInfo::ESCI_OFFSET && act.SessionId.empty()); + bool commitNotFromReadSession = (act.Type == TEvPQ::TEvSetClientInfo::ESCI_OFFSET && act.SessionId.empty()); if (act.Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { userInfo.ReadRuleGeneration = 0; userInfo.Session = ""; userInfo.Generation = userInfo.Step = 0; userInfo.Offset = 0; + userInfo.AnyCommits = false; PQ_LOG_D("Topic '" << TopicName() << "' partition " << Partition 
<< " user " << user << " drop done" @@ -3129,30 +3174,31 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, userInfo.PartitionSessionId = 0; userInfo.Generation = userInfo.Step = 0; userInfo.Offset = 0; + userInfo.AnyCommits = false; if (userInfo.Important) { userInfo.Offset = StartOffset; } } else { - if (setSession || dropSession) { + if (createSession || dropSession) { offset = userInfo.Offset; auto *ui = UsersInfoStorage->GetIfExists(userInfo.User); auto ts = ui ? GetTime(*ui, userInfo.Offset) : std::make_pair(TInstant::Zero(), TInstant::Zero()); ScheduleReplyGetClientOffsetOk(act.Cookie, offset, - ts.first, ts.second); + ts.first, ts.second, ui ? ui->AnyCommits : false); } else { ScheduleReplyOk(act.Cookie); } - if (setSession) { + if (createSession) { userInfo.Session = session; userInfo.Generation = generation; userInfo.Step = step; userInfo.PipeClient = act.PipeClient; userInfo.PartitionSessionId = act.PartitionSessionId; - } else if ((dropSession && act.PipeClient == userInfo.PipeClient) || strictCommitOffset) { + } else if ((dropSession && act.PipeClient == userInfo.PipeClient) || commitNotFromReadSession) { userInfo.Session = ""; userInfo.PartitionSessionId = 0; userInfo.Generation = 0; @@ -3162,17 +3208,20 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, Y_ABORT_UNLESS(offset <= (ui64)Max(), "Unexpected Offset: %" PRIu64, offset); PQ_LOG_D("Topic '" << TopicName() << "' partition " << Partition << " user " << user - << (setSession || dropSession ? " session" : " offset") + << (createSession || dropSession ? " session" : " offset") << " is set to " << offset << " (startOffset " << StartOffset << ") session " << session ); userInfo.Offset = offset; + if (userInfo.Offset <= (i64)StartOffset) { + userInfo.AnyCommits = false; + } if (LastOffsetHasBeenCommited(userInfo)) { SendReadingFinished(user); } - auto counter = setSession ? COUNTER_PQ_CREATE_SESSION_OK : (dropSession ? 
COUNTER_PQ_DELETE_SESSION_OK : COUNTER_PQ_SET_CLIENT_OFFSET_OK); + auto counter = createSession ? COUNTER_PQ_CREATE_SESSION_OK : (dropSession ? COUNTER_PQ_DELETE_SESSION_OK : COUNTER_PQ_SET_CLIENT_OFFSET_OK); TabletCounters.Cumulative()[counter].Increment(1); } } @@ -3185,12 +3234,16 @@ void TPartition::ScheduleReplyOk(const ui64 dst) void TPartition::ScheduleReplyGetClientOffsetOk(const ui64 dst, const i64 offset, - const TInstant writeTimestamp, const TInstant createTimestamp) + const TInstant writeTimestamp, + const TInstant createTimestamp, + bool consumerHasAnyCommits) { Replies.emplace_back(Tablet, MakeReplyGetClientOffsetOk(dst, offset, - writeTimestamp, createTimestamp).Release()); + writeTimestamp, + createTimestamp, + consumerHasAnyCommits).Release()); } @@ -3265,7 +3318,8 @@ void TPartition::AddCmdWrite(NKikimrClient::TKeyValueRequest& request, const TKeyPrefix& ikey, const TKeyPrefix& ikeyDeprecated, ui64 offset, ui32 gen, ui32 step, const TString& session, ui64 readOffsetRewindSum, - ui64 readRuleGeneration) + ui64 readRuleGeneration, + bool anyCommits) { TBuffer idata; { @@ -3276,6 +3330,7 @@ void TPartition::AddCmdWrite(NKikimrClient::TKeyValueRequest& request, userData.SetSession(session); userData.SetOffsetRewindSum(readOffsetRewindSum); userData.SetReadRuleGeneration(readRuleGeneration); + userData.SetAnyCommits(anyCommits); TString out; Y_PROTOBUF_SUPPRESS_NODISCARD userData.SerializeToString(&out); @@ -3336,7 +3391,8 @@ void TPartition::AddCmdWriteUserInfos(NKikimrClient::TKeyValueRequest& request) userInfo->Offset, userInfo->Generation, userInfo->Step, userInfo->Session, ui ? 
ui->ReadOffsetRewindSum : 0, - userInfo->ReadRuleGeneration); + userInfo->ReadRuleGeneration, + userInfo->AnyCommits); } else { AddCmdDeleteRange(request, ikey, ikeyDeprecated); @@ -3365,27 +3421,27 @@ TUserInfoBase& TPartition::GetOrCreatePendingUser(const TString& user, TMaybe readRuleGeneration) { TUserInfoBase* userInfo = nullptr; - auto i = PendingUsersInfo.find(user); - if (i == PendingUsersInfo.end()) { - auto ui = UsersInfoStorage->GetIfExists(user); - auto [p, _] = PendingUsersInfo.emplace(user, UsersInfoStorage->CreateUserInfo(user, readRuleGeneration)); + auto pendingUserIt = PendingUsersInfo.find(user); + if (pendingUserIt == PendingUsersInfo.end()) { + auto userIt = UsersInfoStorage->GetIfExists(user); + auto [newPendingUserIt, _] = PendingUsersInfo.emplace(user, UsersInfoStorage->CreateUserInfo(user, readRuleGeneration)); - if (ui) { - p->second.Session = ui->Session; - p->second.PartitionSessionId = ui->PartitionSessionId; - p->second.PipeClient = ui->PipeClient; + if (userIt) { + newPendingUserIt->second.Session = userIt->Session; + newPendingUserIt->second.PartitionSessionId = userIt->PartitionSessionId; + newPendingUserIt->second.PipeClient = userIt->PipeClient; - p->second.Generation = ui->Generation; - p->second.Step = ui->Step; - p->second.Offset = ui->Offset; - p->second.ReadRuleGeneration = ui->ReadRuleGeneration; - p->second.Important = ui->Important; - p->second.ReadFromTimestamp = ui->ReadFromTimestamp; + newPendingUserIt->second.Generation = userIt->Generation; + newPendingUserIt->second.Step = userIt->Step; + newPendingUserIt->second.Offset = userIt->Offset; + newPendingUserIt->second.ReadRuleGeneration = userIt->ReadRuleGeneration; + newPendingUserIt->second.Important = userIt->Important; + newPendingUserIt->second.ReadFromTimestamp = userIt->ReadFromTimestamp; } - userInfo = &p->second; + userInfo = &newPendingUserIt->second; } else { - userInfo = &i->second; + userInfo = &pendingUserIt->second; } AffectedUsers.insert(user); @@ -3415,7 
+3471,8 @@ THolder TPartition::MakeReplyOk(const ui64 dst) THolder TPartition::MakeReplyGetClientOffsetOk(const ui64 dst, const i64 offset, const TInstant writeTimestamp, - const TInstant createTimestamp) + const TInstant createTimestamp, + bool consumerHasAnyCommits) { auto response = MakeHolder(dst); NKikimrClient::TResponse& resp = *response->Response; @@ -3439,10 +3496,9 @@ THolder TPartition::MakeReplyGetClientOffsetOk(const ui } else { user->SetSizeLag(0); } - + user->SetClientHasAnyCommits(consumerHasAnyCommits); return response; } - THolder TPartition::MakeReplyError(const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error) diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h index 1cfa15852463..f780be84169c 100644 --- a/ydb/core/persqueue/partition.h +++ b/ydb/core/persqueue/partition.h @@ -158,7 +158,7 @@ class TPartition : public TActorBootstrapped { NKikimrPQ::TError::EKind kind, const TString& reason); void ReplyErrorForStoredWrites(const TActorContext& ctx); - void ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, const TInstant writeTimestamp, const TInstant createTimestamp); + void ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, const TInstant writeTimestamp, const TInstant createTimestamp, bool consumerHasAnyCommits); void ReplyOk(const TActorContext& ctx, const ui64 dst); void ReplyOk(const TActorContext& ctx, const ui64 dst, NWilson::TSpan& span); void ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TString& ownerCookie, ui64 seqNo, NWilson::TSpan& span); @@ -345,7 +345,9 @@ class TPartition : public TActorBootstrapped { void ScheduleReplyOk(const ui64 dst); void ScheduleReplyGetClientOffsetOk(const ui64 dst, const i64 offset, - const TInstant writeTimestamp, const TInstant createTimestamp); + const TInstant writeTimestamp, + const TInstant createTimestamp, + bool consumerHasAnyCommits); void ScheduleReplyError(const 
ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); @@ -361,7 +363,8 @@ class TPartition : public TActorBootstrapped { const TKeyPrefix& ikey, const TKeyPrefix& ikeyDeprecated, ui64 offset, ui32 gen, ui32 step, const TString& session, ui64 readOffsetRewindSum, - ui64 readRuleGeneration); + ui64 readRuleGeneration, + bool anyCommits); void AddCmdWriteTxMeta(NKikimrClient::TKeyValueRequest& request); void AddCmdWriteUserInfos(NKikimrClient::TKeyValueRequest& request); void AddCmdWriteConfig(NKikimrClient::TKeyValueRequest& request); @@ -374,7 +377,9 @@ class TPartition : public TActorBootstrapped { THolder MakeReplyOk(const ui64 dst); THolder MakeReplyGetClientOffsetOk(const ui64 dst, const i64 offset, - const TInstant writeTimestamp, const TInstant createTimestamp); + const TInstant writeTimestamp, + const TInstant createTimestamp, + bool consumerHasAnyCommits); THolder MakeReplyError(const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); diff --git a/ydb/core/persqueue/partition_read.cpp b/ydb/core/persqueue/partition_read.cpp index 4161ea3fd99d..3fb9d083233f 100644 --- a/ydb/core/persqueue/partition_read.cpp +++ b/ydb/core/persqueue/partition_read.cpp @@ -124,10 +124,10 @@ TAutoPtr TPartition::MakeHasDataInfoRespon ui32 partitionId = Partition.OriginalPartitionId; auto* node = PartitionGraph.GetPartition(partitionId); - for (auto* child : node->Children) { + for (auto* child : node->DirectChildren) { res->Record.AddChildPartitionIds(child->Id); - for (auto* p : child->Parents) { + for (auto* p : child->DirectParents) { if (p->Id != partitionId) { res->Record.AddAdjacentPartitionIds(p->Id); } @@ -252,7 +252,7 @@ void TPartition::InitUserInfoForImportantClients(const TActorContext& ctx) { } if (!userInfo) { userInfo = &UsersInfoStorage->Create( - ctx, consumer.GetName(), 0, true, "", 0, 0, 0, 0, 0, TInstant::Zero(), {} + ctx, consumer.GetName(), 0, true, "", 0, 0, 0, 0, 0, TInstant::Zero(), {}, false ); } if 
(userInfo->Offset < (i64)StartOffset) @@ -314,7 +314,7 @@ void TPartition::Handle(TEvPQ::TEvGetClientOffset::TPtr& ev, const TActorContext ui64 offset = Max(userInfo.Offset, 0); auto ts = GetTime(userInfo, offset); TabletCounters.Cumulative()[COUNTER_PQ_GET_CLIENT_OFFSET_OK].Increment(1); - ReplyGetClientOffsetOk(ctx, ev->Get()->Cookie, userInfo.Offset, ts.first, ts.second); + ReplyGetClientOffsetOk(ctx, ev->Get()->Cookie, userInfo.Offset, ts.first, ts.second, userInfo.AnyCommits); } void TPartition::Handle(TEvPQ::TEvSetClientInfo::TPtr& ev, const TActorContext& ctx) { @@ -776,7 +776,6 @@ void TPartition::Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx) { const TString& user = read->ClientId; auto& userInfo = UsersInfoStorage->GetOrCreate(user, ctx); - if (!read->SessionId.empty() && !userInfo.NoConsumer) { if (userInfo.Session != read->SessionId) { TabletCounters.Cumulative()[COUNTER_PQ_READ_ERROR_NO_SESSION].Increment(1); diff --git a/ydb/core/persqueue/partition_scale_manager.cpp b/ydb/core/persqueue/partition_scale_manager.cpp index 77f16ab019ea..3360752d7653 100644 --- a/ydb/core/persqueue/partition_scale_manager.cpp +++ b/ydb/core/persqueue/partition_scale_manager.cpp @@ -74,7 +74,7 @@ std::pair, std::vector> TPartition auto partitionId = PartitionsToSplit.begin(); while (allowedSplitsCount > 0 && partitionId != PartitionsToSplit.end()) { auto* node = PartitionGraph.GetPartition(*partitionId); - if (node->Children.empty()) { + if (node->DirectChildren.empty()) { auto from = node->From; auto to = node->To; auto mid = MiddleOf(from, to); diff --git a/ydb/core/persqueue/partition_sourcemanager.cpp b/ydb/core/persqueue/partition_sourcemanager.cpp index c9214300384a..c2b6d5a0edd4 100644 --- a/ydb/core/persqueue/partition_sourcemanager.cpp +++ b/ydb/core/persqueue/partition_sourcemanager.cpp @@ -50,7 +50,7 @@ TSourceIdStorage& TPartitionSourceManager::GetSourceIdStorage() const { bool TPartitionSourceManager::HasParents() const { auto node = 
GetPartitionNode(); - return node && !node->Parents.empty(); + return node && !node->DirectParents.empty(); } diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp index 2918c297ba7c..baae65944a6a 100644 --- a/ydb/core/persqueue/pq_impl.cpp +++ b/ydb/core/persqueue/pq_impl.cpp @@ -3198,7 +3198,7 @@ bool TPersQueue::CheckTxWriteOperations(const NKikimrPQ::TDataTransaction& txBod for (auto& operation : txBody.GetOperations()) { auto isWrite = [](const NKikimrPQ::TPartitionOperation& o) { - return !o.HasBegin(); + return !o.HasCommitOffsetsBegin(); }; if (isWrite(operation)) { @@ -3921,7 +3921,7 @@ TMaybe TPersQueue::FindPartitionId(const NKikimrPQ::TDataTransacti { auto hasWriteOperation = [](const auto& txBody) { for (const auto& o : txBody.GetOperations()) { - if (!o.HasBegin()) { + if (!o.HasCommitOffsetsBegin()) { return true; } } @@ -3973,10 +3973,14 @@ void TPersQueue::SendEvTxCalcPredicateToPartitions(const TActorContext& ctx, event = std::make_unique(tx.Step, tx.TxId); } - if (operation.HasBegin()) { + if (operation.HasCommitOffsetsBegin()) { event->AddOperation(operation.GetConsumer(), - operation.GetBegin(), - operation.GetEnd()); + operation.GetCommitOffsetsBegin(), + operation.GetCommitOffsetsEnd(), + operation.HasForceCommit() ? operation.GetForceCommit() : false, + operation.HasKillReadSession() ? operation.GetKillReadSession() : false, + operation.HasOnlyCheckCommitedToFinish() ? operation.GetOnlyCheckCommitedToFinish() : false, + operation.HasReadSessionId() ? 
operation.GetReadSessionId() : ""); } } diff --git a/ydb/core/persqueue/read_balancer__balancing.cpp b/ydb/core/persqueue/read_balancer__balancing.cpp index 79218cdff6b9..f8f97a4c4795 100644 --- a/ydb/core/persqueue/read_balancer__balancing.cpp +++ b/ydb/core/persqueue/read_balancer__balancing.cpp @@ -681,7 +681,7 @@ bool IsRoot(const TPartitionGraph::Node* node, const std::unordered_set& p if (node->IsRoot()) { return true; } - for (auto* p : node->Parents) { + for (auto* p : node->DirectParents) { if (partitions.contains(p->Id)) { return false; } @@ -966,10 +966,10 @@ bool TConsumer::IsReadable(ui32 partitionId) { } if (Partitions.empty()) { - return node->Parents.empty(); + return node->DirectParents.empty(); } - for(auto* parent : node->HierarhicalParents) { + for(auto* parent : node->AllParents) { if (!IsInactive(parent->Id)) { return false; } @@ -1035,9 +1035,9 @@ bool TConsumer::ProccessReadingFinished(ui32 partitionId, bool wasInactive, cons if (family->CanAttach(std::vector{id})) { auto* node = GetPartitionGraph().GetPartition(id); bool allParentsMerged = true; - if (node->Parents.size() > 1) { + if (node->DirectParents.size() > 1) { // The partition was obtained as a result of the merge. - for (auto* c : node->Parents) { + for (auto* c : node->DirectParents) { auto* other = FindFamily(c->Id); if (!other) { allParentsMerged = false; diff --git a/ydb/core/persqueue/read_balancer__balancing_app.cpp b/ydb/core/persqueue/read_balancer__balancing_app.cpp index fd565135d162..71c7b72150fb 100644 --- a/ydb/core/persqueue/read_balancer__balancing_app.cpp +++ b/ydb/core/persqueue/read_balancer__balancing_app.cpp @@ -76,7 +76,7 @@ void TBalancer::RenderApp(NApp::TNavigationBar& __navigationBar) const { for (auto& [partitionId, partition] : consumer->Partitions) { const auto* family = consumer->FindFamily(partitionId); const auto* node = consumer->GetPartitionGraph().GetPartition(partitionId); - TString style = node && node->Children.empty() ? 
"text-success" : "text-muted"; + TString style = node && node->DirectChildren.empty() ? "text-success" : "text-muted"; auto* partitionInfo = GetPartitionInfo(partitionId); TABLER() { @@ -110,7 +110,7 @@ void TBalancer::RenderApp(NApp::TNavigationBar& __navigationBar) const { } TABLED() { if (node) { - for (auto* parent : node->Parents) { + for (auto* parent : node->DirectParents) { HREF("#" + partitionAnchor(parent->Id)) { __stream << parent->Id; } __stream << ", "; } diff --git a/ydb/core/persqueue/read_balancer_app.cpp b/ydb/core/persqueue/read_balancer_app.cpp index d4f9f8abf137..9ca5c425258b 100644 --- a/ydb/core/persqueue/read_balancer_app.cpp +++ b/ydb/core/persqueue/read_balancer_app.cpp @@ -77,7 +77,7 @@ TString TPersQueueReadBalancer::GenerateStat() { for (auto& [partitionId, partitionInfo] : PartitionsInfo) { const auto& stats = AggregatedStats.Stats[partitionId]; const auto* node = PartitionGraph.GetPartition(partitionId); - TString style = node && node->Children.empty() ? "text-success" : "text-muted"; + TString style = node && node->DirectChildren.empty() ? "text-success" : "text-muted"; TABLER() { TABLED() { @@ -87,7 +87,7 @@ TString TPersQueueReadBalancer::GenerateStat() { } TABLED() { if (node) { - str << (node->Children.empty() ? "Active" : "Inactive"); + str << (node->DirectChildren.empty() ? 
"Active" : "Inactive"); if (node->IsRoot()) { str << " (root)"; } @@ -96,7 +96,7 @@ TString TPersQueueReadBalancer::GenerateStat() { TABLED() { HREF(TStringBuilder() << "?TabletID=" << partitionInfo.TabletId) { str << partitionInfo.TabletId; } } TABLED() { if (node) { - for (auto* parent : node->Parents) { + for (auto* parent : node->DirectParents) { HREF("#" + partitionAnchor(parent->Id)) { str << parent->Id; } str << ", "; } @@ -104,7 +104,7 @@ TString TPersQueueReadBalancer::GenerateStat() { } TABLED() { if (node) { - for (auto* child : node->Children) { + for (auto* child : node->DirectChildren) { HREF("#" + partitionAnchor(child->Id)) { str << child->Id; } str << ", "; } diff --git a/ydb/core/persqueue/transaction.cpp b/ydb/core/persqueue/transaction.cpp index 63fe2a184b20..25d5ee46f01f 100644 --- a/ydb/core/persqueue/transaction.cpp +++ b/ydb/core/persqueue/transaction.cpp @@ -77,7 +77,7 @@ void TDistributedTransaction::InitPartitions(const google::protobuf::RepeatedPtr Partitions.clear(); for (auto& o : operations) { - if (!o.HasBegin()) { + if (!o.HasCommitOffsetsBegin()) { HasWriteOperations = true; } @@ -185,16 +185,16 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTrans continue; } - if (node->Children.empty()) { - for (const auto* r : node->Parents) { + if (node->DirectChildren.empty()) { + for (const auto* r : node->DirectParents) { if (extractTabletId != r->TabletId) { PredicatesReceived[r->TabletId].SetTabletId(r->TabletId); } } } - for (const auto* r : node->Children) { - if (r->Children.empty()) { + for (const auto* r : node->DirectChildren) { + if (r->DirectChildren.empty()) { if (extractTabletId != r->TabletId) { PredicateRecipients[r->TabletId] = false; } diff --git a/ydb/core/persqueue/user_info.cpp b/ydb/core/persqueue/user_info.cpp index 92bf080dbf5c..9b8560576756 100644 --- a/ydb/core/persqueue/user_info.cpp +++ b/ydb/core/persqueue/user_info.cpp @@ -95,7 +95,7 @@ void TUsersInfoStorage::ParseDeprecated(const 
TString& key, const TString& data, Y_ABORT_UNLESS(offset <= (ui64)Max(), "Offset is too big: %" PRIu64, offset); if (!userInfo) { - Create(ctx, user, 0, false, session, 0, gen, step, static_cast(offset), 0, TInstant::Zero(), {}); + Create(ctx, user, 0, false, session, 0, gen, step, static_cast(offset), 0, TInstant::Zero(), {}, false); } else { userInfo->Session = session; userInfo->Generation = gen; @@ -123,7 +123,7 @@ void TUsersInfoStorage::Parse(const TString& key, const TString& data, const TAc Create( ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(), userData.GetPartitionSessionId(), userData.GetGeneration(), userData.GetStep(), offset, - userData.GetOffsetRewindSum(), TInstant::Zero(), {} + userData.GetOffsetRewindSum(), TInstant::Zero(), {}, userData.GetAnyCommits() ); } else { userInfo->Session = userData.GetSession(); @@ -150,7 +150,7 @@ TUserInfo& TUsersInfoStorage::GetOrCreate(const TString& user, const TActorConte if (it == UsersInfo.end()) { return Create( ctx, user, readRuleGeneration ? 
*readRuleGeneration : ++CurReadRuleGeneration, false, "", 0, - 0, 0, 0, 0, TInstant::Zero(), {} + 0, 0, 0, 0, TInstant::Zero(), {}, false ); } return it->second; @@ -177,7 +177,7 @@ TUserInfo TUsersInfoStorage::CreateUserInfo(const TActorContext& ctx, const TString& session, ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, - TInstant readFromTimestamp, const TActorId& pipeClient) const + TInstant readFromTimestamp, const TActorId& pipeClient, bool anyCommits) const { TString defaultServiceType = AppData(ctx)->PQConfig.GetDefaultClientServiceType().GetName(); TString userServiceType = ""; @@ -195,7 +195,7 @@ TUserInfo TUsersInfoStorage::CreateUserInfo(const TActorContext& ctx, ctx, StreamCountersSubgroup, user, readRuleGeneration, important, TopicConverter, Partition, session, partitionSessionId, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, DbPath, - meterRead, pipeClient + meterRead, pipeClient, anyCommits }; } @@ -203,16 +203,16 @@ TUserInfoBase TUsersInfoStorage::CreateUserInfo(const TString& user, TMaybe readRuleGeneration) const { return TUserInfoBase{user, readRuleGeneration ? 
*readRuleGeneration : ++CurReadRuleGeneration, - "", 0, 0, 0, false, {}, 0, {}}; + "", 0, 0, 0, false, false, {}, 0, {}}; } TUserInfo& TUsersInfoStorage::Create( const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, - TInstant readFromTimestamp, const TActorId& pipeClient + TInstant readFromTimestamp, const TActorId& pipeClient, bool anyCommits ) { auto userInfo = CreateUserInfo(ctx, user, readRuleGeneration, important, session, partitionSessionId, - gen, step, offset, readOffsetRewindSum, readFromTimestamp, pipeClient); + gen, step, offset, readOffsetRewindSum, readFromTimestamp, pipeClient, anyCommits); auto result = UsersInfo.emplace(user, std::move(userInfo)); Y_ABORT_UNLESS(result.second); return result.first->second; diff --git a/ydb/core/persqueue/user_info.h b/ydb/core/persqueue/user_info.h index d93da603a10a..5aaad167d9f0 100644 --- a/ydb/core/persqueue/user_info.h +++ b/ydb/core/persqueue/user_info.h @@ -45,6 +45,7 @@ struct TUserInfoBase { ui32 Generation = 0; ui32 Step = 0; i64 Offset = 0; + bool AnyCommits = false; bool Important = false; TInstant ReadFromTimestamp; @@ -170,9 +171,9 @@ struct TUserInfo: public TUserInfoBase { const ui64 readRuleGeneration, const bool important, const NPersQueue::TTopicConverterPtr& topicConverter, const ui32 partition, const TString& session, ui64 partitionSession, ui32 gen, ui32 step, i64 offset, const ui64 readOffsetRewindSum, const TString& dcId, TInstant readFromTimestamp, - const TString& dbPath, bool meterRead, const TActorId& pipeClient + const TString& dbPath, bool meterRead, const TActorId& pipeClient, bool anyCommits ) - : TUserInfoBase{user, readRuleGeneration, session, gen, step, offset, important, + : TUserInfoBase{user, readRuleGeneration, session, gen, step, offset, anyCommits, important, readFromTimestamp, partitionSession, pipeClient} , 
WriteTimestamp(TAppData::TimeProvider->Now()) , CreateTimestamp(TAppData::TimeProvider->Now()) @@ -388,7 +389,7 @@ class TUsersInfoStorage { TUserInfo& Create( const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, - TInstant readFromTimestamp, const TActorId& pipeClient + TInstant readFromTimestamp, const TActorId& pipeClient, bool anyCommits ); void Clear(const TActorContext& ctx); @@ -404,7 +405,7 @@ class TUsersInfoStorage { const TString& session, ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, - TInstant readFromTimestamp, const TActorId& pipeClient) const; + TInstant readFromTimestamp, const TActorId& pipeClient, bool anyCommits) const; private: THashMap UsersInfo; diff --git a/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp b/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp index dc877d041c9e..779bf4776c94 100644 --- a/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp @@ -261,7 +261,6 @@ std::shared_ptr::TSdkReadSession> TTestReadS TString{message.GetData()}, impl->AutoCommit) .WithMsg(new MsgWrapper(message)); - impl->ReceivedMessages.push_back(msg); if (impl->AutoCommit) { @@ -450,7 +449,7 @@ void TTestReadSession::WaitAllMessages() { template void TTestReadSession::Commit() { - Cerr << ">>>>> " << Impl->Name << " Commit all received messages" << Endl << Flush; + Cerr << ">>>>> " << Impl->Name << "Commit all received messages" << Endl << Flush; for (auto& m : Impl->ReceivedMessages) { if (!m.Commited) { m.Msg->Commit(); diff --git a/ydb/core/persqueue/ut/partition_ut.cpp b/ydb/core/persqueue/ut/partition_ut.cpp index 1f67f68deed2..c06681753ae3 100644 --- a/ydb/core/persqueue/ut/partition_ut.cpp +++ b/ydb/core/persqueue/ut/partition_ut.cpp @@ -957,8 +957,8 @@ void 
TPartitionFixture::SendProposeTransactionRequest(ui32 partition, auto* body = event->Record.MutableData(); auto* operation = body->MutableOperations()->Add(); operation->SetPartitionId(partition); - operation->SetBegin(begin); - operation->SetEnd(end); + operation->SetCommitOffsetsBegin(begin); + operation->SetCommitOffsetsEnd(end); operation->SetConsumer(client); operation->SetPath(topic); body->SetImmediate(immediate); diff --git a/ydb/core/persqueue/ut/partitiongraph_ut.cpp b/ydb/core/persqueue/ut/partitiongraph_ut.cpp index eb8d1cff01cf..43603f312ef8 100644 --- a/ydb/core/persqueue/ut/partitiongraph_ut.cpp +++ b/ydb/core/persqueue/ut/partitiongraph_ut.cpp @@ -59,22 +59,22 @@ Y_UNIT_TEST_SUITE(TPartitionGraphTest) { UNIT_ASSERT(n4); UNIT_ASSERT(n5); - UNIT_ASSERT_VALUES_EQUAL(n0->Parents.size(), 0); - UNIT_ASSERT_VALUES_EQUAL(n0->Children.size(), 0); - UNIT_ASSERT_VALUES_EQUAL(n0->HierarhicalParents.size(), 0); - - UNIT_ASSERT_VALUES_EQUAL(n1->Parents.size(), 0); - UNIT_ASSERT_VALUES_EQUAL(n1->Children.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(n1->HierarhicalParents.size(), 0); - - UNIT_ASSERT_VALUES_EQUAL(n5->Parents.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(n5->Children.size(), 0u); - UNIT_ASSERT_VALUES_EQUAL(n5->HierarhicalParents.size(), 4); - UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n0) == n5->HierarhicalParents.end()); - UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n1) != n5->HierarhicalParents.end()); - UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n2) != n5->HierarhicalParents.end()); - UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n3) != n5->HierarhicalParents.end()); - UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n4) != n5->HierarhicalParents.end()); + UNIT_ASSERT_VALUES_EQUAL(n0->DirectParents.size(), 0); + 
UNIT_ASSERT_VALUES_EQUAL(n0->DirectChildren.size(), 0); + UNIT_ASSERT_VALUES_EQUAL(n0->AllParents.size(), 0); + + UNIT_ASSERT_VALUES_EQUAL(n1->DirectParents.size(), 0); + UNIT_ASSERT_VALUES_EQUAL(n1->DirectChildren.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(n1->AllParents.size(), 0); + + UNIT_ASSERT_VALUES_EQUAL(n5->DirectParents.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(n5->DirectChildren.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(n5->AllParents.size(), 4); + UNIT_ASSERT(std::find(n5->AllParents.cbegin(), n5->AllParents.cend(), n0) == n5->AllParents.end()); + UNIT_ASSERT(std::find(n5->AllParents.cbegin(), n5->AllParents.cend(), n1) != n5->AllParents.end()); + UNIT_ASSERT(std::find(n5->AllParents.cbegin(), n5->AllParents.cend(), n2) != n5->AllParents.end()); + UNIT_ASSERT(std::find(n5->AllParents.cbegin(), n5->AllParents.cend(), n3) != n5->AllParents.end()); + UNIT_ASSERT(std::find(n5->AllParents.cbegin(), n5->AllParents.cend(), n4) != n5->AllParents.end()); { std::set traversedNodes; diff --git a/ydb/core/persqueue/ut/pqtablet_ut.cpp b/ydb/core/persqueue/ut/pqtablet_ut.cpp index 86e5fe7f2df0..ad86fb76432d 100644 --- a/ydb/core/persqueue/ut/pqtablet_ut.cpp +++ b/ydb/core/persqueue/ut/pqtablet_ut.cpp @@ -331,8 +331,8 @@ void TPQTabletFixture::SendProposeTransactionRequest(const TProposeTransactionPa auto* operation = body->MutableOperations()->Add(); operation->SetPartitionId(txOp.Partition); if (txOp.Begin.Defined()) { - operation->SetBegin(*txOp.Begin); - operation->SetEnd(*txOp.End); + operation->SetCommitOffsetsBegin(*txOp.Begin); + operation->SetCommitOffsetsEnd(*txOp.End); operation->SetConsumer(*txOp.Consumer); } operation->SetPath(txOp.Path); diff --git a/ydb/core/persqueue/ut/user_action_processor_ut.cpp b/ydb/core/persqueue/ut/user_action_processor_ut.cpp index 0796c620f39b..348da76588cd 100644 --- a/ydb/core/persqueue/ut/user_action_processor_ut.cpp +++ b/ydb/core/persqueue/ut/user_action_processor_ut.cpp @@ -653,8 +653,8 @@ void 
TUserActionProcessorFixture::SendProposeTransactionRequest(ui32 partition, auto* body = event->Record.MutableTxBody(); auto* operation = body->MutableOperations()->Add(); operation->SetPartitionId(partition); - operation->SetBegin(begin); - operation->SetEnd(end); + operation->SetCommitOffsetsBegin(begin); + operation->SetCommitOffsetsEnd(end); operation->SetConsumer(client); operation->SetPath(topic); body->SetImmediate(immediate); @@ -679,8 +679,8 @@ void TUserActionProcessorFixture::SendProposeTransactionRequest(const TProposeTr for (auto& txOp : params.TxOps) { auto* operation = body->MutableOperations()->Add(); operation->SetPartitionId(txOp.Partition); - operation->SetBegin(txOp.Begin); - operation->SetEnd(txOp.End); + operation->SetCommitOffsetsBegin(txOp.Begin); + operation->SetCommitOffsetsEnd(txOp.End); operation->SetConsumer(txOp.Consumer); operation->SetPath(txOp.Path); } diff --git a/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp b/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp index b39ad0c0c7fb..018e6b30331d 100644 --- a/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp +++ b/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp @@ -622,6 +622,110 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { readSession2->Close(); } + Y_UNIT_TEST(PartitionSplit_OffsetCommit) { + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .UpUtilizationPercent(2) + .DownUtilizationPercent(1) + .StabilizationWindow(TDuration::Seconds(2)) + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings(); + + TConsumerSettings consumers(createSettings, TEST_CONSUMER); + createSettings.AppendConsumers(consumers); + + client.CreateTopic(TEST_TOPIC, createSettings).Wait(); + + auto 
msg = TString(1_MB, 'a'); + + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, std::string{TEST_TOPIC}, false); + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 1))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 2))); + Sleep(TDuration::Seconds(15)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 3))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 4))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 5))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 6))); + Sleep(TDuration::Seconds(15)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession2_1 = CreateWriteSession(client, "producer-1", 1, std::string{TEST_TOPIC}, false); + auto writeSession2_2 = CreateWriteSession(client, "producer-2", 1, std::string{TEST_TOPIC}, false); + + { + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 7))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 8))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 9))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 10))); + Sleep(TDuration::Seconds(15)); + auto describe2 = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + } + + auto status = client.CommitOffset(TEST_TOPIC, 1, TEST_CONSUMER, 2).GetValueSync(); + UNIT_ASSERT(status.IsSuccess()); + + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + UNIT_ASSERT(description.GetPartitions().size() == 5); + + auto 
stats_part_0_try_1 = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats_part_0_try_1); + UNIT_ASSERT(stats_part_0_try_1->GetCommittedOffset() == 6); + + auto stats_part_1_try_1 = description.GetPartitions().at(1).GetPartitionConsumerStats(); + UNIT_ASSERT(stats_part_1_try_1); + UNIT_ASSERT(stats_part_1_try_1->GetCommittedOffset() == 2); + + auto stats_part_3_try_1 = description.GetPartitions().at(3).GetPartitionConsumerStats(); + UNIT_ASSERT(stats_part_3_try_1); + UNIT_ASSERT(stats_part_3_try_1->GetCommittedOffset() == 0); + + + + auto status2 = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0).GetValueSync(); + UNIT_ASSERT(status2.IsSuccess()); + + auto result2 = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result2.IsSuccess()); + + auto description2 = result2.GetConsumerDescription(); + UNIT_ASSERT(description2.GetPartitions().size() == 5); + + auto stats_part_0_try_2 = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats_part_0_try_2); + UNIT_ASSERT(stats_part_0_try_2->GetCommittedOffset() == 6); + + auto stats_part_1_try_2 = description.GetPartitions().at(1).GetPartitionConsumerStats(); + UNIT_ASSERT(stats_part_1_try_2); + UNIT_ASSERT(stats_part_1_try_2->GetCommittedOffset() == 2); + + auto stats_part_3_try_2 = description.GetPartitions().at(3).GetPartitionConsumerStats(); + UNIT_ASSERT(stats_part_3_try_2); + UNIT_ASSERT(stats_part_3_try_2->GetCommittedOffset() == 0); + } + Y_UNIT_TEST(CommitTopPast_BeforeAutoscaleAwareSDK) { TTopicSdkTestSetup setup = CreateSetup(); setup.CreateTopicWithAutoscale(std::string{TEST_TOPIC}, std::string{TEST_CONSUMER}, 1, 100); @@ -648,7 +752,7 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { UNIT_ASSERT_VALUES_EQUAL_C(NYdb::EStatus::SUCCESS, status.GetStatus(), "The consumer can commit at the end of the inactive partition."); status = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0).GetValueSync(); - 
UNIT_ASSERT_VALUES_EQUAL_C(NYdb::EStatus::BAD_REQUEST, status.GetStatus(), "The consumer cannot commit an offset for inactive, read-to-the-end partitions."); + UNIT_ASSERT_VALUES_EQUAL_C(NYdb::EStatus::SUCCESS, status.GetStatus(), "The consumer can commit an offset for inactive, read-to-the-end partitions."); } Y_UNIT_TEST(ControlPlane_CreateAlterDescribe) { @@ -869,6 +973,703 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), NYdb::EStatus::BAD_REQUEST); } + Y_UNIT_TEST(PartitionSplit_DistributedTxCommit) { + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .UpUtilizationPercent(2) + .DownUtilizationPercent(1) + .StabilizationWindow(TDuration::Seconds(2)) + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings() + .BeginAddConsumer() + .ConsumerName(TEST_CONSUMER); + client.CreateTopic(TEST_TOPIC, createSettings).Wait(); + + auto msg = TString(1_MB, 'a'); + + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, std::string{TEST_TOPIC}, false); + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 1))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 2))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 3))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 4))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 5))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 6))); + Sleep(TDuration::Seconds(15)); + auto describe = 
client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession2_1 = CreateWriteSession(client, "producer-1", 1, std::string{TEST_TOPIC}, false); + auto writeSession2_2 = CreateWriteSession(client, "producer-2", 1, std::string{TEST_TOPIC}, false); + + { + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 7))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 8))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 9))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 10))); + Sleep(TDuration::Seconds(15)); + auto describe2 = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + } + + auto reader = client.CreateReadSession( + TReadSessionSettings() + .AutoPartitioningSupport(true) + .AppendTopics(TTopicReadSettings(TEST_TOPIC)) + .ConsumerName(TEST_CONSUMER)); + + TInstant deadlineTime = TInstant::Now() + TDuration::Seconds(5); + auto count = 0; + auto expected = 10; + while (deadlineTime > TInstant::Now()) { + for (auto event : reader->GetEvents(false)) { + if (auto* x = std::get_if(&event)) { + auto& messages = x->GetMessages(); + for (size_t i = 0u; i < messages.size(); ++i) { + count++; + auto& message = messages[i]; + message.Commit(); + Cerr << "SESSION EVENT read message: " << count << " from partition: " << message.GetPartitionSession()->GetPartitionId() << Endl << Flush; + } + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION 
EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* sessionClosedEvent = std::get_if(&event)) { + // The only x in scope here is the failed (null) get_if result of the previous branch; log via sessionClosedEvent. + Cerr << "SESSION EVENT" << sessionClosedEvent->DebugString() << Endl << Flush; + } else { + Cerr << "SESSION EVENT unhandled \n"; + } + + if (count == expected) { + break; + } + } + Sleep(TDuration::MilliSeconds(250)); + } + Sleep(TDuration::Seconds(5)); + UNIT_ASSERT_EQUAL(count, expected); + + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + UNIT_ASSERT(description.GetPartitions().size() == 5); + + auto stats1 = description.GetPartitions().at(1).GetPartitionConsumerStats(); + UNIT_ASSERT(stats1); + UNIT_ASSERT(stats1->GetCommittedOffset() == 4); + } + + Y_UNIT_TEST(PartitionSplit_DistributedTxCommit_ChildFirst) { + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .UpUtilizationPercent(2) + .DownUtilizationPercent(1) + .StabilizationWindow(TDuration::Seconds(2)) + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings() + .BeginAddConsumer() + .ConsumerName(TEST_CONSUMER); + + client.CreateTopic(TEST_TOPIC, createSettings).Wait(); + + auto msg = TString(1_MB, 'a'); + + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, std::string{TEST_TOPIC}, false); + + { + 
UNIT_ASSERT(writeSession_1->Write(Msg(msg, 1))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 2))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 3))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 4))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 5))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 6))); + Sleep(TDuration::Seconds(15)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession2_1 = CreateWriteSession(client, "producer-1", 1, std::string{TEST_TOPIC}, false); + auto writeSession2_2 = CreateWriteSession(client, "producer-2", 1, std::string{TEST_TOPIC}, false); + + { + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 7))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 8))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 9))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 10))); + Sleep(TDuration::Seconds(15)); + auto describe2 = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + } + + auto reader = client.CreateReadSession( + TReadSessionSettings() + .AutoPartitioningSupport(true) + .AppendTopics(TTopicReadSettings(TEST_TOPIC)) + .ConsumerName(TEST_CONSUMER)); + + TInstant deadlineTime = TInstant::Now() + TDuration::Seconds(5); + auto count = 0; + auto expected = 10; + + std::vector partition0Messages; + + while(deadlineTime > TInstant::Now()) { + for (auto event : reader->GetEvents(false)) { + if (auto* x = std::get_if(&event)) { + auto& messages = x->GetMessages(); + for (size_t i = 0u; i < messages.size(); ++i) { + auto& message = messages[i]; + count++; + int partitionId = message.GetPartitionSession()->GetPartitionId(); + Cerr << "SESSION EVENT read message: " << count << " from 
partition: " << partitionId << Endl << Flush; + if (partitionId == 1) { + // Commit messages from partition 1 immediately + message.Commit(); + } else if (partitionId == 0) { + // Store messages from partition 0 for later + partition0Messages.push_back(message); + } + } + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* sessionClosedEvent = std::get_if(&event)) { + Cerr << sessionClosedEvent->DebugString() << Endl << Flush; + } else { + Cerr << "SESSION EVENT unhandled \n"; + } + + if (count == expected) { + break; + } + } + if (count == expected) { + break; + } + Sleep(TDuration::MilliSeconds(250)); + } + + UNIT_ASSERT_EQUAL(count, expected); + + Sleep(TDuration::Seconds(5)); + + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + UNIT_ASSERT(description.GetPartitions().size() == 5); + + auto stats1 = description.GetPartitions().at(1).GetPartitionConsumerStats(); + UNIT_ASSERT(stats1); + + UNIT_ASSERT(stats1->GetCommittedOffset() == 0); + + for (auto& message : partition0Messages) { + message.Commit(); + } + + Sleep(TDuration::Seconds(5)); + + auto result2 = 
client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result2.IsSuccess()); // validate the second describe call, not the first one again + + auto description2 = result2.GetConsumerDescription(); + UNIT_ASSERT(description2.GetPartitions().size() == 5); + + stats1 = description2.GetPartitions().at(1).GetPartitionConsumerStats(); + UNIT_ASSERT(stats1); + + UNIT_ASSERT(stats1->GetCommittedOffset() == 4); + } + + Y_UNIT_TEST(PartitionSplit_DistributedTxCommit_CheckSessionResetAfterCommit) { + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .UpUtilizationPercent(2) + .DownUtilizationPercent(1) + .StabilizationWindow(TDuration::Seconds(2)) + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings() + .BeginAddConsumer() + .ConsumerName(TEST_CONSUMER); + + client.CreateTopic(TEST_TOPIC, createSettings).Wait(); + + auto msg = TString(1_MB, 'a'); + + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, std::string{TEST_TOPIC}, false); + auto seqNo = 1; + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, seqNo++))); + Sleep(TDuration::Seconds(15)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); 
+ UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession_3 = CreateWriteSession(client, "producer-2", 1, std::string{TEST_TOPIC}, false); + UNIT_ASSERT(writeSession_3->Write(Msg(TStringBuilder() << "message-" << seqNo, seqNo++))); + UNIT_ASSERT(writeSession_3->Write(Msg(TStringBuilder() << "message-" << seqNo, seqNo++))); + + auto reader = client.CreateReadSession( + TReadSessionSettings() + .AutoPartitioningSupport(true) + .AppendTopics(TTopicReadSettings(TEST_TOPIC)) + .ConsumerName(TEST_CONSUMER)); + + TInstant deadlineTime = TInstant::Now() + TDuration::Seconds(5); + + auto commitSent = false; + while(deadlineTime > TInstant::Now()) { + for (auto event : reader->GetEvents(false)) { + if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + auto& messages = x->GetMessages(); + for (size_t i = 0u; i < messages.size(); ++i) { + auto& message = messages[i]; + message.Commit(); + Cerr << "SESSION EVENT READ SeqNo: " << message.GetSeqNo() << Endl << Flush; + // check we get this SeqNo two times + if (message.GetSeqNo() == 6) { + if (!commitSent) { + commitSent = true; + Sleep(TDuration::MilliSeconds(300)); + auto status = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0).GetValueSync(); + UNIT_ASSERT(status.IsSuccess()); + } else { + return; + } + } + } + UNIT_ASSERT(writeSession_3->Write(Msg(TStringBuilder() << "message-" << seqNo, seqNo++))); + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << 
"SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* sessionClosedEvent = std::get_if(&event)) { + Cerr << sessionClosedEvent->DebugString() << Endl << Flush; + } else { + Cerr << "SESSION EVENT unhandled \n"; + } + } + Sleep(TDuration::MilliSeconds(250)); + } + + UNIT_ASSERT(false); + } + + Y_UNIT_TEST(PartitionSplit_DistributedTxCommit_CheckOffsetCommitForDifferentCases_SplitedTopic) { + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .UpUtilizationPercent(2) + .DownUtilizationPercent(1) + .StabilizationWindow(TDuration::Seconds(2)) + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings() + .BeginAddConsumer() + .ConsumerName(TEST_CONSUMER); + + client.CreateTopic(TEST_TOPIC, createSettings).Wait(); + + auto msg = TString(1_MB, 'a'); + + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, std::string{TEST_TOPIC}, false); + auto seqNo = 1; + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + 
UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + writeSession_1->Close(); + + UNIT_ASSERT(writeSession_2->Write(Msg(msg, seqNo++))); + writeSession_2->Close(); + Sleep(TDuration::Seconds(15)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession_3 = CreateWriteSession(client, "producer-2", 1, std::string{TEST_TOPIC}, false); + UNIT_ASSERT(writeSession_3->Write(Msg(TStringBuilder() << "message-" << seqNo, seqNo++))); + UNIT_ASSERT(writeSession_3->Write(Msg(TStringBuilder() << "message-" << seqNo, seqNo++))); + + auto reader = client.CreateReadSession( + TReadSessionSettings() + .AutoPartitioningSupport(true) + .AppendTopics(TTopicReadSettings(TEST_TOPIC)) + .ConsumerName(TEST_CONSUMER)); + + TInstant deadlineTime = TInstant::Now() + TDuration::Seconds(5); + + auto commitSent = false; + TString readSessionId = ""; + while(deadlineTime > TInstant::Now()) { + for (auto event : reader->GetEvents(false)) { + if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + auto& messages = x->GetMessages(); + for (size_t i = 0u; i < messages.size(); ++i) { + auto& message = messages[i]; + Cerr << "SESSION EVENT READ SeqNo: " << message.GetSeqNo() << Endl << Flush; + + if (commitSent) { + // read session not changed + UNIT_ASSERT_EQUAL(readSessionId, message.GetPartitionSession()->GetReadSessionId()); + } + + // check we NOT get this SeqNo two times + if (message.GetSeqNo() == 6) { + if (!commitSent) { + commitSent = true; + Sleep(TDuration::MilliSeconds(300)); + + readSessionId = message.GetPartitionSession()->GetReadSessionId(); + TCommitOffsetSettings commitSettings {.ReadSessionId_ = message.GetPartitionSession()->GetReadSessionId()}; + auto status = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 8, commitSettings).GetValueSync(); + UNIT_ASSERT(status.IsSuccess()); + + { + auto describeConsumerSettings 
= TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + + auto stats = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats); + + UNIT_ASSERT(stats->GetCommittedOffset() == 8); + } + + // must be ignored, because commit to past + TCommitOffsetSettings commitToPastSettings {.ReadSessionId_ = message.GetPartitionSession()->GetReadSessionId()}; + auto commitToPastStatus = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0, commitToPastSettings).GetValueSync(); + UNIT_ASSERT(commitToPastStatus.IsSuccess()); + + { + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + + auto stats = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats); + + UNIT_ASSERT(stats->GetCommittedOffset() == 8); + } + + TCommitOffsetSettings commitSettingsWrongSession {.ReadSessionId_ = "random_session"}; + auto statusWrongSession = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0, commitSettingsWrongSession).GetValueSync(); + UNIT_ASSERT(!statusWrongSession.IsSuccess()); + + { + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + + auto stats = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats); + + UNIT_ASSERT(stats->GetCommittedOffset() == 8); + } + + } else { + UNIT_ASSERT(false); + } + } else { + message.Commit(); + } + } + 
UNIT_ASSERT(writeSession_3->Write(Msg(TStringBuilder() << "message-" << seqNo, seqNo++))); + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* sessionClosedEvent = std::get_if(&event)) { + Cerr << sessionClosedEvent->DebugString() << Endl << Flush; + } else { + Cerr << "SESSION EVENT unhandled \n"; + } + } + Sleep(TDuration::MilliSeconds(250)); + } + } + + Y_UNIT_TEST(PartitionSplit_DistributedTxCommit_CheckOffsetCommitForDifferentCases_NotSplitedTopic) { + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .UpUtilizationPercent(2) + .DownUtilizationPercent(1) + .StabilizationWindow(TDuration::Seconds(2)) + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings() + .BeginAddConsumer() + .ConsumerName(TEST_CONSUMER); + + client.CreateTopic(TEST_TOPIC, createSettings).Wait(); + + auto msg = TString(1_MB, 'a'); + + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto seqNo = 1; + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + 
UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, seqNo++))); + writeSession_1->Close(); + Sleep(TDuration::Seconds(15)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + auto writeSession_2 = CreateWriteSession(client, "producer-1", 0, std::string{TEST_TOPIC}, false); + auto reader = client.CreateReadSession( + TReadSessionSettings() + .AutoPartitioningSupport(true) + .AppendTopics(TTopicReadSettings(TEST_TOPIC)) + .ConsumerName(TEST_CONSUMER)); + + TInstant deadlineTime = TInstant::Now() + TDuration::Seconds(5); + + auto commitSent = false; + TString readSessionId = ""; + while(deadlineTime > TInstant::Now()) { + for (auto event : reader->GetEvents(false)) { + if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + auto& messages = x->GetMessages(); + for (size_t i = 0u; i < messages.size(); ++i) { + auto& message = messages[i]; + + if (commitSent) { + // read session not changed + UNIT_ASSERT_EQUAL(readSessionId, message.GetPartitionSession()->GetReadSessionId()); + } + + // check we NOT get this SeqNo two times + if (message.GetSeqNo() == 6) { + if (!commitSent) { + commitSent = true; + Sleep(TDuration::MilliSeconds(300)); + + readSessionId = message.GetPartitionSession()->GetReadSessionId(); + TCommitOffsetSettings commitSettings {.ReadSessionId_ = message.GetPartitionSession()->GetReadSessionId()}; + auto status = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 8, commitSettings).GetValueSync(); + UNIT_ASSERT(status.IsSuccess()); + + { + auto describeConsumerSettings = 
TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + + auto stats = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats); + UNIT_ASSERT(stats->GetCommittedOffset() == 8); + } + + // must be ignored, because commit to past + TCommitOffsetSettings commitToPastSettings {.ReadSessionId_ = message.GetPartitionSession()->GetReadSessionId()}; + auto commitToPastStatus = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0, commitToPastSettings).GetValueSync(); + UNIT_ASSERT(commitToPastStatus.IsSuccess()); + + { + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + + auto stats = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats); + UNIT_ASSERT(stats->GetCommittedOffset() == 8); + } + + TCommitOffsetSettings commitSettingsWrongSession {.ReadSessionId_ = "random_session"}; + auto statusWrongSession = client.CommitOffset(TEST_TOPIC, 0, TEST_CONSUMER, 0, commitSettingsWrongSession).GetValueSync(); + UNIT_ASSERT(!statusWrongSession.IsSuccess()); + + { + auto describeConsumerSettings = TDescribeConsumerSettings().IncludeStats(true); + auto result = client.DescribeConsumer(TEST_TOPIC, TEST_CONSUMER, describeConsumerSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + auto description = result.GetConsumerDescription(); + + auto stats = description.GetPartitions().at(0).GetPartitionConsumerStats(); + UNIT_ASSERT(stats); + UNIT_ASSERT(stats->GetCommittedOffset() == 8); + } + + } else { + UNIT_ASSERT(false); + } + } else { + message.Commit(); + } + } + 
UNIT_ASSERT(writeSession_2->Write(Msg(msg, seqNo++))); + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* x = std::get_if(&event)) { + x->Confirm(); + Cerr << "SESSION EVENT " << x->DebugString() << Endl << Flush; + } else if (auto* sessionClosedEvent = std::get_if(&event)) { + Cerr << sessionClosedEvent->DebugString() << Endl << Flush; + } else { + Cerr << "SESSION EVENT unhandled \n"; + } + } + Sleep(TDuration::MilliSeconds(250)); + } + } + Y_UNIT_TEST(PartitionSplit_AutosplitByLoad) { TTopicSdkTestSetup setup = CreateSetup(); TTopicClient client = setup.MakeClient(); @@ -902,10 +1703,10 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { { UNIT_ASSERT(writeSession_1->Write(Msg(msg, 3))); - UNIT_ASSERT(writeSession_2->Write(Msg(msg, 4))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 4))); UNIT_ASSERT(writeSession_1->Write(Msg(msg, 5))); UNIT_ASSERT(writeSession_2->Write(Msg(msg, 6))); - Sleep(TDuration::Seconds(5)); + Sleep(TDuration::Seconds(15)); auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); } @@ -915,7 +1716,7 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { { UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 7))); - UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 8))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 8))); UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 9))); UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 10))); Sleep(TDuration::Seconds(5)); 
diff --git a/ydb/core/persqueue/utils.cpp b/ydb/core/persqueue/utils.cpp index 7f7ca3d81e10..ccdc3ffe95c2 100644 --- a/ydb/core/persqueue/utils.cpp +++ b/ydb/core/persqueue/utils.cpp @@ -170,10 +170,10 @@ std::set TPartitionGraph::GetActiveChildren(ui32 id) const { const auto* n = queue.front(); queue.pop_front(); - if (n->Children.empty()) { + if (n->DirectChildren.empty()) { result.emplace(n->Id); } else { - queue.insert(queue.end(), n->Children.begin(), n->Children.end()); + queue.insert(queue.end(), n->DirectChildren.begin(), n->DirectChildren.end()); } } @@ -186,7 +186,7 @@ void Travers0(std::deque& queue, const std::functi queue.pop_front(); if (func(node->Id)) { - queue.insert(queue.end(), node->Children.begin(), node->Children.end()); + queue.insert(queue.end(), node->DirectChildren.begin(), node->DirectChildren.end()); } } } @@ -203,7 +203,7 @@ void TPartitionGraph::Travers(const std::function& func) const { continue; } - queue.insert(queue.end(), n.Children.begin(), n.Children.end()); + queue.insert(queue.end(), n.DirectChildren.begin(), n.DirectChildren.end()); } Travers0(queue, func); @@ -220,7 +220,7 @@ void TPartitionGraph::Travers(ui32 id, const std::function& func } std::deque queue; - queue.insert(queue.end(), n->Children.begin(), n->Children.end()); + queue.insert(queue.end(), n->DirectChildren.begin(), n->DirectChildren.end()); Travers0(queue, func); } @@ -266,17 +266,18 @@ std::unordered_map BuildGraph(const TCollection& pa } std::deque queue; - for(const auto& p : partitions) { + + for (const auto& p : partitions) { auto& node = result[GetPartitionId(p)]; - node.Children.reserve(p.ChildPartitionIdsSize()); + node.DirectChildren.reserve(p.ChildPartitionIdsSize()); for (auto id : p.GetChildPartitionIds()) { - node.Children.push_back(&result[id]); + node.DirectChildren.push_back(&result[id]); } - node.Parents.reserve(p.ParentPartitionIdsSize()); + node.DirectParents.reserve(p.ParentPartitionIdsSize()); for (auto id : p.GetParentPartitionIds()) { - 
node.Parents.push_back(&result[id]); + node.DirectParents.push_back(&result[id]); } if (p.GetParentPartitionIds().empty()) { @@ -284,24 +285,39 @@ std::unordered_map BuildGraph(const TCollection& pa } } - while(!queue.empty()) { + while (!queue.empty()) { auto* n = queue.front(); queue.pop_front(); bool allCompleted = true; - for(auto* c : n->Parents) { - if (c->HierarhicalParents.empty() && !c->Parents.empty()) { + for (auto* c : n->DirectParents) { + if (c->AllParents.empty() && !c->DirectParents.empty()) { allCompleted = false; break; } } if (allCompleted) { - for(auto* c : n->Parents) { - n->HierarhicalParents.insert(c->HierarhicalParents.begin(), c->HierarhicalParents.end()); - n->HierarhicalParents.insert(c); + for (auto* c : n->DirectParents) { + n->AllParents.insert(c->AllParents.begin(), c->AllParents.end()); + n->AllParents.insert(c); + } + queue.insert(queue.end(), n->DirectChildren.begin(), n->DirectChildren.end()); + } + } + + for (auto& [_, node] : result) { + queue.push_back(&node); + + while (!queue.empty()) { + auto* current = queue.front(); + queue.pop_front(); + + for (auto* child : current->DirectChildren) { + if (node.AllChildren.insert(child).second) { + queue.push_back(child); + } } - queue.insert(queue.end(), n->Children.begin(), n->Children.end()); } } @@ -316,11 +332,11 @@ TPartitionGraph::Node::Node(ui32 id, ui64 tabletId, const TString& from, const T } bool TPartitionGraph::Node::IsRoot() const { - return Parents.empty(); + return DirectParents.empty(); } bool TPartitionGraph::Node::IsParent(ui32 partitionId) const { - return AnyOf(Parents, [=](const auto& p) { + return AnyOf(DirectParents, [=](const auto& p) { return p->Id == partitionId; }); } diff --git a/ydb/core/persqueue/utils.h b/ydb/core/persqueue/utils.h index 4b4b296bdbb0..43eb519f85ef 100644 --- a/ydb/core/persqueue/utils.h +++ b/ydb/core/persqueue/utils.h @@ -44,11 +44,13 @@ class TPartitionGraph { TString To; // Direct parents of this node - std::vector Parents; + 
std::vector DirectParents; // Direct children of this node - std::vector Children; + std::vector DirectChildren; // All parents include parents of parents and so on - std::set HierarhicalParents; + std::set AllParents; + // All children include children of children and so on + std::set AllChildren; bool IsRoot() const; bool IsParent(ui32 partitionId) const; diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index 46dee8ed2fd4..b858719b5a47 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -69,10 +69,38 @@ enum EIsolationLevel { message TTopicOperationsRequest { optional string Consumer = 1; - repeated Ydb.Topic.UpdateOffsetsInTransactionRequest.TopicOffsets Topics = 2; + repeated TopicOffsets Topics = 2; optional uint32 SupportivePartition = 3; + + message TopicOffsets { + // Topic path. + optional string path = 1; + + // Ranges of offsets by partitions. + repeated PartitionOffsets partitions = 2; + + message PartitionOffsets { + // Partition identifier. + optional int64 partition_id = 1; + + // List of offset ranges. 
+ repeated OffsetsRange partition_offsets = 2; + + optional bool force_commit = 3; + optional bool kill_read_session = 4; + optional bool only_check_commited_to_finish = 5; + optional string read_session_id = 6; + + message OffsetsRange { + optional int64 start = 1; + optional int64 end = 2; + } + } + } } + + message TTopicOperationsResponse { message TWriteId { optional uint64 NodeId = 1; diff --git a/ydb/core/protos/msgbus_pq.proto b/ydb/core/protos/msgbus_pq.proto index 9e16ce29a372..ae4fe4ad0e91 100644 --- a/ydb/core/protos/msgbus_pq.proto +++ b/ydb/core/protos/msgbus_pq.proto @@ -472,7 +472,7 @@ message TPersQueuePartitionResponse { optional uint64 CreateTimestampMS = 6; //create Timestamp of record on Offset (next to be readed record); is not set if no such record exists (no lag) optional uint64 SizeLag = 7; optional uint64 WriteTimestampEstimateMS = 8; - + optional bool ClientHasAnyCommits = 9; } message TCmdGetOwnershipResult { diff --git a/ydb/core/protos/pqconfig.proto b/ydb/core/protos/pqconfig.proto index b2f9d7097285..2f27923c359e 100644 --- a/ydb/core/protos/pqconfig.proto +++ b/ydb/core/protos/pqconfig.proto @@ -904,7 +904,7 @@ message TUserInfo { optional uint64 OffsetRewindSum = 5; optional uint64 ReadRuleGeneration = 6; optional uint64 PartitionSessionId = 7; - + optional bool AnyCommits = 8; } message TPartitionClientInfo { @@ -952,11 +952,15 @@ message TYdsShardIterator { message TPartitionOperation { optional uint32 PartitionId = 1; - optional uint64 Begin = 2; - optional uint64 End = 3; + optional uint64 CommitOffsetsBegin = 2; + optional uint64 CommitOffsetsEnd = 3; optional string Consumer = 4; optional string Path = 5; // topic path optional uint32 SupportivePartition = 6; + optional bool ForceCommit = 7; + optional bool KillReadSession = 8; + optional bool OnlyCheckCommitedToFinish = 9; + optional string ReadSessionId = 10; }; message TWriteId { diff --git a/ydb/public/api/protos/ydb_topic.proto b/ydb/public/api/protos/ydb_topic.proto 
index a46af6cdf2bd..a45a8c673140 100644 --- a/ydb/public/api/protos/ydb_topic.proto +++ b/ydb/public/api/protos/ydb_topic.proto @@ -752,9 +752,10 @@ message CommitOffsetRequest { int64 partition_id = 3; // Path of consumer. string consumer = 4; - // Processed offset. int64 offset = 5; + // Read session identifier from StreamRead RPC. + string read_session_id = 6; } // Commit offset response sent from server to client. diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/control_plane.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/control_plane.h index 37ee274b22f3..c776e3f5b45a 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/control_plane.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/control_plane.h @@ -766,6 +766,8 @@ struct TDescribePartitionSettings: public TOperationRequestSettings {}; +struct TCommitOffsetSettings : public TOperationRequestSettings { + FLUENT_SETTING_OPTIONAL(std::string, ReadSessionId); +}; } // namespace NYdb::NTopic diff --git a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/read_events.h b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/read_events.h index 9f4ab84f98c9..b619abeb5dc3 100644 --- a/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/read_events.h +++ b/ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/topic/read_events.h @@ -31,6 +31,11 @@ struct TPartitionSession: public TThrRefBase, public TPrintable { template > TPartitionStreamImpl(ui64 partitionStreamId, std::string topicPath, + std::string readSessionId, i64 partitionId, i64 assignId, i64 readOffset, @@ -617,6 +618,7 @@ class TPartitionStreamImpl : public TAPartitionStream { { TAPartitionStream::PartitionSessionId = partitionStreamId; TAPartitionStream::TopicPath = std::move(topicPath); + TAPartitionStream::ReadSessionId = std::move(readSessionId); TAPartitionStream::PartitionId = static_cast(partitionId); MaxCommittedOffset = static_cast(readOffset); } @@ -1333,6 +1335,7 @@ class TSingleClusterReadSessionImpl : 
public TEnableSelfContext::OnReadDoneImpl( LOG_LAZY(Log, TLOG_INFO, GetLogPrefix() << "Server session id: " << msg.session_id()); RetryState = nullptr; + ReadSessionId = msg.session_id(); // Successful init. Do nothing. ContinueReadingDataImpl(); @@ -1219,6 +1220,7 @@ inline void TSingleClusterReadSessionImpl::OnReadDoneImpl( Y_UNUSED(deferred); RetryState = nullptr; + ReadSessionId = msg.session_id(); LOG_LAZY(Log, TLOG_INFO, GetLogPrefix() << "Server session id: " << msg.session_id()); @@ -1318,8 +1320,12 @@ inline void TSingleClusterReadSessionImpl::OnReadDoneImpl( Y_ABORT_UNLESS(Lock.IsLocked()); auto partitionStream = MakeIntrusive>( - NextPartitionStreamId, msg.partition_session().path(), msg.partition_session().partition_id(), - msg.partition_session().partition_session_id(), msg.committed_offset(), + NextPartitionStreamId, + msg.partition_session().path(), + ReadSessionId, + msg.partition_session().partition_id(), + msg.partition_session().partition_session_id(), + msg.committed_offset(), SelfContext); NextPartitionStreamId += PartitionStreamIdStep; diff --git a/ydb/public/sdk/cpp/src/client/topic/impl/topic_impl.h b/ydb/public/sdk/cpp/src/client/topic/impl/topic_impl.h index 810c647c0786..5c2d927ea2fe 100644 --- a/ydb/public/sdk/cpp/src/client/topic/impl/topic_impl.h +++ b/ydb/public/sdk/cpp/src/client/topic/impl/topic_impl.h @@ -282,7 +282,9 @@ class TTopicClient::TImpl : public TClientImplCommon { request.set_partition_id(partitionId); request.set_consumer(TStringType{consumerName}); request.set_offset(offset); - + if (settings.ReadSessionId_) { + request.set_read_session_id(*settings.ReadSessionId_); + } return RunSimple( std::move(request), &Ydb::Topic::V1::TopicService::Stub::AsyncCommitOffset, diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h b/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h index ece96617d623..80fa87a54102 100644 --- a/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h +++ 
b/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h @@ -943,6 +943,8 @@ class TReadSessionActor : public TActorBootstrapped { NPersQueue::TTopicsListController TopicsHandler; NPersQueue::TTopicsToConverter TopicsList; + + std::deque> Locks; }; } diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp b/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp index 43f335ac67f0..df203a86c673 100644 --- a/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -144,8 +145,8 @@ TString PartitionResponseToLog(const NKikimrClient::TPersQueuePartitionResponse& class TPartitionActor : public NActors::TActorBootstrapped { public: TPartitionActor(const TActorId& parentId, const TString& clientId, const ui64 cookie, const TString& session, const ui32 generation, - const ui32 step, const NPersQueue::TTopicConverterPtr& topic, const ui32 partition, const ui64 tabletID, - const TReadSessionActor::TTopicCounters& counters, const TString& clientDC); + const ui32 step, const NPersQueue::TTopicConverterPtr& topic, const TString& database, const ui32 partition, const ui64 tabletID, + const TReadSessionActor::TTopicCounters& counters, const TString& clientDC, std::set parents); ~TPartitionActor(); void Bootstrap(const NActors::TActorContext& ctx); @@ -156,21 +157,24 @@ class TPartitionActor : public NActors::TActorBootstrapped { private: STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { - CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) - HFunc(TEvPQProxy::TEvDeadlineExceeded, Handle) + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup); + HFunc(TEvPQProxy::TEvDeadlineExceeded, Handle); - HFunc(NActors::TEvents::TEvPoisonPill, HandlePoison) - HFunc(TEvPQProxy::TEvRead, Handle) - HFunc(TEvPQProxy::TEvCommit, Handle) - HFunc(TEvPQProxy::TEvReleasePartition, Handle) - HFunc(TEvPQProxy::TEvLockPartition, Handle) - 
HFunc(TEvPQProxy::TEvGetStatus, Handle) - HFunc(TEvPQProxy::TEvRestartPipe, Handle) + HFunc(NActors::TEvents::TEvPoisonPill, HandlePoison); + HFunc(TEvPQProxy::TEvRead, Handle); + HFunc(TEvPQProxy::TEvCommit, Handle); + HFunc(TEvPQProxy::TEvReleasePartition, Handle); + HFunc(TEvPQProxy::TEvLockPartition, Handle); + HFunc(TEvPQProxy::TEvGetStatus, Handle); + HFunc(TEvPQProxy::TEvRestartPipe, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvPersQueue::TEvResponse, Handle); HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); + + HFunc(NKqp::TEvKqp::TEvCreateSessionResponse, Handle); + HFunc(NKqp::TEvKqp::TEvQueryResponse, Handle); default: break; }; @@ -192,6 +196,9 @@ class TPartitionActor : public NActors::TActorBootstrapped { void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx); + void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); void HandleWakeup(const NActors::TActorContext& ctx); @@ -202,6 +209,7 @@ class TPartitionActor : public NActors::TActorBootstrapped { void RestartPipe(const NActors::TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode); void WaitDataInPartition(const NActors::TActorContext& ctx); void SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx); + void CommitDone(ui64 cookie, const TActorContext& ctx); void SendPartitionReady(const TActorContext& ctx); private: @@ -214,6 +222,7 @@ class TPartitionActor : public NActors::TActorBootstrapped { const ui32 Step; NPersQueue::TTopicConverterPtr Topic; + TString Database; const ui32 Partition; const ui64 TabletID; @@ 
-237,6 +246,7 @@ class TPartitionActor : public NActors::TActorBootstrapped { ui64 EndOffset; ui64 SizeLag; + bool ClientHasAnyCommits = false; TString ReadGuid; // empty if not reading @@ -255,6 +265,9 @@ class TPartitionActor : public NActors::TActorBootstrapped { bool FirstRead; bool ReadingFinishedSent; + + std::unordered_map> Kqps; + std::set Parents; }; @@ -974,6 +987,7 @@ void TReadSessionActor::Handle(V1::TEvPQProxy::TEvAuthResultOk::TPtr& ev, const topicHolder.IsServerless = t.IsServerless; topicHolder.FolderId = t.FolderId; topicHolder.FullConverter = t.TopicNameConverter; + topicHolder.PartitionGraph = t.PartitionGraph; FullPathToConverter[t.TopicNameConverter->GetPrimaryPath()] = t.TopicNameConverter; const auto& second = t.TopicNameConverter->GetSecondaryPath(); if (!second.empty()) { @@ -995,14 +1009,21 @@ void TReadSessionActor::Handle(V1::TEvPQProxy::TEvAuthResultOk::TPtr& ev, const ctx.Schedule(Min(CommitInterval, CHECK_ACL_DELAY), new TEvents::TEvWakeup()); } else { for (auto& [name, t] : ev->Get()->TopicAndTablets) { - if (Topics.find(t.TopicNameConverter->GetInternalName()) == Topics.end()) { + auto it = Topics.find(t.TopicNameConverter->GetInternalName()); + if (it == Topics.end()) { CloseSession(TStringBuilder() << "list of topics changed - new topic '" << t.TopicNameConverter->GetInternalName() << "' found", NPersQueue::NErrorCode::BAD_REQUEST, ctx); return; } + it->second.PartitionGraph = t.PartitionGraph; } } + + while (!Locks.empty()) { + ctx.Send(ctx.SelfID, std::move(Locks.front())); + Locks.pop_front(); + } } @@ -1043,6 +1064,20 @@ void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const T ); return; } + + auto* partitionNode = jt->second.PartitionGraph->GetPartition(record.GetPartition()); + if (!partitionNode) { + LOG_DEBUG_S( + ctx, NKikimrServices::PQ_READ_PROXY, + PQ_LOG_PREFIX << " lock for unknown partition = " << record.GetPartition() + ); + Locks.push_back(ev->Release()); + if (!AuthInflight) { + 
SendAuthRequest(ctx); + } + return; + } + // ToDo[counters] if (NumPartitionsFromTopic[intName]++ == 0) { if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { @@ -1057,8 +1092,8 @@ void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const T IActor* partitionActor = new TPartitionActor( ctx.SelfID, InternalClientId, Cookie, Session, record.GetGeneration(), - record.GetStep(), jt->second.FullConverter, record.GetPartition(), record.GetTabletId(), it->second, - ClientDC + record.GetStep(), jt->second.FullConverter, Database.empty() ? NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig) : Database, record.GetPartition(), record.GetTabletId(), it->second, + ClientDC, jt->second.PartitionGraph->GetPartition(record.GetPartition())->AllParents ); TActorId actorId = ctx.Register(partitionActor); @@ -1819,8 +1854,8 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvReadingFinished::TPtr& ev, const T TPartitionActor::TPartitionActor( const TActorId& parentId, const TString& internalClientId, const ui64 cookie, const TString& session, - const ui32 generation, const ui32 step, const NPersQueue::TTopicConverterPtr& topic, const ui32 partition, - const ui64 tabletID, const TReadSessionActor::TTopicCounters& counters, const TString& clientDC + const ui32 generation, const ui32 step, const NPersQueue::TTopicConverterPtr& topic, const TString& database, const ui32 partition, + const ui64 tabletID, const TReadSessionActor::TTopicCounters& counters, const TString& clientDC, std::set parents ) : ParentId(parentId) , InternalClientId(internalClientId) @@ -1830,6 +1865,7 @@ TPartitionActor::TPartitionActor( , Generation(generation) , Step(step) , Topic(topic) + , Database(database) , Partition(partition) , TabletID(tabletID) , ReadOffset(0) @@ -1856,6 +1892,7 @@ TPartitionActor::TPartitionActor( , Counters(counters) , FirstRead(true) , ReadingFinishedSent(false) + , Parents(parents) { } @@ -1888,28 +1925,82 @@ void TPartitionActor::CheckRelease(const 
TActorContext& ctx) { void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx) { - NKikimrClient::TPersQueueRequest request; - request.MutablePartitionRequest()->SetTopic(Topic->GetClientsideName()); - request.MutablePartitionRequest()->SetPartition(Partition); - request.MutablePartitionRequest()->SetCookie(readId); + if (!ClientHasAnyCommits && Parents.size() != 0) { + std::vector commits; + for (auto& parent: Parents) { + NKikimr::NGRpcProxy::V1::TDistributedCommitHelper::TCommitInfo commit {.PartitionId = parent->Id, .Offset = Max(), .KillReadSession = false, .OnlyCheckCommitedToFinish = true, .ReadSessionId = Session}; + commits.push_back(commit); + } + NKikimr::NGRpcProxy::V1::TDistributedCommitHelper::TCommitInfo commit {.PartitionId = Partition, .Offset = (i64)offset, .KillReadSession = false, .OnlyCheckCommitedToFinish = false, .ReadSessionId = Session}; + commits.push_back(commit); + auto kqp = std::make_shared(Database, InternalClientId, Topic->GetPrimaryPath(), commits, readId); + Kqps.emplace(readId, kqp); - Y_ABORT_UNLESS(PipeClient); + kqp->SendCreateSessionRequest(ctx); + } else { + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Topic->GetClientsideName()); + request.MutablePartitionRequest()->SetPartition(Partition); + request.MutablePartitionRequest()->SetCookie(readId); - ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); - auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); - commit->SetClientId(InternalClientId); - commit->SetOffset(offset); - Y_ABORT_UNLESS(!Session.empty()); - commit->SetSessionId(Session); + Y_ABORT_UNLESS(PipeClient); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" - << Partition << " committing to position " << offset << " prev " << CommittedOffset - << " end " << EndOffset << " by cookie " << readId); + 
ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); + auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); + commit->SetClientId(InternalClientId); + commit->SetOffset(offset); + Y_ABORT_UNLESS(!Session.empty()); + commit->SetSessionId(Session); - TAutoPtr req(new TEvPersQueue::TEvRequest); - req->Record.Swap(&request); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" + << Partition << " committing to position " << offset << " prev " << CommittedOffset + << " end " << EndOffset << " by cookie " << readId); - NTabletPipe::SendData(ctx, PipeClient, req.Release()); + TAutoPtr req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); + } +} + +void TPartitionActor::CommitDone(ui64 cookie, const TActorContext& ctx) { + if (CommitsInfly.empty()) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " unwaited commit-response with cookie " << cookie << "; waiting for nothing"); + return; + } + ui64 readId = CommitsInfly.front().first; + + if (cookie != readId) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " unwaited commit-response with cookie " << cookie << "; waiting for " << readId); + return; + } + + Counters.Commits.Inc(); + ClientHasAnyCommits = true; + + CommittedOffset = CommitsInfly.front().second; + CommitsInfly.pop_front(); + if (readId != Max()) //this readId is reserved for upcommits on client skipping with ClientCommitOffset + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition)); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " commit done to position " << CommittedOffset 
<< " endOffset " << EndOffset << " with cookie " << readId); + + while (!CommitsInfly.empty() && CommitsInfly.front().second == Max()) { //this is cookies that have no effect on this partition + readId = CommitsInfly.front().first; + CommitsInfly.pop_front(); + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition)); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << "partition :" << Partition + << " commit done with no effect with cookie " << readId); + } + + CheckRelease(ctx); + PipeGeneration = 0; //reset tries counter - all ok } void TPartitionActor::SendPartitionReady(const TActorContext& ctx) { @@ -2050,6 +2141,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo if (resp.HasWriteTimestampMS()) WTime = resp.GetWriteTimestampMS(); WriteTimestampEstimateMs = resp.GetWriteTimestampEstimateMS(); + ClientHasAnyCommits = resp.GetClientHasAnyCommits(); InitDone = true; PipeGeneration = 0; //reset tries counter - all ok LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INIT DONE " << Topic->GetPrintableString() @@ -2065,43 +2157,8 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo return; } - if (!result.HasCmdReadResult()) { //this is commit response - if (CommitsInfly.empty()) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() - << " partition:" << Partition - << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing"); - return; - } - ui64 readId = CommitsInfly.front().first; - - if (result.GetCookie() != readId) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() - << " partition:" << Partition - << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for " << readId); - return; - } - - Counters.Commits.Inc(); - - CommittedOffset = 
CommitsInfly.front().second; - CommitsInfly.pop_front(); - if (readId != Max()) //this readId is reserved for upcommits on client skipping with ClientCommitOffset - ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition)); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() - << " partition:" << Partition - << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); - - while (!CommitsInfly.empty() && CommitsInfly.front().second == Max()) { //this is cookies that have no effect on this partition - readId = CommitsInfly.front().first; - CommitsInfly.pop_front(); - ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition)); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() - << "partition :" << Partition - << " commit done with no effect with cookie " << readId); - } - - CheckRelease(ctx); - PipeGeneration = 0; //reset tries counter - all ok + if (!result.HasCmdReadResult()) { // this is commit response + CommitDone(result.GetCookie(), ctx); return; } @@ -2316,6 +2373,46 @@ void TPartitionActor::Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const TActo InitLockPartition(ctx); } +void TPartitionActor::Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const NActors::TActorContext& ctx) { + auto kqpIt = Kqps.find(ev->Cookie); + if (kqpIt == Kqps.end()) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("unexpected cookie at KQP create session response", NPersQueue::NErrorCode::ERROR)); + return; + } + + if (!kqpIt->second->Handle(ev, ctx)) { + const auto& record = ev->Get()->Record; + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + record.GetError(), NPersQueue::NErrorCode::ERROR)); + } +} + +void TPartitionActor::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + + auto kqpIt = 
Kqps.find(ev->Cookie); + if (kqpIt == Kqps.end()) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("unexpected cookie at KQP query response", NPersQueue::NErrorCode::ERROR)); + return; + } + + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { + + auto kqpQueryError = TStringBuilder() << "Kqp error. Status# " << record.GetYdbStatus() << ", "; + + NYql::TIssues issues; + NYql::IssuesFromMessage(record.GetResponse().GetQueryIssues(), issues); + kqpQueryError << issues.ToString(); + + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(kqpQueryError, NPersQueue::NErrorCode::ERROR)); + return; + } + + auto step = kqpIt->second->Handle(ev, ctx); + if (step == NKikimr::NGRpcProxy::V1::TDistributedCommitHelper::ECurrentStep::DONE) { + CommitDone(ev->Cookie, ctx); + } +} + void TPartitionActor::InitStartReading(const TActorContext& ctx) { Y_ABORT_UNLESS(AllPrepareInited); diff --git a/ydb/services/lib/actors/type_definitions.h b/ydb/services/lib/actors/type_definitions.h index 6446d6965ab1..5c169e0d8948 100644 --- a/ydb/services/lib/actors/type_definitions.h +++ b/ydb/services/lib/actors/type_definitions.h @@ -1,5 +1,6 @@ #pragma once +#include "ydb/core/persqueue/utils.h" #include #include @@ -25,6 +26,7 @@ struct TTopicInitInfo { TString FolderId; NKikimrPQ::TPQTabletConfig::EMeteringMode MeteringMode; THashMap Partitions; + std::shared_ptr PartitionGraph; }; using TTopicInitInfoMap = THashMap; @@ -45,6 +47,7 @@ struct TTopicHolder { TVector Groups; THashMap Partitions; + std::shared_ptr PartitionGraph; inline static TTopicHolder FromTopicInfo(const TTopicInitInfo& info) { @@ -59,6 +62,7 @@ struct TTopicHolder { .MeteringMode = info.MeteringMode, .FullConverter = info.TopicNameConverter, .Partitions = info.Partitions, + .PartitionGraph = info.PartitionGraph }; } }; diff --git a/ydb/services/persqueue_v1/actors/commit_offset_actor.cpp b/ydb/services/persqueue_v1/actors/commit_offset_actor.cpp index 9e51df7359ec..471162c5f9ae 100644 --- 
a/ydb/services/persqueue_v1/actors/commit_offset_actor.cpp +++ b/ydb/services/persqueue_v1/actors/commit_offset_actor.cpp @@ -29,7 +29,6 @@ TCommitOffsetActor::TCommitOffsetActor( } - TCommitOffsetActor::~TCommitOffsetActor() = default; @@ -76,7 +75,6 @@ void TCommitOffsetActor::Bootstrap(const TActorContext& ctx) { )); } - void TCommitOffsetActor::Die(const TActorContext& ctx) { if (PipeClient) NTabletPipe::CloseClient(ctx, PipeClient); @@ -87,7 +85,6 @@ void TCommitOffsetActor::Die(const TActorContext& ctx) { } void TCommitOffsetActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "CommitOffset auth ok, got " << ev->Get()->TopicAndTablets.size() << " topics"); TopicAndTablets = std::move(ev->Get()->TopicAndTablets); if (TopicAndTablets.empty()) { @@ -102,42 +99,64 @@ void TCommitOffsetActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TAc return; } - ui64 tabletId = topicInitInfo.Partitions.at(PartitionId).TabletId; + auto commitRequest = dynamic_cast(GetProtoRequest()); - NTabletPipe::TClientConfig clientConfig; - clientConfig.RetryPolicy = { - .RetryLimitCount = 6, - .MinRetryTime = TDuration::MilliSeconds(10), - .MaxRetryTime = TDuration::MilliSeconds(100), - .BackoffMultiplier = 2, - .DoFirstRetryInstantly = true - }; - - PipeClient = ctx.Register(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig)); + auto* partitionNode = topicInitInfo.PartitionGraph->GetPartition(commitRequest->partition_id()); - auto client_req = dynamic_cast(GetProtoRequest()); + if (partitionNode->AllParents.size() == 0 && partitionNode->DirectChildren.size() == 0) { + SendCommit(topicInitInfo, commitRequest, ctx); + } else { + auto killReadSession = commitRequest->read_session_id().empty(); + std::vector commits; - NKikimrClient::TPersQueueRequest request; - request.MutablePartitionRequest()->SetTopic(topicInitInfo.TopicNameConverter->GetPrimaryPath()); - 
request.MutablePartitionRequest()->SetPartition(client_req->partition_id()); + for (auto& parent: partitionNode->AllParents) { + TDistributedCommitHelper::TCommitInfo commit {.PartitionId = parent->Id, .Offset = Max(), .KillReadSession = killReadSession, .OnlyCheckCommitedToFinish = false}; + commits.push_back(commit); + } - Y_ABORT_UNLESS(PipeClient); + for (auto& child: partitionNode->AllChildren) { + TDistributedCommitHelper::TCommitInfo commit {.PartitionId = child->Id, .Offset = 0, .KillReadSession = killReadSession, .OnlyCheckCommitedToFinish = false}; + commits.push_back(commit); + } - auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); - commit->SetClientId(ClientId); - commit->SetOffset(client_req->offset()); - commit->SetStrict(true); + TDistributedCommitHelper::TCommitInfo commit {.PartitionId = partitionNode->Id, .Offset = commitRequest->offset(), .KillReadSession = killReadSession, .OnlyCheckCommitedToFinish = false}; - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "strict CommitOffset, partition " << client_req->partition_id() - << " committing to position " << client_req->offset() /*<< " prev " << CommittedOffset - << " end " << EndOffset << " by cookie " << readId*/); + if (!commitRequest->read_session_id().empty()) { + commit.ReadSessionId = commitRequest->read_session_id(); + } + commits.push_back(commit); - TAutoPtr req(new TEvPersQueue::TEvRequest); - req->Record.Swap(&request); + Kqp = std::make_unique(Request().GetDatabaseName().GetOrElse(TString()), ClientId, topic, commits); + Kqp->SendCreateSessionRequest(ctx); + } +} - NTabletPipe::SendData(ctx, PipeClient, req.Release()); +void TCommitOffsetActor::Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const NActors::TActorContext& ctx) { + if (!Kqp->Handle(ev, ctx)) { + AnswerError(ev->Get()->Record.GetError(), PersQueue::ErrorCode::ERROR, ctx); + } } +void TCommitOffsetActor::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx) { + 
auto& record = ev->Get()->Record; + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "strict CommitOffset failed. Kqp error: " << ev->Get()->Record); + + Ydb::Topic::CommitOffsetResult result; + Request().SendResult(result, record.GetYdbStatus()); + Die(ctx); + return; + } + + auto step = Kqp->Handle(ev, ctx); + + if (step == TDistributedCommitHelper::ECurrentStep::DONE) { + Ydb::Topic::CommitOffsetResult result; + Request().SendResult(result, Ydb::StatusIds::SUCCESS); + Die(ctx); + return; + } +} void TCommitOffsetActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { if (ev->Get()->Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { @@ -157,18 +176,54 @@ void TCommitOffsetActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActo Die(ctx); } +void TCommitOffsetActor::SendCommit(const TTopicInitInfo& topic, const Ydb::Topic::CommitOffsetRequest* commitRequest, const TActorContext& ctx) { + ui64 tabletId = topic.Partitions.at(PartitionId).TabletId; -void TCommitOffsetActor::AnswerError(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { + NTabletPipe::TClientConfig clientConfig; + clientConfig.RetryPolicy = { + .RetryLimitCount = 6, + .MinRetryTime = TDuration::MilliSeconds(10), + .MaxRetryTime = TDuration::MilliSeconds(100), + .BackoffMultiplier = 2, + .DoFirstRetryInstantly = true + }; + + PipeClient = ctx.Register(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig)); + + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(topic.TopicNameConverter->GetPrimaryPath()); + request.MutablePartitionRequest()->SetPartition(commitRequest->partition_id()); + + Y_ABORT_UNLESS(PipeClient); + auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); + commit->SetClientId(ClientId); + commit->SetOffset(commitRequest->offset()); + commit->SetStrict(true); + if 
(!commitRequest->read_session_id().empty()) { + commit->SetSessionId(commitRequest->read_session_id()); + } + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "strict CommitOffset, partition " << commitRequest->partition_id() + << " committing to position " << commitRequest->offset() /*<< " prev " << CommittedOffset + << " end " << EndOffset << " by cookie " << readId*/); + + TAutoPtr req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); +} + +void TCommitOffsetActor::AnswerError(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { Ydb::Topic::CommitOffsetResponse response; response.mutable_operation()->set_ready(true); auto issue = response.mutable_operation()->add_issues(); FillIssue(issue, errorCode, errorReason); - response.mutable_operation()->set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); - Reply(ConvertPersQueueInternalCodeToStatus(errorCode), response.operation().issues(), ctx); + auto status = ConvertPersQueueInternalCodeToStatus(errorCode); + response.mutable_operation()->set_status(status); + Reply(status, response.operation().issues(), ctx); } - void TCommitOffsetActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { AnswerError(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); } @@ -186,5 +241,4 @@ void TCommitOffsetActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, con AnswerError(TStringBuilder() <<"pipe to tablet destroyed" << ev->Get()->TabletId, PersQueue::ErrorCode::TABLET_PIPE_DISCONNECTED, ctx); } - } diff --git a/ydb/services/persqueue_v1/actors/commit_offset_actor.h b/ydb/services/persqueue_v1/actors/commit_offset_actor.h index 6eefc867ebb7..6daa6df3c54d 100644 --- a/ydb/services/persqueue_v1/actors/commit_offset_actor.h +++ b/ydb/services/persqueue_v1/actors/commit_offset_actor.h @@ -1,7 +1,10 @@ #pragma once #include "events.h" +#include 
"distributed_commit_helper.h" + +#include #include #include @@ -52,6 +55,9 @@ class TCommitOffsetActor : public TRpcOperationRequestActor Kqp; }; } diff --git a/ydb/services/persqueue_v1/actors/distributed_commit_helper.cpp b/ydb/services/persqueue_v1/actors/distributed_commit_helper.cpp new file mode 100644 index 000000000000..eb6410dc55b2 --- /dev/null +++ b/ydb/services/persqueue_v1/actors/distributed_commit_helper.cpp @@ -0,0 +1,126 @@ +#include "distributed_commit_helper.h" +#include "ydb/core/kqp/common/simple/services.h" + +namespace NKikimr::NGRpcProxy::V1 { + +TDistributedCommitHelper::TDistributedCommitHelper(TString database, TString consumer, TString path, std::vector commits, ui64 cookie) + : DataBase(database) + , Consumer(consumer) + , Path(path) + , Commits(std::move(commits)) + , Step(BEGIN_TRANSACTION_SENDED) + , Cookie(cookie) +{} + +TDistributedCommitHelper::ECurrentStep TDistributedCommitHelper::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx) { + switch (Step) { + case BEGIN_TRANSACTION_SENDED: + Step = OFFSETS_SENDED; + SendCommits(ev, ctx); + break; + case OFFSETS_SENDED: + Step = COMMIT_SENDED; + CommitTx(ctx); + break; + case COMMIT_SENDED: + Step = DONE; + CloseKqpSession(ctx); + break; + case DONE: + break; + } + return Step; +} + +void TDistributedCommitHelper::SendCreateSessionRequest(const TActorContext& ctx) { + auto ev = MakeCreateSessionRequest(); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release(), 0, Cookie); +} + +void TDistributedCommitHelper::BeginTransaction(const NActors::TActorContext& ctx) { + auto begin = MakeHolder(); + + begin->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_BEGIN_TX); + begin->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); + begin->Record.MutableRequest()->SetSessionId(KqpSessionId); + begin->Record.MutableRequest()->SetDatabase(DataBase); + + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), 
begin.Release(), 0, Cookie); +} + +bool TDistributedCommitHelper::Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const TActorContext& ctx) { + const auto& record = ev->Get()->Record; + + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { + return false; + } + + KqpSessionId = record.GetResponse().GetSessionId(); + Y_ABORT_UNLESS(!KqpSessionId.empty()); + BeginTransaction(ctx); + return true; +} + +void TDistributedCommitHelper::CloseKqpSession(const TActorContext& ctx) { + if (KqpSessionId) { + auto ev = MakeCloseSessionRequest(); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release(), 0, Cookie); + KqpSessionId = ""; + } +} + +THolder TDistributedCommitHelper::MakeCreateSessionRequest() { + auto ev = MakeHolder(); + ev->Record.MutableRequest()->SetDatabase(DataBase); + return ev; +} + +THolder TDistributedCommitHelper::MakeCloseSessionRequest() { + auto ev = MakeHolder(); + ev->Record.MutableRequest()->SetSessionId(KqpSessionId); + return ev; +} + +void TDistributedCommitHelper::SendCommits(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const NActors::TActorContext& ctx) { + auto& record = ev->Get()->Record; + TxId = record.GetResponse().GetTxMeta().id(); + Y_ABORT_UNLESS(!TxId.empty()); + + auto offsets = MakeHolder(); + offsets->Record.MutableRequest()->SetDatabase(DataBase); + offsets->Record.MutableRequest()->SetSessionId(KqpSessionId); + offsets->Record.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_UNDEFINED); + offsets->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_TOPIC); + offsets->Record.MutableRequest()->MutableTxControl()->set_tx_id(TxId); + offsets->Record.MutableRequest()->MutableTopicOperations()->SetConsumer(Consumer); + + auto* topic = offsets->Record.MutableRequest()->MutableTopicOperations()->AddTopics(); + topic->set_path(Path); + + for(auto &commit: Commits) { + auto* partition = topic->add_partitions(); + partition->set_partition_id(commit.PartitionId); + partition->set_force_commit(true); + 
partition->set_kill_read_session(commit.KillReadSession); + partition->set_only_check_commited_to_finish(commit.OnlyCheckCommitedToFinish); + partition->set_read_session_id(commit.ReadSessionId); + auto* offset = partition->add_partition_offsets(); + offset->set_end(commit.Offset); + } + + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), offsets.Release(), 0, Cookie); +} + +void TDistributedCommitHelper::CommitTx(const NActors::TActorContext& ctx) { + auto commit = MakeHolder(); + + commit->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_COMMIT_TX); + commit->Record.MutableRequest()->MutableTxControl()->set_tx_id(TxId); + commit->Record.MutableRequest()->MutableTxControl()->set_commit_tx(true); + commit->Record.MutableRequest()->SetSessionId(KqpSessionId); + commit->Record.MutableRequest()->SetDatabase(DataBase); + + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), commit.Release(), 0, Cookie); +} + +} // namespace NKikimr::NGRpcProxy::V1 diff --git a/ydb/services/persqueue_v1/actors/distributed_commit_helper.h b/ydb/services/persqueue_v1/actors/distributed_commit_helper.h new file mode 100644 index 000000000000..7b14d47c8e90 --- /dev/null +++ b/ydb/services/persqueue_v1/actors/distributed_commit_helper.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include +#include + +namespace NKikimr::NGRpcProxy::V1 { + +using namespace NKikimr::NGRpcService; + +class TDistributedCommitHelper { +public: + enum ECurrentStep { + BEGIN_TRANSACTION_SENDED, + OFFSETS_SENDED, + COMMIT_SENDED, + DONE + }; + + struct TCommitInfo { + ui64 PartitionId; + i64 Offset; + bool KillReadSession; + bool OnlyCheckCommitedToFinish; + TString ReadSessionId; + }; + + TDistributedCommitHelper(TString database, TString consumer, TString path, std::vector commits, ui64 cookie = 0); + + ECurrentStep Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx); + void SendCreateSessionRequest(const TActorContext& ctx); + void BeginTransaction(const 
NActors::TActorContext& ctx); + bool Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const TActorContext& ctx); + +private: + void CloseKqpSession(const TActorContext& ctx); + THolder MakeCreateSessionRequest(); + THolder MakeCloseSessionRequest(); + void SendCommits(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const NActors::TActorContext& ctx); + void CommitTx(const NActors::TActorContext& ctx); + +private: + TString DataBase; + TString Consumer; + TString Path; + std::vector Commits; + ECurrentStep Step; + ui64 Cookie; + + TString TxId; + TString KqpSessionId; +}; + +} // namespace NKikimr::NGRpcProxy::V1 diff --git a/ydb/services/persqueue_v1/actors/events.h b/ydb/services/persqueue_v1/actors/events.h index e19c7295dc04..bf12ade4567c 100644 --- a/ydb/services/persqueue_v1/actors/events.h +++ b/ydb/services/persqueue_v1/actors/events.h @@ -89,6 +89,7 @@ struct TEvPQProxy { EvReadingStarted, EvReadingFinished, EvAlterTopicResponse, + EvParentCommitedToFinish, EvEnd }; @@ -407,17 +408,27 @@ struct TEvPQProxy { struct TEvCommitDone : public NActors::TEventLocal { - explicit TEvCommitDone(const ui64 assignId, const ui64 startCookie, const ui64 lastCookie, const ui64 offset) + explicit TEvCommitDone(const ui64 assignId, const ui64 startCookie, const ui64 lastCookie, const ui64 offset, const ui64 endOffset) : AssignId(assignId) , StartCookie(startCookie) , LastCookie(lastCookie) , Offset(offset) + , EndOffset(endOffset) { } ui64 AssignId; ui64 StartCookie; ui64 LastCookie; ui64 Offset; + ui64 EndOffset; + }; + + struct TEvParentCommitedToFinish : public NActors::TEventLocal { + explicit TEvParentCommitedToFinish(ui64 parentPartitionId) + : ParentPartitionId(parentPartitionId) + { } + + ui64 ParentPartitionId; }; struct TEvReleasePartition : public NActors::TEventLocal { @@ -474,11 +485,12 @@ struct TEvPQProxy { }; struct TEvPartitionStatus : public NActors::TEventLocal { - TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, 
const ui64 writeTimestampEstimateMs, ui64 nodeId, ui64 generation, + TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, ui64 nodeId, ui64 generation, bool clientHasAnyCommits, bool init = true) : Partition(partition) , Offset(offset) , EndOffset(endOffset) + , ClientHasAnyCommits(clientHasAnyCommits) , WriteTimestampEstimateMs(writeTimestampEstimateMs) , NodeId(nodeId) , Generation(generation) @@ -488,6 +500,7 @@ struct TEvPQProxy { TPartitionId Partition; ui64 Offset; ui64 EndOffset; + bool ClientHasAnyCommits; ui64 WriteTimestampEstimateMs; ui64 NodeId; ui64 Generation; diff --git a/ydb/services/persqueue_v1/actors/partition_actor.cpp b/ydb/services/persqueue_v1/actors/partition_actor.cpp index 6fe140ab5fd9..22863b38308b 100644 --- a/ydb/services/persqueue_v1/actors/partition_actor.cpp +++ b/ydb/services/persqueue_v1/actors/partition_actor.cpp @@ -25,8 +25,9 @@ TPartitionActor::TPartitionActor( const TActorId& parentId, const TString& clientId, const TString& clientPath, const ui64 cookie, const TString& session, const TPartitionId& partition, const ui32 generation, const ui32 step, const ui64 tabletID, const TTopicCounters& counters, bool commitsDisabled, - const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, - bool directRead, bool useMigrationProtocol, ui32 maxTimeLagMs, ui64 readTimestampMs + const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, const TString& database, + bool directRead, bool useMigrationProtocol, ui32 maxTimeLagMs, ui64 readTimestampMs, std::set parents, + std::unordered_set notCommitedToFinishParents ) : ParentId(parentId) , ClientId(clientId) @@ -64,34 +65,35 @@ TPartitionActor::TPartitionActor( , WaitDataCookie(0) , WaitForData(false) , LockCounted(false) + , Parents(parents) , Counters(counters) , CommitsDisabled(commitsDisabled) , CommitCookie(1) , Topic(topic) + , Database(database) , 
DirectRead(directRead) , UseMigrationProtocol(useMigrationProtocol) , FirstRead(true) , ReadingFinishedSent(false) + , NotCommitedToFinishParents(notCommitedToFinishParents) { } void TPartitionActor::MakeCommit(const TActorContext& ctx) { ui64 offset = ClientReadOffset; - if (CommitsDisabled) - return; - if (CommitsInfly.size() > MAX_COMMITS_INFLY) + if (CommitsDisabled || NotCommitedToFinishParents.size() != 0 || CommitsInfly.size() >= MAX_COMMITS_INFLY) return; //Ranges mode if (!NextRanges.Empty() && NextRanges.Min() == ClientCommitOffset) { - auto first = NextRanges.begin(); - offset = first->second; - NextRanges.EraseInterval(first->first, first->second); + auto firstRange = NextRanges.begin(); + offset = firstRange->second; + NextRanges.EraseInterval(firstRange->first, firstRange->second); ClientCommitOffset = offset; ++CommitCookie; - CommitsInfly.push_back(std::pair(CommitCookie, {CommitCookie, offset, ctx.Now()})); + CommitsInfly.emplace_back(CommitCookie, TCommitInfo{CommitCookie, offset, ctx.Now()}); if (Counters.SLITotal) Counters.SLITotal.Inc(); @@ -106,10 +108,10 @@ void TPartitionActor::MakeCommit(const TActorContext& ctx) { if (it != NextCommits.end() && *it == 0) { //commit of readed in prev session data NextCommits.erase(NextCommits.begin()); if (ClientReadOffset <= ClientCommitOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, 0, 0, CommittedOffset)); + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, 0, 0, CommittedOffset, EndOffset)); } else { ClientCommitOffset = ClientReadOffset; - CommitsInfly.push_back(std::pair(0, {0, ClientReadOffset, ctx.Now()})); + CommitsInfly.emplace_back(0, TCommitInfo{0, ClientReadOffset, ctx.Now()}); if (Counters.SLITotal) Counters.SLITotal.Inc(); if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done @@ -141,7 +143,7 @@ void TPartitionActor::MakeCommit(const TActorContext& ctx) { Y_ABORT_UNLESS(offset > ClientCommitOffset); 
ClientCommitOffset = offset; - CommitsInfly.push_back(std::pair(readId, {startReadId, offset, ctx.Now()})); + CommitsInfly.emplace_back(readId, TCommitInfo{startReadId, offset, ctx.Now()}); if (Counters.SLITotal) Counters.SLITotal.Inc(); @@ -158,28 +160,81 @@ void TPartitionActor::Bootstrap(const TActorContext& ctx) { } void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx) { - NKikimrClient::TPersQueueRequest request; - request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath()); - request.MutablePartitionRequest()->SetPartition(Partition.Partition); - request.MutablePartitionRequest()->SetCookie(readId); + if (!ClientHasAnyCommits && Parents.size() != 0) { + std::vector commits; + for (auto& parent: Parents) { + TDistributedCommitHelper::TCommitInfo commit {.PartitionId = parent->Id, .Offset = Max(), .KillReadSession = false, .OnlyCheckCommitedToFinish = true, .ReadSessionId = Session}; + commits.push_back(commit); + } + TDistributedCommitHelper::TCommitInfo commit {.PartitionId = Partition.Partition, .Offset = (i64)offset, .KillReadSession = false, .OnlyCheckCommitedToFinish = false, .ReadSessionId = Session}; + commits.push_back(commit); + auto kqp = std::make_shared(Database, ClientId, Topic->GetPrimaryPath(), commits, readId); + Kqps.emplace(readId, kqp); - Y_ABORT_UNLESS(PipeClient); + kqp->SendCreateSessionRequest(ctx); + } else { + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath()); + request.MutablePartitionRequest()->SetPartition(Partition.Partition); + request.MutablePartitionRequest()->SetCookie(readId); - ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); - auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); - commit->SetClientId(ClientId); - commit->SetOffset(offset); - Y_ABORT_UNLESS(!Session.empty()); - commit->SetSessionId(Session); + Y_ABORT_UNLESS(PipeClient); - 
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " committing to position " << offset << " prev " << CommittedOffset - << " end " << EndOffset << " by cookie " << readId); + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); + auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); + commit->SetClientId(ClientId); + commit->SetOffset(offset); + Y_ABORT_UNLESS(!Session.empty()); + commit->SetSessionId(Session); - TAutoPtr req(new TEvPersQueue::TEvRequest); - req->Record.Swap(&request); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " committing to position " << offset << " prev " << CommittedOffset + << " end " << EndOffset << " by cookie " << readId); - NTabletPipe::SendData(ctx, PipeClient, req.Release()); + TAutoPtr req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); + } +} + +void TPartitionActor::Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const NActors::TActorContext& ctx) { + auto kqpIt = Kqps.find(ev->Cookie); + if (kqpIt == Kqps.end()) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("unexpected cookie at KQP create session response", PersQueue::ErrorCode::ERROR)); + return; + } + + if (!kqpIt->second->Handle(ev, ctx)) { + const auto& record = ev->Get()->Record; + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + record.GetError(), PersQueue::ErrorCode::ERROR)); + } +} + +void TPartitionActor::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + + auto kqpIt = Kqps.find(ev->Cookie); + if (kqpIt == Kqps.end()) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("unexpected cookie at KQP query response", PersQueue::ErrorCode::ERROR)); + return; + } + + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { + auto kqpQueryError = TStringBuilder() 
<< "Kqp error. Status# " << record.GetYdbStatus() << ", "; + + NYql::TIssues issues; + NYql::IssuesFromMessage(record.GetResponse().GetQueryIssues(), issues); + kqpQueryError << issues.ToString(); + + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(kqpQueryError, PersQueue::ErrorCode::ERROR)); + return; + } + + auto step = kqpIt->second->Handle(ev, ctx); + if (step == TDistributedCommitHelper::ECurrentStep::DONE) { + CommitDone(ev->Cookie, ctx); + } } void TPartitionActor::SendPublishDirectRead(const ui64 directReadId, const TActorContext& ctx) { @@ -232,7 +287,6 @@ void TPartitionActor::SendForgetDirectRead(const ui64 directReadId, const TActor NTabletPipe::SendData(ctx, PipeClient, req.Release()); } - void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { if (!PipeClient) return; @@ -256,7 +310,6 @@ void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reaso ctx.Schedule(TDuration::MilliSeconds(RESTART_PIPE_DELAY_MS), new TEvPQProxy::TEvRestartPipe()); } - void TPartitionActor::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx) { auto it = DirectReadResults.find(ev->Get()->DirectReadId); @@ -295,8 +348,6 @@ void TPartitionActor::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActo } - - void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TActorContext& ctx) { Y_ABORT_UNLESS(!PipeClient); @@ -544,6 +595,10 @@ bool FillBatchedData( return hasData; } +void TPartitionActor::Handle(TEvPQProxy::TEvParentCommitedToFinish::TPtr& ev, const TActorContext& ctx) { + NotCommitedToFinishParents.erase(ev->Get()->ParentPartitionId); + MakeCommit(ctx); +} void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { @@ -614,6 +669,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo EndOffset = resp.GetEndOffset(); SizeLag = resp.GetSizeLag(); 
WriteTimestampEstimateMs = resp.GetWriteTimestampEstimateMS(); + ClientHasAnyCommits = resp.GetClientHasAnyCommits(); ClientCommitOffset = ReadOffset = CommittedOffset = resp.HasOffset() ? resp.GetOffset() : 0; Y_ABORT_UNLESS(EndOffset >= CommittedOffset); @@ -628,7 +684,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo if (!StartReading) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, NodeId, TabletGeneration)); + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, NodeId, TabletGeneration, ClientHasAnyCommits)); } else { InitStartReading(ctx); } @@ -679,42 +735,8 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo return; } - if (!(result.HasCmdReadResult() || result.HasCmdPrepareReadResult() || result.HasCmdPublishReadResult() || result.HasCmdForgetReadResult())) { //this is commit response - if (CommitsInfly.empty()) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing"); - return; - } - ui64 readId = CommitsInfly.front().first; - - if (result.GetCookie() != readId) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for " << readId); - return; - } - - Counters.Commits.Inc(); - - ui32 commitDurationMs = (ctx.Now() - CommitsInfly.front().second.StartTime).MilliSeconds(); - if (Counters.CommitLatency) { - Counters.CommitLatency.IncFor(commitDurationMs, 1); - } - - if (Counters.SLIBigLatency && commitDurationMs >= AppData(ctx)->PQConfig.GetCommitLatencyBigMs()) { - Counters.SLIBigLatency.Inc(); - } - - CommittedOffset = CommitsInfly.front().second.Offset; - ui64 startReadId = CommitsInfly.front().second.StartReadId; - 
ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, startReadId, readId, CommittedOffset)); - - CommitsInfly.pop_front(); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); - - PipeGeneration = 0; //reset tries counter - all ok - MakeCommit(ctx); + if (!(result.HasCmdReadResult() || result.HasCmdPrepareReadResult() || result.HasCmdPublishReadResult() || result.HasCmdForgetReadResult())) { // this is commit response + CommitDone(result.GetCookie(), ctx); return; } @@ -881,6 +903,46 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo PipeGeneration = 0; //reset tries counter - all ok } +void TPartitionActor::CommitDone(ui64 cookie, const TActorContext& ctx) { + if (CommitsInfly.empty()) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " unwaited commit-response with cookie " << cookie << "; waiting for nothing"); + return; + } + ui64 readId = CommitsInfly.front().first; + + if (cookie != readId) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " unwaited commit-response with cookie " << cookie << "; waiting for " << readId); + return; + } + + Counters.Commits.Inc(); + ClientHasAnyCommits = true; + + ui32 commitDurationMs = (ctx.Now() - CommitsInfly.front().second.StartTime).MilliSeconds(); + if (Counters.CommitLatency) { + Counters.CommitLatency.IncFor(commitDurationMs, 1); + } + + if (Counters.SLIBigLatency && commitDurationMs >= AppData(ctx)->PQConfig.GetCommitLatencyBigMs()) { + Counters.SLIBigLatency.Inc(); + } + + CommittedOffset = CommitsInfly.front().second.Offset; + ui64 startReadId = CommitsInfly.front().second.StartReadId; + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, startReadId, readId, CommittedOffset, EndOffset)); + + 
Kqps.erase(CommitsInfly.front().first); + CommitsInfly.pop_front(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); + + PipeGeneration = 0; //reset tries counter - all ok + MakeCommit(ctx); +} + void TPartitionActor::SendPartitionReady(const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " ready for read with readOffset " << ReadOffset << " endOffset " << EndOffset); @@ -916,13 +978,10 @@ void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, NPersQueue::NErrorCode::TABLET_PIPE_DISCONNECTED); } - - void TPartitionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr&, const TActorContext& ctx) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, NodeId, TabletGeneration, false)); + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, NodeId, TabletGeneration, ClientHasAnyCommits, false)); } - void TPartitionActor::Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { ClientReadOffset = ev->Get()->ReadOffset; ClientCommitOffset = ev->Get()->CommitOffset; @@ -996,7 +1055,7 @@ void TPartitionActor::InitStartReading(const TActorContext& ctx) { return; } Y_ABORT_UNLESS(CommitsInfly.empty()); - CommitsInfly.push_back(std::pair(Max(), {Max(), ClientCommitOffset.GetOrElse(0), ctx.Now()})); + CommitsInfly.emplace_back(Max(), TCommitInfo{Max(), ClientCommitOffset.GetOrElse(0), ctx.Now()}); if (Counters.SLITotal) Counters.SLITotal.Inc(); if (PipeClient) //pipe will be recreated soon @@ -1320,7 +1379,7 @@ void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& auto request = 
MakeReadRequest(ReadOffset, 0, req->MaxCount, req->MaxSize, req->MaxTimeLagMs, req->ReadTimestampMs, DirectReadId); RequestInfly = true; CurrentRequest = request; - + if (!PipeClient) //Pipe will be recreated soon return; diff --git a/ydb/services/persqueue_v1/actors/partition_actor.h b/ydb/services/persqueue_v1/actors/partition_actor.h index 1cbbc5bcddf4..b2eec72172fc 100644 --- a/ydb/services/persqueue_v1/actors/partition_actor.h +++ b/ydb/services/persqueue_v1/actors/partition_actor.h @@ -1,14 +1,17 @@ #pragma once +#include #include "events.h" #include "partition_id.h" #include #include +#include #include #include #include +#include #include #include @@ -65,15 +68,16 @@ class TPartitionActor : public NActors::TActorBootstrapped { static constexpr ui32 MAX_PIPE_RESTARTS = 100; //after 100 restarts without progress kill session static constexpr ui32 RESTART_PIPE_DELAY_MS = 100; - static constexpr ui32 MAX_COMMITS_INFLY = 3; + static constexpr ui32 MAX_COMMITS_INFLY = 1; public: TPartitionActor(const TActorId& parentId, const TString& clientId, const TString& clientPath, const ui64 cookie, const TString& session, const TPartitionId& partition, ui32 generation, ui32 step, const ui64 tabletID, const TTopicCounters& counters, const bool commitsDisabled, - const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, bool directRead, - bool useMigrationProtocol, ui32 maxTimeLagMs, ui64 readTimestampMs); + const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, const TString& database, bool directRead, + bool useMigrationProtocol, ui32 maxTimeLagMs, ui64 readTimestampMs, std::set parents, + std::unordered_set notCommitedToFinishParents); ~TPartitionActor(); void Bootstrap(const NActors::TActorContext& ctx); @@ -100,6 +104,12 @@ class TPartitionActor : public NActors::TActorBootstrapped { HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvPersQueue::TEvResponse, Handle); 
HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); + + HFunc(NKqp::TEvKqp::TEvCreateSessionResponse, Handle); + HFunc(NKqp::TEvKqp::TEvQueryResponse, Handle); + + HFunc(TEvPQProxy::TEvParentCommitedToFinish, Handle); + default: break; }; @@ -123,6 +133,11 @@ class TPartitionActor : public NActors::TActorBootstrapped { void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQProxy::TEvParentCommitedToFinish::TPtr& ev, const TActorContext& ctx); + void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); void HandleWakeup(const NActors::TActorContext& ctx); void DoWakeup(const NActors::TActorContext& ctx); @@ -142,11 +157,13 @@ class TPartitionActor : public NActors::TActorBootstrapped { void SendPublishDirectRead(const ui64 directReadId, const TActorContext& ctx); void SendForgetDirectRead(const ui64 directReadId, const TActorContext& ctx); void SendPartitionReady(const TActorContext& ctx); + void CommitDone(ui64 cookie, const TActorContext& ctx); NKikimrClient::TPersQueueRequest MakeCreateSessionRequest(bool initial) const; NKikimrClient::TPersQueueRequest MakeReadRequest(ui64 readOffset, ui64 lastOffset, ui64 maxCount, ui64 maxSize, ui64 maxTimeLagMs, ui64 readTimestampMs, ui64 directReadId) const; + private: const TActorId ParentId; const TString ClientId; @@ -167,6 +184,7 @@ class TPartitionActor : public NActors::TActorBootstrapped { ui64 ReadOffset; ui64 ClientReadOffset; TMaybe ClientCommitOffset; + bool ClientHasAnyCommits; bool ClientVerifyReadOffset; ui64 CommittedOffset; ui64 WriteTimestampEstimateMs; @@ -209,12 +227,16 @@ class TPartitionActor : public NActors::TActorBootstrapped { }; 
std::deque> CommitsInfly; //ReadId, Offset + std::unordered_map> Kqps; + + std::set Parents; TTopicCounters Counters; bool CommitsDisabled; ui64 CommitCookie; NPersQueue::TTopicConverterPtr Topic; + TString Database; bool DirectRead = false; @@ -240,6 +262,8 @@ class TPartitionActor : public NActors::TActorBootstrapped { bool FirstRead; bool ReadingFinishedSent; + + std::unordered_set NotCommitedToFinishParents; }; diff --git a/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp b/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp index 320d8284db67..27592a7862d9 100644 --- a/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp +++ b/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp @@ -104,6 +104,7 @@ bool TReadInitAndAuthActor::ProcessTopicSchemeCacheResponse( topicsIter->second.MeteringMode = pqDescr.GetPQTabletConfig().GetMeteringMode(); topicsIter->second.DbPath = pqDescr.GetPQTabletConfig().GetYdbDatabasePath(); topicsIter->second.IsServerless = entry.DomainInfo->IsServerless(); + topicsIter->second.PartitionGraph = entry.PQGroupInfo->PartitionGraph; for (const auto& partitionDescription : pqDescr.GetPartitions()) { topicsIter->second.Partitions[partitionDescription.GetPartitionId()] = @@ -272,7 +273,16 @@ void TReadInitAndAuthActor::FinishInitialization(const TActorContext& ctx) { TTopicInitInfoMap res; for (auto& [name, holder] : Topics) { res.insert(std::make_pair(name, TTopicInitInfo{ - holder.FullConverter, holder.TabletID, holder.CloudId, holder.DbId, holder.DbPath, holder.IsServerless, holder.FolderId, holder.MeteringMode, holder.Partitions + holder.FullConverter, + holder.TabletID, + holder.CloudId, + holder.DbId, + holder.DbPath, + holder.IsServerless, + holder.FolderId, + holder.MeteringMode, + holder.Partitions, + holder.PartitionGraph })); } ctx.Send(ParentId, new TEvPQProxy::TEvAuthResultOk(std::move(res))); diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.cpp 
b/ydb/services/persqueue_v1/actors/read_session_actor.cpp index 71c4b29852a8..4e838f22d710 100644 --- a/ydb/services/persqueue_v1/actors/read_session_actor.cpp +++ b/ydb/services/persqueue_v1/actors/read_session_actor.cpp @@ -458,7 +458,6 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvAuth::TPtr& } } - template void TReadSessionActor::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx) { auto it = Partitions.find(ev->Get()->AssignId); @@ -642,14 +641,14 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone:: } auto assignId = ev->Get()->AssignId; - auto it = Partitions.find(assignId); - if (it == Partitions.end()) { + auto partitionIt = Partitions.find(assignId); + if (partitionIt == Partitions.end()) { return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() << "unknown partition_session_id " << assignId << " #01", ctx); } - Y_ABORT_UNLESS(it->second.Offset < ev->Get()->Offset); - it->second.NextRanges.EraseInterval(it->second.Offset, ev->Get()->Offset); + Y_ABORT_UNLESS(partitionIt->second.Offset < ev->Get()->Offset); + partitionIt->second.NextRanges.EraseInterval(partitionIt->second.Offset, ev->Get()->Offset); if (ev->Get()->StartCookie == Max()) { // means commit at start return; @@ -664,8 +663,8 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone:: auto c = result.mutable_committed()->add_cookies(); c->set_partition_cookie(i); c->set_assign_id(ev->Get()->AssignId); - it->second.NextCommits.erase(i); - it->second.ReadIdCommitted = i; + partitionIt->second.NextCommits.erase(i); + partitionIt->second.ReadIdCommitted = i; } } else { // commit on cookies not supported in this case Y_ABORT_UNLESS(false); @@ -674,7 +673,7 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone:: if constexpr (UseMigrationProtocol) { auto c = result.mutable_committed()->add_offset_ranges(); c->set_assign_id(ev->Get()->AssignId); - c->set_start_offset(it->second.Offset); + c->set_start_offset(partitionIt->second.Offset); 
c->set_end_offset(ev->Get()->Offset); } else { auto c = result.mutable_commit_offset_response()->add_partitions_committed_offsets(); @@ -683,14 +682,35 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone:: } } - it->second.Offset = ev->Get()->Offset; + partitionIt->second.Offset = ev->Get()->Offset; + partitionIt->second.EndOffset = ev->Get()->EndOffset; LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " replying for commits" << ": assignId# " << ev->Get()->AssignId << ", from# " << ev->Get()->StartCookie << ", to# " << ev->Get()->LastCookie - << ", offset# " << it->second.Offset); + << ", offset# " << partitionIt->second.Offset); WriteToStreamOrDie(ctx, std::move(result)); + + if (ev->Get()->Offset == ev->Get()->EndOffset) { + auto topicName = partitionIt->second.Topic->GetInternalName(); + auto topicIt = Topics.find(partitionIt->second.Topic->GetInternalName()); + if (topicIt == Topics.end()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "unknown topic partition_session_id: " << assignId, ctx); + } + + auto& topic = topicIt->second; + for (auto& child: topic.PartitionGraph->GetPartition(partitionIt->second.Partition.Partition)->DirectChildren) { + for (auto& otherPartitions: Partitions) { + if (otherPartitions.second.Partition.Partition == child->Id) { + ctx.Send(otherPartitions.second.Actor, new TEvPQProxy::TEvParentCommitedToFinish(partitionIt->second.Partition.Partition)); + } + } + } + + } + } template @@ -1086,8 +1106,14 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvAuthResultOk return CloseSession(PersQueue::ErrorCode::OVERLOAD, TStringBuilder() << "metering mode of topic: " << name << " has been changed", ctx); } + it->second.PartitionGraph = t.PartitionGraph; } } + + while (!Locks.empty()) { + ctx.Send(ctx.SelfID, std::move(Locks.front())); + Locks.pop_front(); + } } template @@ -1196,29 +1222,36 @@ void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartit auto& converter = 
converterIter->second; const auto name = converter->GetInternalName(); - { - auto it = Topics.find(name); - if (it == Topics.end() || (!ReadWithoutConsumer && it->second.PipeClient != ActorIdFromProto(record.GetPipeClient()))) { - LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " ignored ev lock" - << ": path# " << name - << ", reason# " << "topic is unknown"); - return; - } + auto topicIt = Topics.find(name); + if (topicIt == Topics.end() || (!ReadWithoutConsumer && topicIt->second.PipeClient != ActorIdFromProto(record.GetPipeClient()))) { + LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " ignored ev lock" + << ": path# " << name + << ", reason# " << "topic is unknown"); + return; + } - auto& topic = it->second; + auto& topic = topicIt->second; - // TODO: counters - if (NumPartitionsFromTopic[name]++ == 0) { - if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { - SetupTopicCounters(converter, topic.CloudId, topic.DbId, topic.DbPath, topic.IsServerless, topic.FolderId); - } else { - SetupTopicCounters(converter); - } + auto* partitionNode = topic.PartitionGraph->GetPartition(record.GetPartition()); + if (!partitionNode) { + Locks.push_back(ev->Release()); + if (!AuthInitActor) { + RunAuthActor(ctx); } + return; + } - topic.Partitions.emplace(record.GetPartition(), NGRpcProxy::TPartitionInfo{record.GetTabletId()}); + // TODO: counters + if (NumPartitionsFromTopic[name]++ == 0) { + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + SetupTopicCounters(converter, topic.CloudId, topic.DbId, topic.DbPath, topic.IsServerless, topic.FolderId); + } else { + SetupTopicCounters(converter); + } } + topic.Partitions.emplace(record.GetPartition(), NGRpcProxy::TPartitionInfo{record.GetTabletId()}); + // TODO: counters auto it = TopicCounters.find(name); Y_ABORT_UNLESS(it != TopicCounters.end()); @@ -1234,10 +1267,19 @@ void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartit return 
CloseSession(PersQueue::ErrorCode::ERROR, error, ctx); } + std::unordered_set notCommitedToFinishParents; + for (auto& parent: topic.PartitionGraph->GetPartition(record.GetPartition())->DirectParents) { + for (auto& otherPartitions: Partitions) { // TODO: map + if (otherPartitions.second.Partition.Partition == parent->Id && otherPartitions.second.Offset != otherPartitions.second.EndOffset) { + notCommitedToFinishParents.emplace(otherPartitions.second.Partition.Partition); + } + } + } + const TActorId actorId = ctx.Register(new TPartitionActor( ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(), record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC, RangesMode, - converterIter->second, DirectRead, UseMigrationProtocol, maxLag, readTimestampMs)); + converterIter->second, Request->GetDatabaseName().GetOrElse(AppData(ctx)->PQConfig.GetDatabase()), DirectRead, UseMigrationProtocol, maxLag, readTimestampMs, topic.PartitionGraph->GetPartition(partitionId.Partition)->AllParents, notCommitedToFinishParents)); if (SessionsActive) { PartsPerSession.DecFor(Partitions.size(), 1); @@ -1275,6 +1317,12 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionSta << "unknown partition_session_id " << assignId << " #02", ctx); } + auto topicName = it->second.Topic->GetInternalName(); + auto topicIt = Topics.find(it->second.Topic->GetInternalName()); + if (topicIt == Topics.end()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "unknown topic partition_session_id: " << assignId, ctx); + } TServerMessage result; result.set_status(Ydb::StatusIds::SUCCESS); @@ -1286,6 +1334,8 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionSta it->second.LockSent = true; it->second.Offset = ev->Get()->Offset; + it->second.ConsumerHasAnyCommits = ev->Get()->ClientHasAnyCommits; + it->second.EndOffset = ev->Get()->EndOffset; if constexpr (UseMigrationProtocol) { 
result.mutable_assigned()->mutable_topic()->set_path(it->second.Topic->GetFederationPath()); diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.h b/ydb/services/persqueue_v1/actors/read_session_actor.h index 2f0424aca785..8dc4080a5469 100644 --- a/ydb/services/persqueue_v1/actors/read_session_actor.h +++ b/ydb/services/persqueue_v1/actors/read_session_actor.h @@ -45,11 +45,13 @@ struct TPartitionActorInfo { TSet NextCommits; TDisjointIntervalTree NextRanges; ui64 Offset; + bool ConsumerHasAnyCommits; TInstant AssignTimestamp; ui64 Generation; ui64 NodeId; + ui64 EndOffset; struct TDirectReadInfo { @@ -419,6 +421,7 @@ class TReadSessionActor TMap PartitionToControlMessages; std::deque> Reads; + std::deque> Locks; ui64 Cookie; diff --git a/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp b/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp index 1c05509307b9..60f6e39edd40 100644 --- a/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp +++ b/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp @@ -61,7 +61,22 @@ void TUpdateOffsetsInTransactionActor::Proceed(const NActors::TActorContext& ctx ev->Record.MutableRequest()->MutableTxControl()->set_tx_id(req->tx().id()); ev->Record.MutableRequest()->MutableTopicOperations()->SetConsumer(req->consumer()); - *ev->Record.MutableRequest()->MutableTopicOperations()->MutableTopics() = req->topics(); + + for (const auto& topic : req->topics()) { + auto* newTopic = ev->Record.MutableRequest()->MutableTopicOperations()->AddTopics(); + newTopic->set_path(topic.path()); + + for (const auto& partition : topic.partitions()) { + auto* newPartition = newTopic->add_partitions(); + newPartition->set_partition_id(partition.partition_id()); + + for (const auto& offsetsRange : partition.partition_offsets()) { + auto* newOffsetsRange = newPartition->add_partition_offsets(); + newOffsetsRange->set_start(offsetsRange.start()); + 
newOffsetsRange->set_end(offsetsRange.end()); + } + } + } ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); } diff --git a/ydb/services/persqueue_v1/actors/ya.make b/ydb/services/persqueue_v1/actors/ya.make index 75aafd8cc5d6..57adbeb98975 100644 --- a/ydb/services/persqueue_v1/actors/ya.make +++ b/ydb/services/persqueue_v1/actors/ya.make @@ -28,6 +28,8 @@ SRCS( codecs.cpp commit_offset_actor.h commit_offset_actor.cpp + distributed_commit_helper.h + distributed_commit_helper.cpp events.h persqueue_utils.h persqueue_utils.cpp