Skip to content

Commit 35e326d

Browse files
authored
HTAP: single arbiter (#9199)
1 parent acee834 commit 35e326d

File tree

3 files changed: 55 additions and 81 deletions.

ydb/core/kqp/executer_actor/kqp_data_executer.cpp

Lines changed: 48 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -2359,7 +2359,6 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
23592359

23602360
absl::flat_hash_set<ui64> sendingShardsSet;
23612361
absl::flat_hash_set<ui64> receivingShardsSet;
2362-
absl::flat_hash_set<ui64> sendingColumnShardsSet;
23632362
absl::flat_hash_set<ui64> receivingColumnShardsSet;
23642363
ui64 arbiter = 0;
23652364
std::optional<ui64> columnShardArbiter;
@@ -2382,31 +2381,20 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
23822381
}
23832382

23842383
for (auto& [shardId, tx] : evWriteTxs) {
2385-
if (ShardIdToTableInfo->at(shardId).IsOlap && HtapTx) {
2386-
if (tx->HasLocks()) {
2387-
// Locks may be broken so shards with locks need to send readsets
2388-
sendingColumnShardsSet.insert(shardId);
2389-
}
2390-
if (ShardsWithEffects.contains(shardId)) {
2391-
// Volatile transactions may abort effects, so they send readsets
2392-
if (VolatileTx) {
2393-
sendingColumnShardsSet.insert(shardId);
2394-
}
2395-
// Effects are only applied when all locks are valid
2396-
receivingColumnShardsSet.insert(shardId);
2397-
}
2398-
} else {
2399-
if (tx->HasLocks()) {
2400-
// Locks may be broken so shards with locks need to send readsets
2384+
if (tx->HasLocks()) {
2385+
// Locks may be broken so shards with locks need to send readsets
2386+
sendingShardsSet.insert(shardId);
2387+
}
2388+
if (ShardsWithEffects.contains(shardId)) {
2389+
// Volatile transactions may abort effects, so they send readsets
2390+
if (VolatileTx) {
24012391
sendingShardsSet.insert(shardId);
24022392
}
2403-
if (ShardsWithEffects.contains(shardId)) {
2404-
// Volatile transactions may abort effects, so they send readsets
2405-
if (VolatileTx) {
2406-
sendingShardsSet.insert(shardId);
2407-
}
2408-
// Effects are only applied when all locks are valid
2409-
receivingShardsSet.insert(shardId);
2393+
// Effects are only applied when all locks are valid
2394+
receivingShardsSet.insert(shardId);
2395+
2396+
if (HtapTx && ShardIdToTableInfo->at(shardId).IsOlap) {
2397+
receivingColumnShardsSet.insert(shardId);
24102398
}
24112399
}
24122400
}
@@ -2464,13 +2452,11 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
24642452
}
24652453

24662454
if (!receivingColumnShardsSet.empty()) {
2455+
AFL_ENSURE(HtapTx);
24672456
const ui32 index = RandomNumber<ui32>(receivingColumnShardsSet.size());
24682457
auto arbiterIterator = std::begin(receivingColumnShardsSet);
24692458
std::advance(arbiterIterator, index);
24702459
columnShardArbiter = *arbiterIterator;
2471-
2472-
sendingShardsSet.insert(*columnShardArbiter);
2473-
receivingShardsSet.insert(*columnShardArbiter);
24742460
}
24752461
}
24762462

@@ -2483,13 +2469,8 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
24832469
std::sort(sendingShards.begin(), sendingShards.end());
24842470
std::sort(receivingShards.begin(), receivingShards.end());
24852471

2486-
NProtoBuf::RepeatedField<ui64> sendingColumnShards(sendingColumnShardsSet.begin(), sendingColumnShardsSet.end());
2487-
NProtoBuf::RepeatedField<ui64> receivingColumnShards(receivingColumnShardsSet.begin(), receivingColumnShardsSet.end());
2488-
2489-
std::sort(sendingColumnShards.begin(), sendingColumnShards.end());
2490-
std::sort(receivingColumnShards.begin(), receivingColumnShards.end());
2491-
24922472
for (auto& [shardId, shardTx] : datashardTxs) {
2473+
AFL_ENSURE(!columnShardArbiter);
24932474
shardTx->MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit);
24942475
*shardTx->MutableLocks()->MutableSendingShards() = sendingShards;
24952476
*shardTx->MutableLocks()->MutableReceivingShards() = receivingShards;
@@ -2498,24 +2479,46 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
24982479
}
24992480
}
25002481

2501-
for (auto& [_, tx] : evWriteTxs) {
2482+
for (auto& [shardId, tx] : evWriteTxs) {
25022483
tx->MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit);
2503-
*tx->MutableLocks()->MutableSendingShards() = sendingShards;
2504-
*tx->MutableLocks()->MutableReceivingShards() = receivingShards;
2505-
*tx->MutableLocks()->MutableSendingColumnShards() = sendingColumnShards;
2506-
*tx->MutableLocks()->MutableReceivingColumnShards() = receivingColumnShards;
2507-
if (arbiter) {
2508-
tx->MutableLocks()->SetArbiterShard(arbiter);
2509-
}
2510-
if (columnShardArbiter) {
2484+
if (columnShardArbiter && *columnShardArbiter == shardId) {
25112485
tx->MutableLocks()->SetArbiterColumnShard(*columnShardArbiter);
2486+
*tx->MutableLocks()->MutableSendingShards() = sendingShards;
2487+
*tx->MutableLocks()->MutableReceivingShards() = receivingShards;
2488+
} else if (columnShardArbiter) {
2489+
tx->MutableLocks()->SetArbiterColumnShard(*columnShardArbiter);
2490+
tx->MutableLocks()->AddSendingShards(*columnShardArbiter);
2491+
tx->MutableLocks()->AddReceivingShards(*columnShardArbiter);
2492+
if (sendingShardsSet.contains(shardId)) {
2493+
tx->MutableLocks()->AddSendingShards(shardId);
2494+
}
2495+
if (receivingShardsSet.contains(shardId)) {
2496+
tx->MutableLocks()->AddReceivingShards(shardId);
2497+
}
2498+
} else {
2499+
*tx->MutableLocks()->MutableSendingShards() = sendingShards;
2500+
*tx->MutableLocks()->MutableReceivingShards() = receivingShards;
2501+
if (arbiter) {
2502+
tx->MutableLocks()->SetArbiterShard(arbiter);
2503+
}
25122504
}
25132505
}
25142506

2515-
for (auto& [_, t] : topicTxs) {
2507+
for (auto& [shardId, t] : topicTxs) {
25162508
t.tx.SetOp(NKikimrPQ::TDataTransaction::Commit);
2517-
*t.tx.MutableSendingShards() = sendingShards;
2518-
*t.tx.MutableReceivingShards() = receivingShards;
2509+
if (columnShardArbiter) {
2510+
t.tx.AddSendingShards(*columnShardArbiter);
2511+
t.tx.AddReceivingShards(*columnShardArbiter);
2512+
if (sendingShardsSet.contains(shardId)) {
2513+
t.tx.AddSendingShards(shardId);
2514+
}
2515+
if (receivingShardsSet.contains(shardId)) {
2516+
t.tx.AddReceivingShards(shardId);
2517+
}
2518+
} else {
2519+
*t.tx.MutableSendingShards() = sendingShards;
2520+
*t.tx.MutableReceivingShards() = receivingShards;
2521+
}
25192522
YQL_ENSURE(!arbiter);
25202523
}
25212524
}

ydb/core/protos/data_events.proto

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,6 @@ message TKqpLocks {
3939
optional uint64 ArbiterShard = 5;
4040

4141
optional uint64 ArbiterColumnShard = 6;
42-
repeated uint64 SendingColumnShards = 7;
43-
repeated uint64 ReceivingColumnShards = 8;
4442
}
4543

4644
message TTableId {

ydb/core/tx/columnshard/columnshard__write.cpp

Lines changed: 7 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,6 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex
271271
class TCommitOperation {
272272
private:
273273
const ui64 TabletId;
274-
bool HtapFormat = false;
275274

276275
public:
277276
using TPtr = std::shared_ptr<TCommitOperation>;
@@ -294,25 +293,19 @@ class TCommitOperation {
294293
auto& locks = evWrite.Record.GetLocks();
295294
auto& lock = evWrite.Record.GetLocks().GetLocks()[0];
296295
SendingShards = std::set<ui64>(locks.GetSendingShards().begin(), locks.GetSendingShards().end());
297-
SendingColumnShards = std::set<ui64>(locks.GetSendingColumnShards().begin(), locks.GetSendingColumnShards().end());
298296
ReceivingShards = std::set<ui64>(locks.GetReceivingShards().begin(), locks.GetReceivingShards().end());
299-
ReceivingColumnShards = std::set<ui64>(locks.GetReceivingColumnShards().begin(), locks.GetReceivingColumnShards().end());
300-
HtapFormat = locks.HasArbiterColumnShard();
301297
if (!ReceivingShards.size() || !SendingShards.size()) {
302298
ReceivingShards.clear();
303299
SendingShards.clear();
304-
} else if (!HtapFormat) {
300+
} else if (!locks.HasArbiterColumnShard()) {
305301
ArbiterColumnShard = *ReceivingShards.begin();
306302
if (!ReceivingShards.contains(TabletId) && !SendingShards.contains(TabletId)) {
307303
return TConclusionStatus::Fail("shard is incorrect for sending/receiving lists");
308304
}
309305
} else {
310-
if (!ReceivingColumnShards.size() || !SendingColumnShards.size()) {
311-
return TConclusionStatus::Fail("empty sending/receiving lists for columnshards is incorrect case");
312-
}
313306
ArbiterColumnShard = locks.GetArbiterColumnShard();
314307
AFL_VERIFY(ArbiterColumnShard);
315-
if (!ReceivingColumnShards.contains(TabletId) && !SendingColumnShards.contains(TabletId)) {
308+
if (!ReceivingShards.contains(TabletId) && !SendingShards.contains(TabletId)) {
316309
return TConclusionStatus::Fail("shard is incorrect for sending/receiving lists");
317310
}
318311
}
@@ -336,30 +329,12 @@ class TCommitOperation {
336329
std::unique_ptr<NColumnShard::TEvWriteCommitSyncTransactionOperator> CreateTxOperator(
337330
const NKikimrTxColumnShard::ETransactionKind kind) const {
338331
AFL_VERIFY(ReceivingShards.size());
339-
if (HtapFormat) {
340-
if (IsPrimary()) {
341-
std::set<ui64> fullReceiving = ReceivingShards;
342-
fullReceiving.insert(ReceivingColumnShards.begin(), ReceivingColumnShards.end());
343-
AFL_VERIFY(fullReceiving.size() + 1 == ReceivingShards.size() + ReceivingColumnShards.size());
344-
345-
std::set<ui64> fullSending = SendingShards;
346-
fullSending.insert(SendingColumnShards.begin(), SendingColumnShards.end());
347-
AFL_VERIFY(fullSending.size() + 1 == SendingShards.size() + SendingColumnShards.size());
348-
349-
return std::make_unique<NColumnShard::TEvWriteCommitPrimaryTransactionOperator>(
350-
TFullTxInfo::BuildFake(kind), LockId, fullReceiving, fullSending);
351-
} else {
352-
return std::make_unique<NColumnShard::TEvWriteCommitSecondaryTransactionOperator>(TFullTxInfo::BuildFake(kind), LockId,
353-
ArbiterColumnShard, ReceivingColumnShards.contains(TabletId));
354-
}
332+
if (IsPrimary()) {
333+
return std::make_unique<NColumnShard::TEvWriteCommitPrimaryTransactionOperator>(
334+
TFullTxInfo::BuildFake(kind), LockId, ReceivingShards, SendingShards);
355335
} else {
356-
if (IsPrimary()) {
357-
return std::make_unique<NColumnShard::TEvWriteCommitPrimaryTransactionOperator>(
358-
TFullTxInfo::BuildFake(kind), LockId, ReceivingShards, SendingShards);
359-
} else {
360-
return std::make_unique<NColumnShard::TEvWriteCommitSecondaryTransactionOperator>(TFullTxInfo::BuildFake(kind), LockId,
361-
ArbiterColumnShard, ReceivingShards.contains(TabletId));
362-
}
336+
return std::make_unique<NColumnShard::TEvWriteCommitSecondaryTransactionOperator>(TFullTxInfo::BuildFake(kind), LockId,
337+
ArbiterColumnShard, ReceivingShards.contains(TabletId));
363338
}
364339
}
365340

@@ -370,8 +345,6 @@ class TCommitOperation {
370345
YDB_READONLY(ui64, TxId, 0);
371346
YDB_READONLY_DEF(std::set<ui64>, SendingShards);
372347
YDB_READONLY_DEF(std::set<ui64>, ReceivingShards);
373-
YDB_READONLY_DEF(std::set<ui64>, SendingColumnShards);
374-
YDB_READONLY_DEF(std::set<ui64>, ReceivingColumnShards);
375348
ui64 ArbiterColumnShard = 0;
376349
};
377350

0 commit comments

Comments
 (0)