Skip to content

Commit 46736d5

Browse files
committed
Make shard range index more granular and fix the "shards do not intersect" error (#20574) (#20585)
1 parent 6a79e27 commit 46736d5

File tree

2 files changed

+117
-66
lines changed

2 files changed

+117
-66
lines changed

ydb/core/tx/schemeshard/schemeshard_info_types.cpp

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2380,44 +2380,55 @@ void TIndexBuildInfo::AddParent(const TSerializedTableRange& range, TShardIdx sh
23802380
// For Parent == 0 only single kmeans needed, so there are two options:
23812381
// 1. It fits entirely in the single shard => local kmeans for single shard
23822382
// 2. It doesn't fit entirely in the single shard => global kmeans for all shards
2383-
const auto [parentFrom, parentTo] = KMeans.Parent == 0
2383+
auto [parentFrom, parentTo] = KMeans.Parent == 0
23842384
? std::pair<NTableIndex::TClusterId, NTableIndex::TClusterId>{0, 0}
23852385
: KMeans.RangeToBorders(range);
2386-
// TODO Make it more granular - now we just merge all intersecting ranges.
2387-
// So if all shards have ranges like [1 2] [2 3] [3 4] and so on then we scan
2388-
// all shards for each cluster even though we could only scan 2 shards for it.
23892386

2390-
// the new range does not intersect with other ranges, just add it with 1 shard
23912387
auto itFrom = Cluster2Shards.lower_bound(parentFrom);
23922388
if (itFrom == Cluster2Shards.end() || parentTo < itFrom->second.From) {
2389+
// The new range does not intersect with other ranges, just add it with 1 shard
23932390
Cluster2Shards.emplace_hint(itFrom, parentTo, TClusterShards{.From = parentFrom, .Shards = {shard}});
23942391
return;
23952392
}
23962393

2397-
// otherwise, this range has multiple shards and we need to merge all intersecting ranges
2398-
auto itTo = parentTo < itFrom->first ? itFrom : Cluster2Shards.lower_bound(parentTo);
2399-
if (itTo == Cluster2Shards.end()) {
2400-
itTo--;
2394+
for (auto it = itFrom; it != Cluster2Shards.end() && it->second.From <= parentTo && it->first >= parentFrom; it++) {
2395+
// The new shard may only intersect with existing shards by its starting or ending edge
2396+
Y_ENSURE(it->second.From == parentTo || it->first == parentFrom);
24012397
}
2402-
if (itTo->first < parentTo) {
2403-
const bool needsToReplaceFrom = itFrom == itTo;
2404-
auto node = Cluster2Shards.extract(itTo);
2405-
node.key() = parentTo;
2406-
itTo = Cluster2Shards.insert(Cluster2Shards.end(), std::move(node));
2407-
itFrom = needsToReplaceFrom ? itTo : itFrom;
2408-
}
2409-
auto& [toFrom, toShards] = itTo->second;
24102398

2411-
toFrom = std::min(toFrom, parentFrom);
2412-
toShards.emplace_back(shard);
2399+
if (parentFrom == itFrom->first) {
2400+
// Intersects by parentFrom
2401+
if (itFrom->second.From < itFrom->first) {
2402+
Cluster2Shards.emplace_hint(itFrom, itFrom->first-1, itFrom->second);
2403+
itFrom->second.From = parentFrom;
2404+
}
2405+
itFrom->second.Shards.push_back(shard);
2406+
// Increment to also check intersection by parentTo
2407+
itFrom++;
2408+
if (parentTo == parentFrom) {
2409+
return;
2410+
}
2411+
parentFrom++;
2412+
}
24132413

2414-
while (itFrom != itTo) {
2415-
const auto& [fromFrom, fromShards] = itFrom->second;
2416-
toFrom = std::min(toFrom, fromFrom);
2417-
Y_ASSERT(!fromShards.empty());
2418-
toShards.insert(toShards.end(), fromShards.begin(), fromShards.end());
2419-
itFrom = Cluster2Shards.erase(itFrom);
2414+
if (parentTo == itFrom->second.From) {
2415+
// Intersects by parentTo
2416+
if (itFrom->second.From < itFrom->first) {
2417+
auto endShards = itFrom->second.Shards;
2418+
endShards.push_back(shard);
2419+
Cluster2Shards.emplace_hint(itFrom, parentTo, TClusterShards{.From = parentTo, .Shards = std::move(endShards)});
2420+
itFrom->second.From = parentTo+1;
2421+
} else {
2422+
itFrom->second.Shards.push_back(shard);
2423+
}
2424+
if (parentTo == parentFrom) {
2425+
return;
2426+
}
2427+
parentTo--;
24202428
}
2429+
2430+
// Add the remaining range
2431+
Cluster2Shards.emplace_hint(itFrom, parentTo, TClusterShards{.From = parentFrom, .Shards = {shard}});
24212432
}
24222433

24232434
TColumnFamiliesMerger::TColumnFamiliesMerger(NKikimrSchemeOp::TPartitionConfig &container)

ydb/core/tx/schemeshard/ut_base/ut_info_types.cpp

Lines changed: 81 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -247,58 +247,98 @@ ColumnFamilies {
247247
info.KMeans.ChildBegin = 201;
248248
info.KMeans.Child = 201;
249249

250-
TVector<TCell> from = {TCell::Make((ui64)1), TCell::Make(123)};
251-
TVector<TCell> to = {TCell::Make((ui64)3), TCell::Make(123)};
252-
auto range = TSerializedTableRange(from, true, to, true);
253-
info.AddParent(range, TShardIdx(1, 1));
250+
auto addShard = [&](ui64 fromCluster, ui64 toCluster, ui32 shard, bool partialFrom = false) {
251+
TVector<TCell> from = {TCell::Make(fromCluster), TCell::Make(123)};
252+
if (partialFrom) {
253+
from.pop_back();
254+
}
255+
TVector<TCell> to = {TCell::Make(toCluster), TCell::Make(123)};
256+
auto range = TSerializedTableRange(from, true, to, true);
257+
info.AddParent(range, TShardIdx(1, shard));
258+
};
259+
auto checkShards = [&](ui64 from, ui64 to, std::vector<ui32> shards) {
260+
auto rng = info.Cluster2Shards.at(to);
261+
if (rng.From != from || rng.Shards.size() != shards.size()) {
262+
return false;
263+
}
264+
for (size_t i = 0; i < shards.size(); i++) {
265+
if (rng.Shards[i] != TShardIdx(1, shards[i])) {
266+
return false;
267+
}
268+
}
269+
return true;
270+
};
254271

272+
// no intersection + empty list
273+
addShard(1, 3, 1);
255274
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 1);
256-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(3).From, 1);
257-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(3).Shards.size(), 1);
275+
UNIT_ASSERT(checkShards(1, 3, {1}));
258276

259277
// add an intersecting shard. it even crashed here until 02.07.2025 :)
260-
from = {TCell::Make((ui64)3), TCell::Make(123)};
261-
to = {TCell::Make((ui64)4), TCell::Make(123)};
262-
range = TSerializedTableRange(from, true, to, true);
263-
info.AddParent(range, TShardIdx(1, 2));
278+
// intersection by start + both larger than 1
279+
addShard(3, 4, 2);
280+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 3);
281+
UNIT_ASSERT(checkShards(1, 2, {1}));
282+
UNIT_ASSERT(checkShards(3, 3, {1, 2}));
283+
UNIT_ASSERT(checkShards(4, 4, {2}));
264284

265-
// Cluster2Shards is a rather stupid thing - for now, it just merges all intersecting
266-
// shard ranges into a single item, thus losing the range info for individual cluster IDs
267-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 1);
268-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).From, 1);
269-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).Shards.size(), 2);
285+
// intersection by start + both 1 (duplicate range)
286+
// incomplete range is "from (3, +infinity)", thus equal to "from 4"
287+
addShard(3, 4, 3, true);
288+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 3);
289+
UNIT_ASSERT(checkShards(4, 4, {2, 3}));
270290

271-
// add a non-intersecting shard
272-
from = {TCell::Make((ui64)5), TCell::Make(123)};
273-
to = {TCell::Make((ui64)6), TCell::Make(123)};
274-
range = TSerializedTableRange(from, true, to, true);
275-
info.AddParent(range, TShardIdx(1, 3));
291+
// intersection by start + old 1 + new larger than 1
292+
addShard(4, 6, 4);
293+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 4);
294+
UNIT_ASSERT(checkShards(4, 4, {2, 3, 4}));
295+
UNIT_ASSERT(checkShards(5, 6, {4}));
276296

277-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 2);
278-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).From, 1);
279-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).Shards.size(), 2);
280-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(6).From, 5);
281-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(6).Shards.size(), 1);
297+
// intersection by start + old larger than 1 + new 1
298+
addShard(6, 6, 5);
299+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 5);
300+
UNIT_ASSERT(checkShards(5, 5, {4}));
301+
UNIT_ASSERT(checkShards(6, 6, {4, 5}));
282302

283-
// incomplete range is "from (99, +infinity)", thus equal to "from [100]"
284-
from = {TCell::Make((ui64)99)};
285-
to = {TCell::Make((ui64)100), TCell::Make(123)};
286-
range = TSerializedTableRange(from, true, to, true);
287-
info.AddParent(range, TShardIdx(1, 4));
303+
// no intersection + after non-empty list
304+
addShard(19, 20, 6);
305+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 6);
306+
UNIT_ASSERT(checkShards(19, 20, {6}));
288307

289-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 3);
290-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(100).From, 100);
291-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(100).Shards.size(), 1);
308+
// intersection by end + both larger than 1
309+
addShard(18, 19, 7);
310+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 8);
311+
UNIT_ASSERT(checkShards(18, 18, {7}));
312+
UNIT_ASSERT(checkShards(19, 19, {6, 7}));
313+
UNIT_ASSERT(checkShards(20, 20, {6}));
292314

293-
// insert and check something before an existing item
294-
from = {TCell::Make((ui64)50), TCell::Make(123)};
295-
to = {TCell::Make((ui64)51), TCell::Make(123)};
296-
range = TSerializedTableRange(from, true, to, true);
297-
info.AddParent(range, TShardIdx(1, 5));
315+
// intersection by end + both 1 (duplicate range)
316+
addShard(18, 18, 8);
317+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 8);
318+
UNIT_ASSERT(checkShards(18, 18, {7, 8}));
298319

299-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 4);
300-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(51).From, 50);
301-
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(51).Shards.size(), 1);
320+
// intersection by end + old 1 + new larger than 1
321+
addShard(16, 18, 9);
322+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 9);
323+
UNIT_ASSERT(checkShards(16, 17, {9}));
324+
UNIT_ASSERT(checkShards(18, 18, {7, 8, 9}));
325+
326+
// intersection by end + old larger than 1 + new 1
327+
addShard(16, 16, 10);
328+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 10);
329+
UNIT_ASSERT(checkShards(16, 16, {9, 10}));
330+
UNIT_ASSERT(checkShards(17, 17, {9}));
331+
332+
// intersection by both
333+
addShard(7, 9, 11);
334+
addShard(13, 15, 12);
335+
addShard(9, 13, 13);
336+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 15);
337+
UNIT_ASSERT(checkShards(7, 8, {11}));
338+
UNIT_ASSERT(checkShards(9, 9, {11, 13}));
339+
UNIT_ASSERT(checkShards(10, 12, {13}));
340+
UNIT_ASSERT(checkShards(13, 13, {12, 13}));
341+
UNIT_ASSERT(checkShards(14, 15, {12}));
302342

303343
}
304344

0 commit comments

Comments
 (0)