Skip to content

Commit f721ad0

Browse files
committed
Fix TIndexBuildInfo::AddParent access beyond map end (#20527) (#20529)
1 parent 2c5c1c1 commit f721ad0

File tree

3 files changed

+68
-3
lines changed

3 files changed

+68
-3
lines changed

ydb/core/tx/schemeshard/schemeshard_info_types.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,7 +2221,9 @@ void TIndexBuildInfo::AddParent(const TSerializedTableRange& range, TShardIdx sh
22212221
const auto [parentFrom, parentTo] = KMeans.Parent == 0
22222222
? std::pair<NTableIndex::TClusterId, NTableIndex::TClusterId>{0, 0}
22232223
: KMeans.RangeToBorders(range);
2224-
// TODO(mbkkt) We can make it more granular
2224+
// TODO Make it more granular - now we just merge all intersecting ranges.
2225+
// So if all shards have ranges like [1 2] [2 3] [3 4] and so on then we scan
2226+
// all shards for each cluster even though we could only scan 2 shards for it.
22252227

22262228
// the new range does not intersect with other ranges, just add it with 1 shard
22272229
auto itFrom = Cluster2Shards.lower_bound(parentFrom);
@@ -2233,7 +2235,7 @@ void TIndexBuildInfo::AddParent(const TSerializedTableRange& range, TShardIdx sh
22332235
// otherwise, this range has multiple shards and we need to merge all intersecting ranges
22342236
auto itTo = parentTo < itFrom->first ? itFrom : Cluster2Shards.lower_bound(parentTo);
22352237
if (itTo == Cluster2Shards.end()) {
2236-
itTo = Cluster2Shards.rbegin().base();
2238+
itTo--;
22372239
}
22382240
if (itTo->first < parentTo) {
22392241
const bool needsToReplaceFrom = itFrom == itTo;

ydb/core/tx/schemeshard/schemeshard_info_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3596,7 +3596,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
35963596
indexInfo->KMeans.Rounds = NTableIndex::NTableVectorKmeansTreeIndex::DefaultKMeansRounds;
35973597
TString createError;
35983598
indexInfo->Clusters = NKikimr::NKMeans::CreateClusters(desc.settings().settings(), indexInfo->KMeans.Rounds, createError);
3599-
Y_ENSURE(indexInfo->Clusters && createError == "");
3599+
Y_ENSURE(indexInfo->Clusters, createError);
36003600
indexInfo->SpecializedIndexDescription = std::move(desc);
36013601
} break;
36023602
case NKikimrSchemeOp::TIndexCreationConfig::SPECIALIZEDINDEXDESCRIPTION_NOT_SET:

ydb/core/tx/schemeshard/ut_base/ut_info_types.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,67 @@ ColumnFamilies {
239239
"{ 0 -> 0, 1 -> 1 }");
240240
}
241241

242+
Y_UNIT_TEST(IndexBuildInfoAddParent) {
243+
TIndexBuildInfo info;
244+
245+
info.KMeans.ParentBegin = 1;
246+
info.KMeans.Parent = 1;
247+
info.KMeans.ChildBegin = 201;
248+
info.KMeans.Child = 201;
249+
250+
TVector<TCell> from = {TCell::Make((ui64)1), TCell::Make(123)};
251+
TVector<TCell> to = {TCell::Make((ui64)3), TCell::Make(123)};
252+
auto range = TSerializedTableRange(from, true, to, true);
253+
info.AddParent(range, TShardIdx(1, 1));
254+
255+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 1);
256+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(3).From, 1);
257+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(3).Shards.size(), 1);
258+
259+
// add an intersecting shard. it even crashed here until 02.07.2025 :)
260+
from = {TCell::Make((ui64)3), TCell::Make(123)};
261+
to = {TCell::Make((ui64)4), TCell::Make(123)};
262+
range = TSerializedTableRange(from, true, to, true);
263+
info.AddParent(range, TShardIdx(1, 2));
264+
265+
// Cluster2Shards is a rather stupid thing - for now, it just merges all intersecting
266+
// shard ranges into a single item, thus losing the range info for individual cluster IDs
267+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 1);
268+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).From, 1);
269+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).Shards.size(), 2);
270+
271+
// add a non-intersecting shard
272+
from = {TCell::Make((ui64)5), TCell::Make(123)};
273+
to = {TCell::Make((ui64)6), TCell::Make(123)};
274+
range = TSerializedTableRange(from, true, to, true);
275+
info.AddParent(range, TShardIdx(1, 3));
276+
277+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 2);
278+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).From, 1);
279+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(4).Shards.size(), 2);
280+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(6).From, 5);
281+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(6).Shards.size(), 1);
282+
283+
// incomplete range is "from (99, +infinity)", thus equal to "from [100]"
284+
from = {TCell::Make((ui64)99)};
285+
to = {TCell::Make((ui64)100), TCell::Make(123)};
286+
range = TSerializedTableRange(from, true, to, true);
287+
info.AddParent(range, TShardIdx(1, 4));
288+
289+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 3);
290+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(100).From, 100);
291+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(100).Shards.size(), 1);
292+
293+
// insert and check something before an existing item
294+
from = {TCell::Make((ui64)50), TCell::Make(123)};
295+
to = {TCell::Make((ui64)51), TCell::Make(123)};
296+
range = TSerializedTableRange(from, true, to, true);
297+
info.AddParent(range, TShardIdx(1, 5));
298+
299+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.size(), 4);
300+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(51).From, 50);
301+
UNIT_ASSERT_VALUES_EQUAL(info.Cluster2Shards.at(51).Shards.size(), 1);
302+
303+
}
304+
242305
}

0 commit comments

Comments
 (0)