Skip to content

Commit a9e6d19

Browse files
committed
Fix vector index build test and make it reproduce #18236, #18278 and #18355 (#18724)
1 parent 529314d commit a9e6d19

File tree

6 files changed

+224
-165
lines changed

6 files changed

+224
-165
lines changed

ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -371,9 +371,10 @@ THolder<TEvSchemeShard::TEvModifySchemeTransaction> CreateBuildPropose(
371371
auto cell = TCell::Make(i);
372372
op.AddSplitBoundary()->SetSerializedKeyPrefix(TSerializedCellVec::Serialize({&cell, 1}));
373373
}
374+
// Prevent merging partitions
375+
policy.SetMinPartitionsCount(32768);
376+
policy.SetMaxPartitionsCount(0);
374377
}
375-
policy.SetMinPartitionsCount(op.SplitBoundarySize() + 1);
376-
policy.SetMaxPartitionsCount(op.SplitBoundarySize() + 1);
377378

378379
LOG_DEBUG_S((TlsActivationContext->AsActorContext()), NKikimrServices::BUILD_INDEX,
379380
"CreateBuildPropose " << buildInfo.Id << " " << buildInfo.State << " " << propose->Record.ShortDebugString());

ydb/core/tx/schemeshard/ut_helpers/helpers.cpp

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1714,9 +1714,9 @@ namespace NSchemeShardUT_Private {
17141714
if (cfg.KMeansTreeSettings) {
17151715
cfg.KMeansTreeSettings->SerializeTo(kmeansTreeSettings);
17161716
} else {
1717-
// some random valid settings
1718-
kmeansTreeSettings.mutable_settings()->set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT);
1719-
kmeansTreeSettings.mutable_settings()->set_vector_dimension(42);
1717+
// valid settings for tests - uint8 vectors of size 4
1718+
kmeansTreeSettings.mutable_settings()->set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UINT8);
1719+
kmeansTreeSettings.mutable_settings()->set_vector_dimension(4);
17201720
kmeansTreeSettings.mutable_settings()->set_metric(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE);
17211721
kmeansTreeSettings.set_clusters(4);
17221722
// More than 2 is too long for reboot tests
@@ -2603,4 +2603,28 @@ namespace NSchemeShardUT_Private {
26032603
SendNextValRequest(runtime, sender, path);
26042604
return WaitNextValResult(runtime, sender, expectedStatus);
26052605
}
2606+
2607+
// Reads rows from a tablet-local table by running a MiniKQL SelectRange
// program through LocalMiniKQL and returns the raw MiniKQL result.
//
//   runtime    - test actor runtime used to execute the program.
//   tabletId   - tablet the program is sent to.
//   table      - table name; the program addresses it as `__user__<table>`.
//   pk         - key column names; each one adds a `'('<name> (Null) (Void))`
//                entry to the range tuple (presumably an unbounded range over
//                that column - confirm against SelectRange semantics).
//   columns    - names of the columns to select.
//   rangeFlags - text spliced verbatim at the start of the range tuple,
//                before the key entries.
//
// Fails the current unit test if the reply status is not OK or if the
// returned error string is non-empty. The selected rows are stored in the
// result under the 'Result label.
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
2608+
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns,
2609+
const TString& rangeFlags)
2610+
{
2611+
// Build one range entry per key column.
TStringBuilder keyFmt;
2612+
for (const auto& k : pk) {
2613+
keyFmt << "'('" << k << " (Null) (Void)) ";
2614+
}
2615+
// Render the column list as quoted atoms: 'c1 'c2 ...
const auto columnsFmt = "'" + JoinSeq(" '", columns);
2616+
2617+
NKikimrMiniKQL::TResult result;
2618+
TString error;
2619+
NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, tabletId, Sprintf(R"((
2620+
(let range '(%s%s))
2621+
(let columns '(%s))
2622+
(let result (SelectRange '__user__%s range columns '()))
2623+
(return (AsList (SetResult 'Result result) ))
2624+
))", rangeFlags.data(), keyFmt.data(), columnsFmt.data(), table.data()), result, error);
2625+
// The error text is attached to the status assertion so a failure shows the cause.
UNIT_ASSERT_VALUES_EQUAL_C(status, NKikimrProto::EReplyStatus::OK, error);
2626+
// An OK status accompanied by an error message is also treated as a failure.
UNIT_ASSERT_VALUES_EQUAL(error, "");
2627+
2628+
return result;
2629+
}
26062630
}

ydb/core/tx/schemeshard/ut_helpers/helpers.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -636,6 +636,7 @@ namespace NSchemeShardUT_Private {
636636
void UpdateRow(TTestActorRuntime& runtime, const TString& table, const ui32 key, const TString& value, ui64 tabletId = TTestTxConfig::FakeHiveTablets);
637637
void UpdateRowPg(TTestActorRuntime& runtime, const TString& table, const ui32 key, ui32 value, ui64 tabletId = TTestTxConfig::FakeHiveTablets);
638638
void UploadRow(TTestActorRuntime& runtime, const TString& tablePath, int partitionIdx, const TVector<ui32>& keyTags, const TVector<ui32>& valueTags, const TVector<TCell>& keys, const TVector<TCell>& values);
639+
void WriteOp(TTestActorRuntime& runtime, ui64 schemeshardId, const ui64 txId, const TString& tablePath, int partitionIdx, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const std::vector<ui32>& columnIds, TSerializedCellMatrix&& data, bool successIsExpected);
639640
void WriteRow(TTestActorRuntime& runtime, ui64 schemeshardId, const ui64 txId, const TString& tablePath, int partitionIdx, const ui32 key, const TString& value, bool successIsExpected = true);
640641
void WriteRow(TTestActorRuntime& runtime, const ui64 txId, const TString& tablePath, int partitionIdx, const ui32 key, const TString& value, bool successIsExpected = true);
641642
void DeleteRow(TTestActorRuntime& runtime, ui64 schemeshardId, const ui64 txId, const TString& tablePath, int partitionIdx, const ui32 key, bool successIsExpected = true);
@@ -649,4 +650,8 @@ namespace NSchemeShardUT_Private {
649650
TTestActorRuntime& runtime, const TString& path,
650651
Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS);
651652

653+
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
654+
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns, const TString& rangeFlags = "");
655+
656+
652657
} //NSchemeShardUT_Private

ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -119,28 +119,6 @@ Y_UNIT_TEST_SUITE(TAsyncIndexTests) {
119119
return mainTabletIds;
120120
}
121121

122-
// Reads rows from a tablet-local table by running a MiniKQL SelectRange
// program through LocalMiniKQL and returns the raw MiniKQL result.
//
//   tabletId - tablet the program is sent to.
//   table    - table name; the program addresses it as `__user__<table>`.
//   pk       - key column names; each one adds a `'('<name> (Null) (Void))`
//              entry to the range tuple.
//   columns  - names of the columns to select.
//
// Fails the current unit test if the reply status is not OK. Unlike the
// status, the error string itself is not asserted to be empty here.
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
123-
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns)
124-
{
125-
// Build one range entry per key column.
TStringBuilder keyFmt;
126-
for (const auto& k : pk) {
127-
keyFmt << "'('" << k << " (Null) (Void)) ";
128-
}
129-
// Render the column list as quoted atoms: 'c1 'c2 ...
const auto columnsFmt = "'" + JoinSeq(" '", columns);
130-
131-
NKikimrMiniKQL::TResult result;
132-
TString error;
133-
NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, tabletId, Sprintf(R"((
134-
(let range '(%s))
135-
(let columns '(%s))
136-
(let result (SelectRange '__user__%s range columns '()))
137-
(return (AsList (SetResult 'Result result) ))
138-
))", keyFmt.data(), columnsFmt.data(), table.data()), result, error);
139-
UNIT_ASSERT_VALUES_EQUAL_C(status, NKikimrProto::EReplyStatus::OK, error);
140-
141-
return result;
142-
}
143-
144122
struct TTableTraits {
145123
TString Path;
146124
TVector<TString> Key;

ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp

Lines changed: 116 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include <ydb/core/tx/datashard/datashard.h>
99
#include <ydb/core/metering/metering.h>
1010

11+
#include <ydb/public/lib/deprecated/kicli/kicli.h>
1112
#include <ydb-cpp-sdk/client/table/table.h>
1213

1314
using namespace NKikimr;
@@ -79,30 +80,37 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
7980

8081
// Just create main table
8182
TestCreateTable(runtime, tenantSchemeShard, ++txId, "/MyRoot/ServerLessDB", R"(
82-
Name: "Table"
83-
Columns { Name: "key" Type: "Uint32" }
84-
Columns { Name: "embedding" Type: "String" }
85-
KeyColumnNames: ["key"]
83+
Name: "Table"
84+
Columns { Name: "key" Type: "Uint32" }
85+
Columns { Name: "embedding" Type: "String" }
86+
KeyColumnNames: ["key"]
87+
SplitBoundary { KeyPrefix { Tuple { Optional { Uint32: 50 } } } }
88+
SplitBoundary { KeyPrefix { Tuple { Optional { Uint32: 150 } } } }
8689
)");
8790
env.TestWaitNotification(runtime, txId, tenantSchemeShard);
8891

89-
auto fnWriteRow = [&](ui64 tabletId, ui32 key, TString embedding, const char* table) {
90-
TString writeQuery = Sprintf(R"(
91-
(
92-
(let key '( '('key (Uint32 '%u ) ) ) )
93-
(let row '( '('embedding (String '%s ) ) ) )
94-
(return (AsList (UpdateRow '__user__%s key row) ))
95-
)
96-
)", key, embedding.c_str(), table);
97-
NKikimrMiniKQL::TResult result;
98-
TString err;
99-
NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, tabletId, writeQuery, result, err);
100-
UNIT_ASSERT_VALUES_EQUAL(err, "");
101-
UNIT_ASSERT_VALUES_EQUAL(status, NKikimrProto::EReplyStatus::OK);
92+
// Write data directly into shards
93+
// Upserts rows with keys in [min, max) into partition `shard` of `tablePath`
// using a single WriteOp call (success is expected).
// Each row is (key, embedding), where the embedding is 5 bytes: four bytes
// derived deterministically from the key, followed by one byte holding
// VECTOR_TYPE_UINT8.
auto fillRows = [&](const TString & tablePath, ui32 shard, ui32 min, ui32 max) {
94+
TVector<TCell> cells;
95+
// 6-byte scratch buffer; only the first 5 bytes are passed to TCell below.
ui8 str[6] = { 0 };
96+
// The fifth byte is constant for all rows: the vector type tag.
str[4] = (ui8)Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UINT8;
97+
for (ui32 key = min; key < max; ++key) {
98+
// Four vector components computed from the key, reduced modulo 256.
str[0] = ((key+106)* 7) % 256;
99+
str[1] = ((key+106)*17) % 256;
100+
str[2] = ((key+106)*37) % 256;
101+
str[3] = ((key+106)*47) % 256;
102+
cells.emplace_back(TCell::Make(key));
103+
// NOTE(review): `str` is overwritten on every iteration, so this relies on
// TCell copying such a short value instead of keeping the pointer - confirm.
cells.emplace_back(TCell((const char*)str, 5));
104+
}
105+
// presumably the column ids of `key` and `embedding` - verify against the table schema
std::vector<ui32> columnIds{1, 2};
106+
// Cells are laid out row by row: (max - min) rows of 2 columns.
TSerializedCellMatrix matrix(cells, max-min, 2);
107+
WriteOp(runtime, tenantSchemeShard, ++txId, tablePath,
108+
shard, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT,
109+
columnIds, std::move(matrix), true);
102110
};
103-
for (ui32 key = 0; key < 200; ++key) {
104-
fnWriteRow(TTestTxConfig::FakeHiveTablets + 6, key, std::to_string(key), "Table");
105-
}
111+
fillRows("/MyRoot/ServerLessDB/Table", 0, 0, 50);
112+
fillRows("/MyRoot/ServerLessDB/Table", 1, 50, 150);
113+
fillRows("/MyRoot/ServerLessDB/Table", 2, 150, 200);
106114

107115
runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);
108116
runtime.SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE);
@@ -118,18 +126,95 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
118126
meteringMessages << event->Get()->MeteringJson;
119127
});
120128

121-
TestBuildVectorIndex(runtime, ++txId, tenantSchemeShard, "/MyRoot/ServerLessDB", "/MyRoot/ServerLessDB/Table", "index1", "embedding");
129+
TBlockEvents<TEvDataShard::TEvReshuffleKMeansRequest> reshuffleBlocker(runtime, [&](const auto& ) {
130+
return true;
131+
});
132+
133+
AsyncBuildVectorIndex(runtime, ++txId, tenantSchemeShard, "/MyRoot/ServerLessDB", "/MyRoot/ServerLessDB/Table", "index1", "embedding");
122134
ui64 buildIndexId = txId;
123135

136+
// Wait for the first "reshuffle" request (samples will be already collected on the first level)
137+
// and reboot the scheme shard to verify that its intermediate state is persisted correctly.
138+
// The bug checked here: Sample.Probability was not persisted (#18236).
139+
runtime.WaitFor("ReshuffleKMeansRequest", [&]{ return reshuffleBlocker.size(); });
140+
Cerr << "... rebooting scheme shard" << Endl;
141+
RebootTablet(runtime, tenantSchemeShard, runtime.AllocateEdgeActor());
142+
143+
// Now wait for the 1st level to be finalized
144+
TBlockEvents<TEvSchemeShard::TEvModifySchemeTransaction> level1Blocker(runtime, [&](auto& ev) {
145+
const auto& record = ev->Get()->Record;
146+
if (record.GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpInitiateBuildIndexImplTable) {
147+
txId = record.GetTxId();
148+
return true;
149+
}
150+
return false;
151+
});
152+
reshuffleBlocker.Stop();
153+
reshuffleBlocker.Unblock(reshuffleBlocker.size());
154+
155+
// Reshard the first level table (0build)
156+
// First bug checked here: after restarting the schemeshard during reshuffle it
157+
// generates more clusters than requested and dies with VERIFY on shard boundaries (#18278).
158+
// Second bug checked here: posting table doesn't contain all rows from the main table
159+
// when the build table is resharded during build (#18355).
160+
{
161+
auto indexDesc = DescribePath(runtime, tenantSchemeShard, "/MyRoot/ServerLessDB/Table/index1/indexImplPostingTable0build", true, true, true);
162+
auto parts = indexDesc.GetPathDescription().GetTablePartitions();
163+
UNIT_ASSERT_EQUAL(parts.size(), 4);
164+
ui64 cluster = 1;
165+
for (const auto & x: parts) {
166+
TestSplitTable(runtime, tenantSchemeShard, ++txId, "/MyRoot/ServerLessDB/Table/index1/indexImplPostingTable0build", Sprintf(R"(
167+
SourceTabletId: %lu
168+
SplitBoundary { KeyPrefix { Tuple { Optional { Uint64: %lu } } Tuple { Optional { Uint32: 50 } } } }
169+
SplitBoundary { KeyPrefix { Tuple { Optional { Uint64: %lu } } Tuple { Optional { Uint32: 150 } } } }
170+
)", x.GetDatashardId(), cluster, cluster));
171+
env.TestWaitNotification(runtime, txId);
172+
cluster++;
173+
}
174+
}
175+
176+
level1Blocker.Stop();
177+
level1Blocker.Unblock(level1Blocker.size());
178+
179+
// Now wait for the index build
180+
{
181+
auto expectedStatus = Ydb::StatusIds::SUCCESS;
182+
TAutoPtr<IEventHandle> handle;
183+
TEvIndexBuilder::TEvCreateResponse* event = runtime.GrabEdgeEvent<TEvIndexBuilder::TEvCreateResponse>(handle);
184+
UNIT_ASSERT(event);
185+
186+
Cerr << "BUILDINDEX RESPONSE CREATE: " << event->ToString() << Endl;
187+
UNIT_ASSERT_EQUAL_C(event->Record.GetStatus(), expectedStatus,
188+
"status mismatch"
189+
<< " got " << Ydb::StatusIds::StatusCode_Name(event->Record.GetStatus())
190+
<< " expected " << Ydb::StatusIds::StatusCode_Name(expectedStatus)
191+
<< " issues was " << event->Record.GetIssues());
192+
}
193+
194+
env.TestWaitNotification(runtime, buildIndexId, tenantSchemeShard);
195+
196+
// Check row count in the posting table
197+
{
198+
auto indexDesc = DescribePath(runtime, tenantSchemeShard, "/MyRoot/ServerLessDB/Table/index1/indexImplPostingTable", true, true, true);
199+
auto parts = indexDesc.GetPathDescription().GetTablePartitions();
200+
ui32 rows = 0;
201+
for (const auto & x: parts) {
202+
auto result = ReadTable(runtime, x.GetDatashardId(), "indexImplPostingTable",
203+
{NKikimr::NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, "key"}, {"key"});
204+
auto value = NClient::TValue::Create(result);
205+
rows += value["Result"]["List"].Size();
206+
}
207+
Cerr << "... posting table contains " << rows << " rows" << Endl;
208+
UNIT_ASSERT_VALUES_EQUAL(rows, 200);
209+
}
210+
124211
auto listing = TestListBuildIndex(runtime, tenantSchemeShard, "/MyRoot/ServerLessDB");
125212
UNIT_ASSERT_VALUES_EQUAL(listing.EntriesSize(), 1);
126213

127-
env.TestWaitNotification(runtime, txId, tenantSchemeShard);
128-
129-
auto descr = TestGetBuildIndex(runtime, tenantSchemeShard, "/MyRoot/ServerLessDB", txId);
214+
auto descr = TestGetBuildIndex(runtime, tenantSchemeShard, "/MyRoot/ServerLessDB", buildIndexId);
130215
UNIT_ASSERT_VALUES_EQUAL(descr.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_DONE);
131216

132-
const TString meteringData = R"({"usage":{"start":0,"quantity":128,"finish":0,"unit":"request_unit","type":"delta"},"tags":{},"id":"106-72075186233409549-2-0-0-0-0-200-0-1290-0","cloud_id":"CLOUD_ID_VAL","source_wt":0,"source_id":"sless-docapi-ydb-ss","resource_id":"DATABASE_ID_VAL","schema":"ydb.serverless.requests.v1","folder_id":"FOLDER_ID_VAL","version":"1.0.0"})""\n";
217+
const TString meteringData = R"({"usage":{"start":0,"quantity":431,"finish":0,"unit":"request_unit","type":"delta"},"tags":{},"id":"109-72075186233409549-2-0-0-0-0-619-605-11328-10960","cloud_id":"CLOUD_ID_VAL","source_wt":0,"source_id":"sless-docapi-ydb-ss","resource_id":"DATABASE_ID_VAL","schema":"ydb.serverless.requests.v1","folder_id":"FOLDER_ID_VAL","version":"1.0.0"})""\n";
133218

134219
UNIT_ASSERT_NO_DIFF(meteringMessages, meteringData);
135220

@@ -152,6 +237,7 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
152237
)");
153238
env.TestWaitNotification(runtime, txId, tenantSchemeShard);
154239

240+
Cerr << "... rebooting scheme shard" << Endl;
155241
RebootTablet(runtime, tenantSchemeShard, runtime.AllocateEdgeActor());
156242

157243
TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/ServerLessDB/Table"),
@@ -200,16 +286,14 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
200286
NLs::ExtractTenantSchemeshard(&tenantSchemeShard)});
201287

202288
TestCreateTable(runtime, tenantSchemeShard, ++txId, "/MyRoot/CommonDB", R"(
203-
Name: "Table"
204-
Columns { Name: "key" Type: "Uint32" }
205-
Columns { Name: "embedding" Type: "String" }
206-
KeyColumnNames: ["key"]
289+
Name: "Table"
290+
Columns { Name: "key" Type: "Uint32" }
291+
Columns { Name: "embedding" Type: "String" }
292+
KeyColumnNames: ["key"]
207293
)");
208294
env.TestWaitNotification(runtime, txId, tenantSchemeShard);
209295

210-
for (ui32 key = 100; key < 300; ++key) {
211-
fnWriteRow(TTestTxConfig::FakeHiveTablets + 6, key, std::to_string(key), "Table");
212-
}
296+
fillRows("/MyRoot/CommonDB/Table", 0, 100, 300);
213297

214298
TVector<TString> billRecords;
215299
observerHolder = runtime.AddObserver<NMetering::TEvMetering::TEvWriteMeteringJson>([&](NMetering::TEvMetering::TEvWriteMeteringJson::TPtr& event) {

0 commit comments

Comments
 (0)