Skip to content

Commit 812d7c8

Browse files
MBkktazevaykin
andauthored
Fixes for multiple implementation tables in vector index (#6854)
Co-authored-by: azevaykin <145343289+azevaykin@users.noreply.github.com>
1 parent 6123ecb commit 812d7c8

11 files changed

+248
-206
lines changed

ydb/core/base/table_index.cpp

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ const TString* IsContains(const TVector<TString>& names, const THashSet<TString>
2424
return nullptr;
2525
}
2626

27-
bool Contains(const TVector<TString>& names, std::string_view str) {
28-
return std::find(names.begin(), names.end(), str) != names.end();
27+
bool Contains(const auto& names, std::string_view str) {
28+
return std::find(std::begin(names), std::end(names), str) != std::end(names);
2929
}
3030

31+
constexpr std::string_view ImplTables[] = {ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable};
32+
3133
}
3234

3335
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index) {
@@ -39,9 +41,6 @@ TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const T
3941
result.Keys.push_back(ik);
4042
result.Columns.emplace(ik);
4143
}
42-
} else {
43-
result.Keys.push_back(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn);
44-
result.Columns.insert(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn);
4544
}
4645

4746
for (const auto& tk : table.Keys) {
@@ -102,8 +101,13 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
102101
const bool isSecondaryIndex = indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
103102

104103
if (isSecondaryIndex) {
105-
if (index.KeyColumns == table.Keys) {
106-
explain = "table and index keys are the same";
104+
if (index.KeyColumns.size() < 1) {
105+
explain = "should be at least single index key column";
106+
return false;
107+
}
108+
if (index.KeyColumns.size() <= table.Keys.size() &&
109+
std::equal(index.KeyColumns.begin(), index.KeyColumns.end(), table.Keys.begin())) {
110+
explain = "index keys are prefix of table keys";
107111
return false;
108112
}
109113
} else {
@@ -112,7 +116,12 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
112116
return false;
113117
}
114118

119+
if (Contains(table.Keys, NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn)) {
120+
explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn;
121+
return false;
122+
}
115123
if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn)) {
124+
// This isn't really needed, but it will be really strange to have column with such name but different meaning
116125
explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn;
117126
return false;
118127
}
@@ -135,7 +144,12 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
135144
}
136145

137146
bool IsImplTable(std::string_view tableName) {
138-
return std::find(std::begin(ImplTables), std::end(ImplTables), tableName) != std::end(ImplTables);
147+
return Contains(ImplTables, tableName);
148+
}
149+
150+
bool IsTmpImplTable(std::string_view tableName) {
151+
// all impl tables that ends with "tmp" should be used only for index creation and dropped when index build is finished
152+
return tableName.ends_with("tmp");
139153
}
140154

141155
}

ydb/core/base/table_index.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#pragma once
22

33
#include <ydb/core/protos/flat_scheme_op.pb.h>
4-
#include <ydb/core/base/table_vector_index.h>
54

65
#include <util/generic/hash_set.h>
76
#include <util/generic/vector.h>
@@ -21,10 +20,10 @@ struct TIndexColumns {
2120
};
2221

2322
inline constexpr const char* ImplTable = "indexImplTable";
24-
inline constexpr std::string_view ImplTables[] = {ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable};
2523

2624
bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain);
2725
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index);
2826
bool IsImplTable(std::string_view tableName);
27+
bool IsTmpImplTable(std::string_view tableName);
2928

3029
}

ydb/core/base/table_vector_index.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ inline constexpr const char* LevelTable_EmbeddingColumn = "-embedding";
1414
inline constexpr const char* PostingTable = "indexImplPostingTable";
1515
inline constexpr const char* PostingTable_ParentIdColumn = "-parent";
1616

17+
inline constexpr const char* TmpPostingTableSuffix0 = "0tmp";
18+
inline constexpr const char* TmpPostingTableSuffix1 = "1tmp";
1719

1820
}
1921

ydb/core/base/ut/table_index_ut.cpp

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "table_index.h"
2+
#include "table_vector_index.h"
23

34
#include <library/cpp/testing/unittest/registar.h>
45

@@ -40,39 +41,38 @@ Y_UNIT_TEST_SUITE (TableIndex) {
4041
UNIT_ASSERT(IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
4142
UNIT_ASSERT(explain.empty());
4243
}
43-
44-
// will be fixed:
45-
UNIT_ASSERT(IsCompatibleIndex(type, Table, {{}, {}}, explain));
46-
UNIT_ASSERT(explain.empty());
47-
48-
UNIT_ASSERT(IsCompatibleIndex(type, Table, {{"PK2"}, {}}, explain));
49-
UNIT_ASSERT(explain.empty());
5044
}
5145

5246
Y_UNIT_TEST (NotCompatibleSecondaryIndex) {
5347
TString explain;
5448
auto type = NKikimrSchemeOp::EIndexType::EIndexTypeGlobal;
5549

5650
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"NOT EXIST"}, {}}, explain));
57-
UNIT_ASSERT(!explain.empty());
51+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be in table columns, index key column NOT EXIST is missed");
5852

5953
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{}, {"NOT EXIST"}}, explain));
60-
UNIT_ASSERT(!explain.empty());
54+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index data columns should be in table columns, index data column NOT EXIST is missed");
6155

6256
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"NOT EXIST"}, {"NOT EXIST"}}, explain));
63-
UNIT_ASSERT(!explain.empty());
57+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be in table columns, index key column NOT EXIST is missed");
6458

6559
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA1"}, {}}, explain));
66-
UNIT_ASSERT(!explain.empty());
60+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be unique, for example DATA1");
6761

6862
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1"}, {"PK2"}}, explain));
69-
UNIT_ASSERT(!explain.empty());
63+
UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example PK2");
7064

7165
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1"}, {"DATA1"}}, explain));
72-
UNIT_ASSERT(!explain.empty());
66+
UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example DATA1");
7367

7468
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1"}, {"DATA3", "DATA3"}}, explain));
75-
UNIT_ASSERT(!explain.empty());
69+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index data columns should be unique, for example DATA3");
70+
71+
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{}, {}}, explain));
72+
UNIT_ASSERT_STRINGS_EQUAL(explain, "should be at least single index key column");
73+
74+
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"PK2"}, {}}, explain));
75+
UNIT_ASSERT_STRINGS_EQUAL(explain, "index keys are prefix of table keys");
7676
}
7777

7878
Y_UNIT_TEST (CompatibleVectorIndex) {
@@ -90,46 +90,44 @@ Y_UNIT_TEST_SUITE (TableIndex) {
9090

9191
UNIT_ASSERT(IsCompatibleIndex(type, Table, {{"DATA1"}, {"DATA1"}}, explain));
9292
UNIT_ASSERT(explain.empty());
93-
94-
// will be fixed:
95-
{
96-
const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}, {NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}};
97-
98-
UNIT_ASSERT(IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
99-
UNIT_ASSERT(explain.empty());
100-
}
10193
}
10294

10395
Y_UNIT_TEST (NotCompatibleVectorIndex) {
10496
TString explain;
10597
auto type = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
10698

10799
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"NOT EXIST"}, {}}, explain));
108-
UNIT_ASSERT(!explain.empty());
100+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be in table columns, index key column NOT EXIST is missed");
109101

110102
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{}, {"NOT EXIST"}}, explain));
111-
UNIT_ASSERT(!explain.empty());
103+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index data columns should be in table columns, index data column NOT EXIST is missed");
112104

113105
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"NOT EXIST"}, {"NOT EXIST"}}, explain));
114-
UNIT_ASSERT(!explain.empty());
106+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be in table columns, index key column NOT EXIST is missed");
115107

116108
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA1"}, {}}, explain));
117-
UNIT_ASSERT(!explain.empty());
109+
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be unique, for example DATA1");
118110

119111
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA2"}, {}}, explain));
120-
UNIT_ASSERT(!explain.empty());
112+
UNIT_ASSERT_STRINGS_EQUAL(explain, "only single key column is supported for vector index");
121113

122114
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1"}, {"PK2"}}, explain));
123-
UNIT_ASSERT(!explain.empty());
115+
UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example PK2");
124116

125117
{
126118
const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}, {"PK"}};
127119

128120
UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}, {}}, explain));
129-
UNIT_ASSERT(!explain.empty());
121+
UNIT_ASSERT_STRINGS_EQUAL(explain, "index key column shouldn't have a reserved name: -parent");
130122

131123
UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}}, explain));
132-
UNIT_ASSERT(!explain.empty());
124+
UNIT_ASSERT_STRINGS_EQUAL(explain, "index data column shouldn't have a reserved name: -parent");
125+
}
126+
{
127+
const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}, {NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn}};
128+
129+
UNIT_ASSERT(!IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
130+
UNIT_ASSERT_STRINGS_EQUAL(explain, "table key column shouldn't have a reserved name: -parent");
133131
}
134132
}
135133
}

ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp

Lines changed: 47 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,43 @@
1313

1414
namespace NKikimr {
1515
namespace NSchemeShard {
16+
namespace {
17+
18+
ISubOperation::TPtr FinalizeIndexImplTable(TOperationContext& context, const TPath& index, const TOperationId& partId, const TString& name, const TPathId& pathId) {
19+
Y_ABORT_UNLESS(index.Child(name)->PathId == pathId);
20+
Y_ABORT_UNLESS(index.Child(name).LeafName() == name);
21+
TTableInfo::TPtr table = context.SS->Tables.at(pathId);
22+
auto transaction = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpFinalizeBuildIndexImplTable);
23+
auto operation = transaction.MutableAlterTable();
24+
operation->SetName(name);
25+
operation->MutablePartitionConfig()->MutableCompactionPolicy()->CopyFrom(table->PartitionConfig().GetCompactionPolicy());
26+
operation->MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(false);
27+
operation->MutablePartitionConfig()->SetShadowData(false);
28+
return CreateFinalizeBuildIndexImplTable(partId, transaction);
29+
}
30+
31+
ISubOperation::TPtr DropIndexImplTable(TOperationContext& context, const TPath& index, const TOperationId& nextId, const TOperationId& partId, const TString& name, const TPathId& pathId) {
32+
TPath implTable = index.Child(name);
33+
Y_ABORT_UNLESS(implTable->PathId == pathId);
34+
Y_ABORT_UNLESS(implTable.LeafName() == name);
35+
auto checks = implTable.Check();
36+
checks.NotEmpty()
37+
.IsResolved()
38+
.NotDeleted()
39+
.IsTable()
40+
.IsInsideTableIndexPath()
41+
.NotUnderDeleting()
42+
.NotUnderOperation();
43+
if (!checks) {
44+
return {CreateReject(nextId, checks.GetStatus(), checks.GetError())};
45+
}
46+
auto transaction = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable);
47+
auto operation = transaction.MutableDrop();
48+
operation->SetName(name);
49+
return CreateDropTable(partId, transaction);
50+
}
51+
52+
}
1653

1754
TVector<ISubOperation::TPtr> ApplyBuildIndex(TOperationId nextId, const TTxTransaction& tx, TOperationContext& context) {
1855
Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::EOperationType::ESchemeOpApplyIndexBuild);
@@ -50,24 +87,16 @@ TVector<ISubOperation::TPtr> ApplyBuildIndex(TOperationId nextId, const TTxTrans
5087
}
5188

5289
if (!indexName.empty()) {
53-
auto alterImplTableTransactionTemplate = [] (TPath index, TPath implIndexTable, TTableInfo::TPtr implIndexTableInfo) {
54-
auto indexImplTableAltering = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpFinalizeBuildIndexImplTable);
55-
auto alterTable = indexImplTableAltering.MutableAlterTable();
56-
alterTable->SetName(implIndexTable.LeafName());
57-
alterTable->MutablePartitionConfig()->MutableCompactionPolicy()->CopyFrom(implIndexTableInfo->PartitionConfig().GetCompactionPolicy());
58-
alterTable->MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(false);
59-
alterTable->MutablePartitionConfig()->SetShadowData(false);
60-
return indexImplTableAltering;
61-
};
62-
6390
TPath index = table.Child(indexName);
64-
for (const std::string_view implTable : NTableIndex::ImplTables) {
65-
TPath implIndexTable = index.Child(implTable.data());
66-
if (!implIndexTable.IsResolved()) {
67-
continue;
91+
Y_ABORT_UNLESS(index.Base()->GetChildren().size() >= 1);
92+
for (auto& indexChildItems : index.Base()->GetChildren()) {
93+
const auto& indexImplTableName = indexChildItems.first;
94+
const auto partId = NextPartId(nextId, result);
95+
if (NTableIndex::IsTmpImplTable(indexImplTableName)) {
96+
result.push_back(DropIndexImplTable(context, index, nextId, partId, indexImplTableName, indexChildItems.second));
97+
} else {
98+
result.push_back(FinalizeIndexImplTable(context, index, partId, indexImplTableName, indexChildItems.second));
6899
}
69-
TTableInfo::TPtr implIndexTableInfo = context.SS->Tables.at(implIndexTable.Base()->PathId);
70-
result.push_back(CreateFinalizeBuildIndexImplTable(NextPartId(nextId, result), alterImplTableTransactionTemplate(index, implIndexTable, implIndexTableInfo)));
71100
}
72101
}
73102

@@ -113,33 +142,8 @@ TVector<ISubOperation::TPtr> CancelBuildIndex(TOperationId nextId, const TTxTran
113142
TPath index = table.Child(indexName);
114143
Y_ABORT_UNLESS(index.Base()->GetChildren().size() >= 1);
115144
for (auto& indexChildItems : index.Base()->GetChildren()) {
116-
const TString& implTableName = indexChildItems.first;
117-
Y_ABORT_UNLESS(NTableIndex::IsImplTable(implTableName), "unexpected name %s", implTableName.c_str());
118-
119-
TPath implTable = index.Child(implTableName);
120-
{
121-
TPath::TChecker checks = implTable.Check();
122-
checks.NotEmpty()
123-
.IsResolved()
124-
.NotDeleted()
125-
.IsTable()
126-
.IsInsideTableIndexPath()
127-
.NotUnderDeleting()
128-
.NotUnderOperation();
129-
130-
if (!checks) {
131-
return {CreateReject(nextId, checks.GetStatus(), checks.GetError())};
132-
}
133-
}
134-
Y_ABORT_UNLESS(implTable.Base()->PathId == indexChildItems.second);
135-
136-
{
137-
auto implTableDropping = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable);
138-
auto operation = implTableDropping.MutableDrop();
139-
operation->SetName(ToString(implTable.Base()->Name));
140-
141-
result.push_back(CreateDropTable(NextPartId(nextId,result), implTableDropping));
142-
}
145+
const auto partId = NextPartId(nextId, result);
146+
result.push_back(DropIndexImplTable(context, index, nextId, partId, indexChildItems.first, indexChildItems.second));
143147
}
144148
}
145149

ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "schemeshard_path_element.h"
55
#include "schemeshard_utils.h"
66

7+
#include <ydb/core/base/table_vector_index.h>
78
#include <ydb/core/protos/flat_tx_scheme.pb.h>
89
#include <ydb/core/protos/flat_scheme_op.pb.h>
910
#include <ydb/core/ydb_convert/table_description.h>
@@ -113,11 +114,11 @@ TVector<ISubOperation::TPtr> CreateBuildIndex(TOperationId opId, const TTxTransa
113114
}
114115

115116
auto createImplTable = [&](NKikimrSchemeOp::TTableDescription&& implTableDesc) {
116-
auto outTx = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpInitiateBuildIndexImplTable);
117-
*outTx.MutableCreateTable() = implTableDesc;
118-
119117
implTableDesc.MutablePartitionConfig()->SetShadowData(true);
120118

119+
auto outTx = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpInitiateBuildIndexImplTable);
120+
*outTx.MutableCreateTable() = std::move(implTableDesc);
121+
121122
return CreateInitializeBuildIndexImplTable(NextPartId(opId, result), outTx);
122123
};
123124

@@ -130,13 +131,18 @@ TVector<ISubOperation::TPtr> CreateBuildIndex(TOperationId opId, const TTxTransa
130131
}
131132
result.push_back(createImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexLevelTableDesc)));
132133
result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc)));
134+
// TODO Maybe better to use partition from main table
135+
// This tables are temporary and handled differently in apply_build_index
136+
result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::TmpPostingTableSuffix0)));
137+
result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::TmpPostingTableSuffix1)));
133138
} else {
134139
NKikimrSchemeOp::TTableDescription indexTableDesc;
135140
// TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS
136141
if (indexDesc.IndexImplTableDescriptionsSize() == 1) {
137142
indexTableDesc = indexDesc.GetIndexImplTableDescriptions(0);
138143
}
139144
auto implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexTableDesc);
145+
// TODO if keep erase markers also speedup compaction or something else we can enable it for other impl tables too
140146
implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true);
141147
result.push_back(createImplTable(std::move(implTableDesc)));
142148
}

0 commit comments

Comments
 (0)