Skip to content

Commit 8c57cab

Browse files
authored
Create prefix impl table and ability to create prefixed vector index (#14530)
1 parent 1ee0489 commit 8c57cab

17 files changed

+351
-102
lines changed

ydb/core/base/table_index.cpp

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,22 @@ bool Contains(const auto& names, std::string_view str) {
2828
return std::find(std::begin(names), std::end(names), str) != std::end(names);
2929
}
3030

31-
constexpr std::string_view ImplTables[] = {
32-
ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable,
31+
bool ContainsSystemColumn(const auto& columns) {
32+
for (const auto& column : columns) {
33+
if (column.StartsWith(SYSTEM_COLUMN_PREFIX)) {
34+
return true;
35+
}
36+
}
37+
return false;
38+
}
39+
40+
const TString ImplTables[] = {
41+
ImplTable,
42+
NTableVectorKmeansTreeIndex::LevelTable,
43+
NTableVectorKmeansTreeIndex::PostingTable,
44+
NTableVectorKmeansTreeIndex::PrefixTable,
45+
TString{NTableVectorKmeansTreeIndex::PostingTable} + NTableVectorKmeansTreeIndex::BuildSuffix0,
46+
TString{NTableVectorKmeansTreeIndex::PostingTable} + NTableVectorKmeansTreeIndex::BuildSuffix1,
3347
};
3448

3549
constexpr std::string_view GlobalSecondaryImplTables[] = {
@@ -42,18 +56,21 @@ constexpr std::string_view GlobalKMeansTreeImplTables[] = {
4256
};
4357
static_assert(std::is_sorted(std::begin(GlobalKMeansTreeImplTables), std::end(GlobalKMeansTreeImplTables)));
4458

59+
constexpr std::string_view PrefixedGlobalKMeansTreeImplTables[] = {
60+
NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable, NTableVectorKmeansTreeIndex::PrefixTable,
61+
};
62+
static_assert(std::is_sorted(std::begin(PrefixedGlobalKMeansTreeImplTables), std::end(PrefixedGlobalKMeansTreeImplTables)));
63+
4564
}
4665

4766
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index) {
4867
TTableColumns result;
4968

5069
const bool isSecondaryIndex = type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
51-
if (isSecondaryIndex) {
52-
for (const auto& ik : index.KeyColumns) {
53-
result.Keys.push_back(ik);
54-
result.Columns.emplace(ik);
55-
}
56-
}
70+
std::for_each(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1), [&] (const auto& ik) {
71+
result.Keys.push_back(ik);
72+
result.Columns.emplace(ik);
73+
});
5774

5875
for (const auto& tk : table.Keys) {
5976
if (result.Columns.emplace(tk).second) {
@@ -112,40 +129,32 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
112129

113130
const bool isSecondaryIndex = indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
114131

132+
if (index.KeyColumns.size() < 1) {
133+
explain = "should be at least single index key column";
134+
return false;
135+
}
115136
if (isSecondaryIndex) {
116-
if (index.KeyColumns.size() < 1) {
117-
explain = "should be at least single index key column";
118-
return false;
119-
}
120137
if (index.KeyColumns == table.Keys) {
121138
explain = "index keys shouldn't be table keys";
122139
return false;
123140
}
124141
} else {
125-
if (index.KeyColumns.size() != 1) {
126-
explain = "only single key column is supported for vector index";
127-
return false;
128-
}
129-
130-
if (Contains(table.Keys, NTableVectorKmeansTreeIndex::ParentColumn)) {
131-
explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
142+
if (ContainsSystemColumn(table.Keys)) {
143+
explain = TStringBuilder() << "table key column shouldn't have a reserved name";
132144
return false;
133145
}
134-
if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::ParentColumn)) {
135-
// This isn't really needed, but it will be really strange to have column with such name but different meaning
136-
explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
146+
if (ContainsSystemColumn(index.KeyColumns)) {
147+
explain = TStringBuilder() << "index key column shouldn't have a reserved name";
137148
return false;
138149
}
139-
if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::ParentColumn)) {
140-
explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
150+
if (ContainsSystemColumn(index.DataColumns)) {
151+
explain = TStringBuilder() << "index data column shouldn't have a reserved name";
141152
return false;
142153
}
143154
}
144155
tmp.clear();
145156
tmp.insert(table.Keys.begin(), table.Keys.end());
146-
if (isSecondaryIndex) {
147-
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end());
148-
}
157+
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1));
149158
if (const auto* broken = IsContains(index.DataColumns, tmp, true)) {
150159
explain = TStringBuilder()
151160
<< "the same column can't be used as key and data column for one index, for example " << *broken;
@@ -154,9 +163,13 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
154163
return true;
155164
}
156165

157-
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType) {
166+
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span<const TString> indexKeys) {
158167
if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) {
159-
return GlobalKMeansTreeImplTables;
168+
if (indexKeys.size() == 1) {
169+
return GlobalKMeansTreeImplTables;
170+
} else {
171+
return PrefixedGlobalKMeansTreeImplTables;
172+
}
160173
} else {
161174
return GlobalSecondaryImplTables;
162175
}
@@ -168,7 +181,8 @@ bool IsImplTable(std::string_view tableName) {
168181

169182
bool IsBuildImplTable(std::string_view tableName) {
170183
// all impl tables whose names end with a build suffix ("0build" or "1build") are used only for index creation and are dropped when the index build is finished
171-
return tableName.ends_with("build");
184+
return tableName.ends_with(NTableVectorKmeansTreeIndex::BuildSuffix0)
185+
|| tableName.ends_with(NTableVectorKmeansTreeIndex::BuildSuffix1);
172186
}
173187

174188
}

ydb/core/base/table_index.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@
1010
#include <span>
1111
#include <string_view>
1212

13-
namespace NKikimr::NTableIndex {
13+
namespace NKikimr {
14+
15+
inline constexpr const char* SYSTEM_COLUMN_PREFIX = "__ydb_";
16+
17+
namespace NTableIndex {
1418

1519
struct TTableColumns {
1620
THashSet<TString> Columns;
@@ -27,8 +31,9 @@ inline constexpr const char* ImplTable = "indexImplTable";
2731
bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain);
2832
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index);
2933

30-
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType);
34+
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span<const TString> indexKeys);
3135
bool IsImplTable(std::string_view tableName);
3236
bool IsBuildImplTable(std::string_view tableName);
3337

3438
}
39+
}

ydb/core/base/table_vector_index.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,7 @@ inline constexpr const char* PostingTable = "indexImplPostingTable";
1818
inline constexpr const char* BuildSuffix0 = "0build";
1919
inline constexpr const char* BuildSuffix1 = "1build";
2020

21+
// Prefix table
22+
inline constexpr const char* PrefixTable = "indexImplPrefixTable";
23+
2124
}

ydb/core/base/ut/table_index_ut.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ Y_UNIT_TEST_SUITE (TableIndex) {
9393

9494
UNIT_ASSERT(IsCompatibleIndex(type, Table, {{"DATA1"}, {"DATA1"}}, explain));
9595
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
96+
97+
UNIT_ASSERT(IsCompatibleIndex(type, Table, {{"DATA1", "DATA2"}, {}}, explain));
98+
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
9699
}
97100

98101
Y_UNIT_TEST (NotCompatibleVectorIndex) {
@@ -111,26 +114,23 @@ Y_UNIT_TEST_SUITE (TableIndex) {
111114
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA1"}, {}}, explain));
112115
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be unique, for example DATA1");
113116

114-
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA2"}, {}}, explain));
115-
UNIT_ASSERT_STRINGS_EQUAL(explain, "only single key column is supported for vector index");
116-
117117
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1"}, {"PK2"}}, explain));
118118
UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example PK2");
119119

120120
{
121121
const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {"PK"}};
122122

123123
UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::ParentColumn}, {}}, explain));
124-
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
124+
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name");
125125

126126
UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::ParentColumn}}, explain));
127-
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
127+
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name");
128128
}
129129
{
130130
const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {NTableVectorKmeansTreeIndex::ParentColumn}};
131131

132132
UNIT_ASSERT(!IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
133-
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
133+
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name");
134134
}
135135
}
136136
}

ydb/core/kqp/gateway/kqp_metadata_loader.cpp

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -401,15 +401,14 @@ TString GetDebugString(const std::pair<NKikimr::TIndexId, TString>& id) {
401401
return TStringBuilder() << " Path: " << id.second << " TableId: " << id.first;
402402
}
403403

404-
void UpdateMetadataIfSuccess(NYql::TKikimrTableMetadataPtr& implTable, TTableMetadataResult& value) {
404+
void UpdateMetadataIfSuccess(NYql::TKikimrTableMetadataPtr* implTable, TTableMetadataResult& value) {
405+
YQL_ENSURE(implTable);
405406
YQL_ENSURE(value.Success());
406-
if (!implTable) {
407-
implTable = std::move(value.Metadata);
408-
return;
407+
while (*implTable) {
408+
YQL_ENSURE((*implTable)->Name < value.Metadata->Name);
409+
implTable = &(*implTable)->Next;
409410
}
410-
YQL_ENSURE(!implTable->Next);
411-
YQL_ENSURE(implTable->Name < value.Metadata->Name);
412-
implTable->Next = std::move(value.Metadata);
411+
*implTable = std::move(value.Metadata);
413412
}
414413

415414
void SetError(TTableMetadataResult& externalDataSourceMetadata, const TString& error) {
@@ -630,7 +629,7 @@ NThreading::TFuture<TTableMetadataResult> TKqpTableMetadataLoader::LoadIndexMeta
630629

631630
for (size_t i = 0; i < indexesCount; i++) {
632631
const auto& index = tableMetadata->Indexes[i];
633-
const auto implTablePaths = NSchemeHelpers::CreateIndexTablePath(tableName, index.Type, index.Name);
632+
const auto implTablePaths = NSchemeHelpers::CreateIndexTablePath(tableName, index);
634633
for (const auto& implTablePath : implTablePaths) {
635634
if (!index.SchemaVersion) {
636635
LOG_DEBUG_S(*ActorSystem, NKikimrServices::KQP_GATEWAY, "Load index metadata without schema version check index: " << index.Name);
@@ -664,13 +663,12 @@ NThreading::TFuture<TTableMetadataResult> TKqpTableMetadataLoader::LoadIndexMeta
664663
result.Metadata->ImplTables.resize(indexesCount);
665664
auto it = children.begin();
666665
for (size_t i = 0; i < indexesCount; i++) {
667-
for (const auto& _ : NTableIndex::GetImplTables(NYql::TIndexDescription::ConvertIndexType(
668-
result.Metadata->Indexes[i].Type))) {
666+
for (const auto& _ : result.Metadata->Indexes[i].GetImplTables()) {
669667
YQL_ENSURE(it != children.end());
670668
auto value = it++->ExtractValue();
671669
result.AddIssues(value.Issues());
672670
if (loadOk && (loadOk = value.Success())) {
673-
UpdateMetadataIfSuccess(result.Metadata->ImplTables[i], value);
671+
UpdateMetadataIfSuccess(&result.Metadata->ImplTables[i], value);
674672
}
675673
}
676674
}

ydb/core/kqp/gateway/utils/scheme_helpers.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ bool SplitTablePath(const TString& tableName, const TString& database, std::pair
4646
}
4747
}
4848

49-
TVector<TString> CreateIndexTablePath(const TString& tableName, NYql::TIndexDescription::EType indexType, const TString& indexName) {
50-
auto implTables = NTableIndex::GetImplTables(NYql::TIndexDescription::ConvertIndexType(indexType));
49+
TVector<TString> CreateIndexTablePath(const TString& tableName, const NYql::TIndexDescription& index) {
50+
const auto implTables = index.GetImplTables();
5151
TVector<TString> paths;
5252
paths.reserve(implTables.size());
5353
for (const auto& implTable : implTables) {
54-
paths.emplace_back(TStringBuilder() << tableName << "/" << indexName << "/" << implTable);
54+
paths.emplace_back(TStringBuilder() << tableName << "/" << index.Name << "/" << implTable);
5555
}
5656
return paths;
5757
}

ydb/core/kqp/gateway/utils/scheme_helpers.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ bool TrySplitTablePath(const TString& path, std::pair<TString, TString>& result,
2222
bool SplitTablePath(const TString& tableName, const TString& database, std::pair<TString, TString>& pathPair,
2323
TString& error, bool createDir);
2424

25-
TVector<TString> CreateIndexTablePath(const TString& tableName, NYql::TIndexDescription::EType indexType, const TString& indexName);
25+
TVector<TString> CreateIndexTablePath(const TString& tableName, const NYql::TIndexDescription& index);
2626

2727
bool SetDatabaseForLoginOperation(TString& result, bool getDomainLoginOnly, TMaybe<TString> domainName,
2828
const TString& database);

ydb/core/kqp/provider/yql_kikimr_exec.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,7 +1847,7 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
18471847
TStringBuilder() << "Unknown index name: " << indexName));
18481848
return SyncError();
18491849
}
1850-
auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(table.Metadata->Name, indexIter->Type, indexName);
1850+
auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(table.Metadata->Name, *indexIter);
18511851
if (indexTablePaths.size() != 1) {
18521852
ctx.AddError(
18531853
TIssue(ctx.GetPosition(indexSetting.Name().Pos()),
@@ -2642,7 +2642,7 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
26422642
if (auto maybeAnalyze = TMaybeNode<TKiAnalyzeTable>(input)) {
26432643
if (!SessionCtx->Config().FeatureFlags.GetEnableColumnStatistics()) {
26442644
ctx.AddError(TIssue("ANALYZE command is not supported because `EnableColumnStatistics` feature flag is off"));
2645-
return SyncError();
2645+
return SyncError();
26462646
}
26472647

26482648
auto cluster = TString(maybeAnalyze.Cast().DataSink().Cluster());

ydb/core/kqp/provider/yql_kikimr_gateway.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <ydb/core/external_sources/external_source_factory.h>
2020
#include <ydb/core/kqp/query_data/kqp_query_data.h>
2121
#include <ydb/core/kqp/query_data/kqp_prepared_query.h>
22+
#include <ydb/core/base/table_index.h>
2223
#include <ydb/core/protos/flat_scheme_op.pb.h>
2324
#include <ydb/core/protos/kqp.pb.h>
2425
#include <ydb/core/protos/kqp_stats.pb.h>
@@ -205,6 +206,10 @@ struct TIndexDescription {
205206
return true;
206207
}
207208
}
209+
210+
std::span<const std::string_view> GetImplTables() const {
211+
return NKikimr::NTableIndex::GetImplTables(NYql::TIndexDescription::ConvertIndexType(Type), KeyColumns);
212+
}
208213
};
209214

210215
struct TColumnFamily {
@@ -547,11 +552,15 @@ struct TKikimrTableMetadata : public TThrRefBase {
547552
auto it = message->GetSecondaryGlobalIndexMetadata().begin();
548553
ImplTables.reserve(indexesCount);
549554
for(int i = 0; i < indexesCount; ++i) {
550-
YQL_ENSURE(it != message->GetSecondaryGlobalIndexMetadata().end());
551-
auto& implTable = ImplTables.emplace_back(MakeIntrusive<TKikimrTableMetadata>(&*it++));
552-
if (Indexes[i].Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree) {
555+
decltype(ImplTables)::value_type* implTable = nullptr;
556+
for (const auto& _ : Indexes[i].GetImplTables()) {
553557
YQL_ENSURE(it != message->GetSecondaryGlobalIndexMetadata().end());
554-
implTable->Next = MakeIntrusive<TKikimrTableMetadata>(&*it++);
558+
if (!implTable) { // first impl table of this index: start a new entry in ImplTables; later ones are chained via Next
559+
implTable = &ImplTables.emplace_back(MakeIntrusive<TKikimrTableMetadata>(&*it++));
560+
} else {
561+
(*implTable)->Next = MakeIntrusive<TKikimrTableMetadata>(&*it++);
562+
implTable = &(*implTable)->Next;
563+
}
555564
}
556565
}
557566
YQL_ENSURE(it == message->GetSecondaryGlobalIndexMetadata().end());

ydb/core/kqp/provider/yql_kikimr_opt_build.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ struct TKiExploreTxResults {
136136
});
137137
YQL_ENSURE(indexIt != tableMeta->Indexes.end(), "Index not found");
138138

139-
const auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, indexIt->Type, indexName);
139+
const auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, *indexIt);
140140

141141
THashSet<TString> indexColumns;
142142
indexColumns.reserve(indexIt->KeyColumns.size() + indexIt->DataColumns.size());
@@ -187,7 +187,7 @@ struct TKiExploreTxResults {
187187
continue;
188188
}
189189

190-
const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index.Type, index.Name);
190+
const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index);
191191
YQL_ENSURE(indexTables.size() == 1, "Only index with one impl table is supported");
192192
const auto indexTable = indexTables[0];
193193

@@ -211,7 +211,7 @@ struct TKiExploreTxResults {
211211
continue;
212212
}
213213

214-
const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index.Type, index.Name);
214+
const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index);
215215
YQL_ENSURE(indexTables.size() == 1, "Only index with one impl table is supported");
216216
const auto indexTable = indexTables[0];
217217

0 commit comments

Comments
 (0)