Skip to content

Commit 96b86ff

Browse files
committed
Implement index-only searches with covering vector indexes (#17770) (#18137)
1 parent 89e8285 commit 96b86ff

11 files changed: 278 additions (+278) and 632 deletions (-632) lines changed

ydb/core/base/table_index.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,11 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
154154
}
155155
tmp.clear();
156156
tmp.insert(table.Keys.begin(), table.Keys.end());
157-
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1));
157+
if (isSecondaryIndex) {
158+
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end());
159+
} else {
160+
// Vector indexes allow the same column to be used both as an index key column and as a data column
161+
}
158162
if (const auto* broken = IsContains(index.DataColumns, tmp, true)) {
159163
explain = TStringBuilder()
160164
<< "the same column can't be used as key and data column for one index, for example " << *broken;

ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp

Lines changed: 45 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,7 @@ using namespace NYql::NNodes;
1717

1818
namespace {
1919

20-
TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& /* table */, TPositionHandle pos, TExprContext& ctx,
21-
const auto& columnsToSelect) {
20+
TCoAtomList BuildKeyColumnsList(TPositionHandle pos, TExprContext& ctx, const auto& columnsToSelect) {
2221
TVector<TExprBase> columnsList;
2322
columnsList.reserve(columnsToSelect.size());
2423
for (auto column : columnsToSelect) {
@@ -35,7 +34,7 @@ TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& /* table */, TPos
3534
}
3635

3736
TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionHandle pos, TExprContext& ctx) {
38-
return BuildKeyColumnsList(table, pos, ctx, table.Metadata->KeyColumnNames);
37+
return BuildKeyColumnsList(pos, ctx, table.Metadata->KeyColumnNames);
3938
}
4039

4140
TCoAtomList MergeColumns(const NNodes::TCoAtomList& col1, const TVector<TString>& col2, TExprContext& ctx) {
@@ -306,21 +305,20 @@ struct TReadMatch {
306305
}
307306
};
308307

309-
template<typename TRead>
310-
bool CheckIndexCovering(const TRead& read, const TIntrusivePtr<TKikimrTableMetadata>& indexMeta) {
311-
for (const auto& col : read.Columns()) {
308+
bool CheckIndexCovering(const TCoAtomList& readColumns, const TIntrusivePtr<TKikimrTableMetadata>& indexMeta) {
309+
for (const auto& col : readColumns) {
312310
if (!indexMeta->Columns.contains(col.StringValue())) {
313-
return true;
311+
return false;
314312
}
315313
}
316-
return false;
314+
return true;
317315
}
318316

319317
TExprBase DoRewriteIndexRead(const TReadMatch& read, TExprContext& ctx,
320318
const TKikimrTableDescription& tableDesc, TIntrusivePtr<TKikimrTableMetadata> indexMeta, bool useStreamLookup,
321319
const TVector<TString>& extraColumns, const std::function<TExprBase(const TExprBase&)>& middleFilter = {})
322320
{
323-
const bool needDataRead = CheckIndexCovering(read, indexMeta);
321+
const bool isCovered = CheckIndexCovering(read.Columns(), indexMeta);
324322

325323
if (read.FullScan()) {
326324
const auto indexName = read.Index().StringValue();
@@ -329,7 +327,7 @@ TExprBase DoRewriteIndexRead(const TReadMatch& read, TExprContext& ctx,
329327
ctx.AddWarning(issue);
330328
}
331329

332-
if (!needDataRead) {
330+
if (isCovered) {
333331
// We can read all data from index table.
334332
auto ret = read.BuildRead(ctx, BuildTableMeta(*indexMeta, read.Pos(), ctx), read.Columns());
335333

@@ -522,22 +520,37 @@ void VectorReadLevel(
522520

523521
void VectorReadMain(
524522
TExprContext& ctx, TPositionHandle pos,
525-
const TKqpTable& postingTable, const TCoAtomList& postingColumns,
526-
const TKqpTable& mainTable, const TCoAtomList& mainColumns,
523+
const TKqpTable& postingTable,
524+
const TIntrusivePtr<TKikimrTableMetadata> & postingTableMeta,
525+
const TKqpTable& mainTable,
526+
const TIntrusivePtr<TKikimrTableMetadata> & mainTableMeta,
527+
const TCoAtomList& mainColumns,
527528
TExprNodePtr& read)
528529
{
529-
// TODO(mbkkt) handle covered index columns
530-
read = Build<TKqlLookupTable>(ctx, pos)
531-
.Table(postingTable)
532-
.LookupKeys(read)
533-
.Columns(postingColumns)
534-
.Done().Ptr();
530+
const bool isCovered = CheckIndexCovering(mainColumns, postingTableMeta);
535531

536-
read = Build<TKqlLookupTable>(ctx, pos)
537-
.Table(mainTable)
538-
.LookupKeys(read)
539-
.Columns(mainColumns)
540-
.Done().Ptr();
532+
if (!isCovered) {
533+
const auto postingColumns = BuildKeyColumnsList(pos, ctx, mainTableMeta->KeyColumnNames);
534+
535+
read = Build<TKqlLookupTable>(ctx, pos)
536+
.Table(postingTable)
537+
.LookupKeys(read)
538+
.Columns(postingColumns)
539+
.Done().Ptr();
540+
541+
read = Build<TKqlLookupTable>(ctx, pos)
542+
.Table(mainTable)
543+
.LookupKeys(read)
544+
.Columns(mainColumns)
545+
.Done().Ptr();
546+
} else {
547+
read = Build<TKqlStreamLookupTable>(ctx, pos)
548+
.Table(postingTable)
549+
.LookupKeys(read)
550+
.Columns(mainColumns)
551+
.Settings(settings.BuildNode(ctx, pos))
552+
.Done().Ptr();
553+
}
541554
}
542555

543556
void VectorTopMain(TExprContext& ctx, const TCoTopBase& top, TExprNodePtr& read) {
@@ -569,9 +582,8 @@ TExprBase DoRewriteTopSortOverKMeansTree(
569582
const auto postingTable = BuildTableMeta(*postingTableDesc->Metadata, pos, ctx);
570583
const auto mainTable = BuildTableMeta(*tableDesc.Metadata, pos, ctx);
571584

572-
const auto levelColumns = BuildKeyColumnsList(*levelTableDesc, pos, ctx,
585+
const auto levelColumns = BuildKeyColumnsList(pos, ctx,
573586
std::initializer_list<std::string_view>{NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn});
574-
const auto postingColumns = BuildKeyColumnsList(*postingTableDesc, pos, ctx, tableDesc.Metadata->KeyColumnNames);
575587
const auto& mainColumns = match.Columns();
576588

577589
TNodeOnNodeOwnedMap replaces;
@@ -602,7 +614,7 @@ TExprBase DoRewriteTopSortOverKMeansTree(
602614

603615
VectorReadLevel(indexDesc, ctx, pos, kqpCtx, levelLambda, top, levelTable, levelColumns, read);
604616

605-
VectorReadMain(ctx, pos, postingTable, postingColumns, mainTable, mainColumns, read);
617+
VectorReadMain(ctx, pos, postingTable, postingTableDesc->Metadata, mainTable, tableDesc.Metadata, mainColumns, read);
606618

607619
if (flatMap) {
608620
read = Build<TCoFlatMap>(ctx, flatMap.Cast().Pos())
@@ -639,13 +651,12 @@ TExprBase DoRewriteTopSortOverPrefixedKMeansTree(
639651
const auto prefixTable = BuildTableMeta(*prefixTableDesc->Metadata, pos, ctx);
640652
const auto mainTable = BuildTableMeta(*tableDesc.Metadata, pos, ctx);
641653

642-
const auto levelColumns = BuildKeyColumnsList(*levelTableDesc, pos, ctx,
654+
const auto levelColumns = BuildKeyColumnsList(pos, ctx,
643655
std::initializer_list<std::string_view>{NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn});
644-
const auto postingColumns = BuildKeyColumnsList(*postingTableDesc, pos, ctx, tableDesc.Metadata->KeyColumnNames);
645656
const auto prefixColumns = [&] {
646657
auto columns = indexDesc.KeyColumns;
647658
columns.back().assign(NTableIndex::NTableVectorKmeansTreeIndex::IdColumn);
648-
return BuildKeyColumnsList(*prefixTableDesc, pos, ctx, columns);
659+
return BuildKeyColumnsList(pos, ctx, columns);
649660
}();
650661
const auto& mainColumns = match.Columns();
651662

@@ -686,7 +697,7 @@ TExprBase DoRewriteTopSortOverPrefixedKMeansTree(
686697

687698
VectorReadLevel(indexDesc, ctx, pos, kqpCtx, levelLambda, top, levelTable, levelColumns, read);
688699

689-
VectorReadMain(ctx, pos, postingTable, postingColumns, mainTable, mainColumns, read);
700+
VectorReadMain(ctx, pos, postingTable, postingTableDesc->Metadata, mainTable, tableDesc.Metadata, mainColumns, read);
690701

691702
if (mainLambda) {
692703
read = Build<TCoMap>(ctx, flatMap.Pos())
@@ -733,9 +744,9 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const
733744
YQL_ENSURE(indexDesc->Type != TIndexDescription::EType::GlobalSyncVectorKMeansTree,
734745
"lookup doesn't support vector index: " << indexName);
735746

736-
const bool needDataRead = CheckIndexCovering(lookupIndex, implTable);
747+
const bool isCovered = CheckIndexCovering(lookupIndex.Columns(), implTable);
737748

738-
if (!needDataRead) {
749+
if (isCovered) {
739750
if (kqpCtx.Config->EnableKqpDataQueryStreamLookup) {
740751
TKqpStreamLookupSettings settings;
741752
settings.Strategy = EStreamLookupStrategyType::LookupRows;
@@ -805,8 +816,8 @@ TExprBase KqpRewriteStreamLookupIndex(const TExprBase& node, TExprContext& ctx,
805816
YQL_ENSURE(indexDesc->Type != TIndexDescription::EType::GlobalSyncVectorKMeansTree,
806817
"stream lookup doesn't support vector index: " << indexName);
807818

808-
const bool needDataRead = CheckIndexCovering(streamLookupIndex, implTable);
809-
if (!needDataRead) {
819+
const bool isCovered = CheckIndexCovering(streamLookupIndex.Columns(), implTable);
820+
if (isCovered) {
810821
return Build<TKqlStreamLookupTable>(ctx, node.Pos())
811822
.Table(BuildTableMeta(*implTable, node.Pos(), ctx))
812823
.LookupKeys(streamLookupIndex.LookupKeys())

0 commit comments

Comments
 (0)