Skip to content

Commit fe88806

Browse files
committed
Add more details to vector index rejection errors (#19455) (#20060)
1 parent 7edec7d commit fe88806

File tree

2 files changed

+90
-14
lines changed

2 files changed

+90
-14
lines changed

ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -144,12 +144,13 @@ bool CanPushTopSort(const TCoTopBase& node, const TKikimrTableDescription& index
144144
return IsTableExistsKeySelector(node.KeySelectorLambda(), indexDesc, columns);
145145
}
146146

147-
bool CanUseVectorIndex(const TIndexDescription& indexDesc, const TExprBase& lambdaBody, const TCoTopBase& top) {
147+
bool CanUseVectorIndex(const TIndexDescription& indexDesc, const TExprBase& lambdaBody, const TCoTopBase& top, TString& error) {
148148
Y_ASSERT(indexDesc.Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree);
149149
// TODO(mbkkt) We need to account for top.Count(), but it is not clear what to do if its value is only known at runtime.
150+
const auto& col = indexDesc.KeyColumns.back();
150151
auto checkMember = [&] (const TExprBase& expr) {
151152
auto member = expr.Maybe<TCoMember>();
152-
return member && member.Cast().Name().Value() == indexDesc.KeyColumns.back();
153+
return member && member.Cast().Name().Value() == col;
153154
};
154155
auto checkUdf = [&] (const TExprBase& expr, bool checkMembers) {
155156
auto apply = expr.Maybe<TCoApply>();
@@ -175,18 +176,31 @@ bool CanUseVectorIndex(const TIndexDescription& indexDesc, const TExprBase& lamb
175176
auto& desc = std::get<NKikimrKqp::TVectorIndexKmeansTreeDescription>(indexDesc.SpecializedIndexDescription);
176177
switch (desc.settings().settings().metric()) {
177178
case Ydb::Table::VectorIndexSettings::SIMILARITY_INNER_PRODUCT:
178-
return !asc && methodName == "Knn.InnerProductSimilarity";
179+
if (!asc && methodName == "Knn.InnerProductSimilarity") {
180+
return true;
181+
}
182+
error = TStringBuilder() << "Knn::InnerProductSimilarity(" << col << ", ...) DESC";
183+
return false;
179184
case Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE:
180185
case Ydb::Table::VectorIndexSettings::DISTANCE_COSINE:
181-
if (asc) {
182-
return methodName == "Knn.CosineDistance";
183-
} else {
184-
return methodName == "Knn.CosineSimilarity";
186+
if (asc && methodName == "Knn.CosineDistance" ||
187+
!asc && methodName == "Knn.CosineSimilarity") {
188+
return true;
185189
}
190+
error = TStringBuilder() << "Knn::CosineSimilarity(" << col << ", ...) DESC or Knn::CosineDistance(" << col << ", ...) ASC";
191+
return false;
186192
case Ydb::Table::VectorIndexSettings::DISTANCE_MANHATTAN:
187-
return asc && methodName == "Knn.ManhattanDistance";
193+
if (asc && methodName == "Knn.ManhattanDistance") {
194+
return true;
195+
}
196+
error = TStringBuilder() << "Knn::ManhattanDistance(" << col << ", ...) ASC";
197+
return false;
188198
case Ydb::Table::VectorIndexSettings::DISTANCE_EUCLIDEAN:
189-
return asc && methodName == "Knn.EuclideanDistance";
199+
if (asc && methodName == "Knn.EuclideanDistance") {
200+
return true;
201+
}
202+
error = TStringBuilder() << "Knn::EuclideanDistance(" << col << ", ...) ASC";
203+
return false;
190204
default:
191205
Y_UNREACHABLE();
192206
}
@@ -1056,10 +1070,12 @@ TExprBase KqpRewriteTopSortOverIndexRead(const TExprBase& node, TExprContext& ct
10561070
};
10571071
const auto* lambdaArgs = topBase.KeySelectorLambda().Args().Raw();
10581072
auto lambdaBody = topBase.KeySelectorLambda().Body();
1059-
bool canUseVectorIndex = CanUseVectorIndex(*indexDesc, lambdaBody, topBase);
1073+
TString error;
1074+
bool canUseVectorIndex = CanUseVectorIndex(*indexDesc, lambdaBody, topBase, error);
10601075
if (indexDesc->KeyColumns.size() > 1) {
10611076
if (!canUseVectorIndex) {
1062-
return reject("sorting doesn't call distance function, reference distance from projection not supported yet");
1077+
return reject(TStringBuilder() << "sorting must contain distance: "
1078+
<< error << ", reference distance from projection not supported yet");
10631079
}
10641080
if (!maybeFlatMap.Lambda().Body().Maybe<TCoOptionalIf>()) {
10651081
return reject("only simple conditions supported for now");
@@ -1070,7 +1086,7 @@ TExprBase KqpRewriteTopSortOverIndexRead(const TExprBase& node, TExprContext& ct
10701086
if (!canUseVectorIndex) {
10711087
auto argument = lambdaBody.Maybe<TCoMember>().Struct().Maybe<TCoArgument>();
10721088
if (!argument) {
1073-
return reject("sorting doesn't contain distance");
1089+
return reject(TStringBuilder() << "sorting must contain distance: " << error);
10741090
}
10751091
auto asStruct = maybeFlatMap.Lambda().Body().Maybe<TCoJust>().Input().Maybe<TCoAsStruct>();
10761092
if (!asStruct) {
@@ -1100,11 +1116,11 @@ TExprBase KqpRewriteTopSortOverIndexRead(const TExprBase& node, TExprContext& ct
11001116
continue;
11011117
}
11021118
lambdaBody = TExprBase{argChildren[1]};
1103-
canUseVectorIndex = CanUseVectorIndex(*indexDesc, lambdaBody, topBase);
1119+
canUseVectorIndex = CanUseVectorIndex(*indexDesc, lambdaBody, topBase, error);
11041120
break;
11051121
}
11061122
if (!canUseVectorIndex) {
1107-
return reject("neither projection nor sorting contain distance");
1123+
return reject(TStringBuilder() << "projection or sorting must contain distance: " << error);
11081124
}
11091125
lambdaArgs = maybeFlatMap.Cast().Lambda().Args().Raw();
11101126
}

ydb/core/kqp/ut/indexes/kqp_indexes_vector_ut.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <ydb/core/kqp/gateway/kqp_metadata_loader.h>
77
#include <ydb/core/kqp/host/kqp_host_impl.h>
88

9+
#include <ydb/public/sdk/cpp/adapters/issue/issue.h>
910
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/operation/operation.h>
1011
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/proto/accessor.h>
1112
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h>
@@ -27,6 +28,12 @@ using namespace NYdb::NTable;
2728

2829
Y_UNIT_TEST_SUITE(KqpVectorIndexes) {
2930

31+
// Test helper: executes `query` on `session` with a transaction control that begins a
// SerializableRW transaction and commits it in the same call. The query is kept in the
// query cache and basic query statistics are collected. Returns the result obtained
// through ExtractValueSync().
NYdb::NTable::TDataQueryResult ExecuteDataQuery(TSession& session, const TString& query) {
32+
const auto txSettings = TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx();
33+
return session.ExecuteDataQuery(query, txSettings,
34+
TExecDataQuerySettings().KeepInQueryCache(true).CollectQueryStats(ECollectQueryStatsMode::Basic)).ExtractValueSync();
35+
}
36+
3037
std::vector<i64> DoPositiveQueryVectorIndex(TSession& session, const TString& query, bool covered = false) {
3138
{
3239
auto result = session.ExplainDataQuery(query).ExtractValueSync();
@@ -520,6 +527,59 @@ Y_UNIT_TEST_SUITE(KqpVectorIndexes) {
520527
DoPositiveQueriesVectorIndexOrderByCosine(session, true /*covered*/);
521528
}
522529

530+
// Checks that a SELECT through a vector index is rejected when its ORDER BY expression
// does not match the index settings, and that the reported KIKIMR_WRONG_INDEX_USAGE issue
// contains a hint naming the expected expression and sort direction.
Y_UNIT_TEST(OrderByReject) {
531+
NKikimrConfig::TFeatureFlags featureFlags;
532+
featureFlags.SetEnableVectorIndex(true);
533+
auto setting = NKikimrKqp::TKqpSetting();
534+
auto serverSettings = TKikimrSettings()
535+
.SetFeatureFlags(featureFlags)
536+
.SetKqpSettings({setting});
537+
538+
TKikimrRunner kikimr(serverSettings);
539+
kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::BUILD_INDEX, NActors::NLog::PRI_TRACE);
540+
kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_TRACE);
541+
542+
auto db = kikimr.GetTableClient();
543+
auto session = DoCreateTableForVectorIndex(db, false);
544+
545+
// Each case is a triple:
//   check[0] - index setting substituted into the WITH clause of ADD INDEX,
//   check[1] - ORDER BY expression used in the SELECT that must be rejected,
//   check[2] - hint text expected after "sorting must contain distance: " in the issue.
for (const auto & check: TVector<TVector<const char*>>({
546+
{"distance=cosine", "Knn::CosineDistance(emb, 'abc') DESC", "Knn::CosineSimilarity(emb, ...) DESC or Knn::CosineDistance(emb, ...) ASC"},
547+
{"distance=cosine", "Knn::CosineSimilarity(emb, 'abc') ASC", "Knn::CosineSimilarity(emb, ...) DESC or Knn::CosineDistance(emb, ...) ASC"},
548+
{"similarity=cosine", "Knn::CosineDistance(emb, 'abc') DESC", "Knn::CosineSimilarity(emb, ...) DESC or Knn::CosineDistance(emb, ...) ASC"},
549+
{"similarity=cosine", "Knn::CosineSimilarity(emb, 'abc') ASC", "Knn::CosineSimilarity(emb, ...) DESC or Knn::CosineDistance(emb, ...) ASC"},
550+
{"similarity=inner_product", "Knn::InnerProductSimilarity(emb, 'abc') ASC", "Knn::InnerProductSimilarity(emb, ...) DESC"},
551+
{"distance=manhattan", "Knn::ManhattanDistance(emb, 'abc') DESC", "Knn::ManhattanDistance(emb, ...) ASC"},
552+
{"distance=euclidean", "Knn::EuclideanDistance(emb, 'abc') DESC", "Knn::EuclideanDistance(emb, ...) ASC"},
553+
}))
554+
{
555+
// Build the vector index with the setting under test; creation itself must succeed.
const TString createIndex(Q_(Sprintf(R"(
556+
ALTER TABLE `/Root/TestTable`
557+
ADD INDEX index
558+
GLOBAL USING vector_kmeans_tree
559+
ON (emb)
560+
WITH (%s, vector_type="uint8", vector_dimension=2, levels=1, clusters=2);
561+
)", check[0])));
562+
auto result = session.ExecuteSchemeQuery(createIndex).ExtractValueSync();
563+
UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
564+
565+
// Query through the index with the mismatching ORDER BY expression.
const TString selectQuery(Q1_(Sprintf(R"(
566+
SELECT * FROM `/Root/TestTable`
567+
VIEW index ORDER BY %s
568+
)", check[1])));
569+
result = ExecuteDataQuery(session, selectQuery);
570+
// The query must fail, and one of its issues must carry the expected hint text.
UNIT_ASSERT_C(HasIssue(NYdb::NAdapters::ToYqlIssues(result.GetIssues()), NYql::TIssuesIds::KIKIMR_WRONG_INDEX_USAGE,
571+
[&](const NYql::TIssue& issue) {
572+
return issue.GetMessage().Contains("sorting must contain distance: " + TString(check[2]));
573+
}), result.GetIssues().ToString());
574+
UNIT_ASSERT(!result.IsSuccess());
575+
576+
// A new session is created before dropping the index.
// NOTE(review): presumably the failed data query leaves the old session unusable - confirm.
// The index is dropped so the next iteration can add an index with the same name.
session = db.CreateSession().GetValueSync().GetSession();
577+
const TString dropIndex(Q_("ALTER TABLE `/Root/TestTable` DROP INDEX index"));
578+
result = session.ExecuteSchemeQuery(dropIndex).ExtractValueSync();
579+
UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
580+
}
581+
}
582+
523583
}
524584

525585
}

0 commit comments

Comments
 (0)