Skip to content

Commit 576c1b6

Browse files
authored
[C++ SDK] Added vector index example for docs (#20655)
1 parent 63e1237 commit 576c1b6

File tree

5 files changed

+311
-0
lines changed

5 files changed

+311
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#include "vector_index.h"
2+
3+
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/helpers/helpers.h>
4+
5+
6+
void PrintResults(const std::vector<TResultItem>& items)
7+
{
8+
if (items.empty()) {
9+
std::cout << "No items found" << std::endl;
10+
return;
11+
}
12+
13+
for (const auto& item : items) {
14+
std::cout << "[score=" << item.Score << "] " << item.Id << ": " << item.Document << std::endl;
15+
}
16+
}
17+
18+
void VectorExample(
19+
const std::string& endpoint,
20+
const std::string& database,
21+
const std::string& tableName,
22+
const std::string& indexName)
23+
{
24+
auto driverConfig = NYdb::CreateFromEnvironment(endpoint + "/?database=" + database);
25+
NYdb::TDriver driver(driverConfig);
26+
NYdb::NQuery::TQueryClient client(driver);
27+
28+
try {
29+
DropVectorTable(client, tableName);
30+
CreateVectorTable(client, tableName);
31+
std::vector<TItem> items = {
32+
{.Id = "1", .Document = "document 1", .Embedding = {0.98, 0.1, 0.01}},
33+
{.Id = "2", .Document = "document 2", .Embedding = {1.0, 0.05, 0.05}},
34+
{.Id = "3", .Document = "document 3", .Embedding = {0.9, 0.1, 0.1}},
35+
{.Id = "4", .Document = "document 4", .Embedding = {0.03, 0.0, 0.99}},
36+
{.Id = "5", .Document = "document 5", .Embedding = {0.0, 0.0, 0.99}},
37+
{.Id = "6", .Document = "document 6", .Embedding = {0.0, 0.02, 1.0}},
38+
{.Id = "7", .Document = "document 7", .Embedding = {0.0, 1.05, 0.05}},
39+
{.Id = "8", .Document = "document 8", .Embedding = {0.02, 0.98, 0.1}},
40+
{.Id = "9", .Document = "document 9", .Embedding = {0.0, 1.0, 0.05}},
41+
};
42+
InsertItems(client, tableName, items);
43+
PrintResults(SearchItems(client, tableName, {1.0, 0.0, 0.0}, "CosineSimilarity", 3));
44+
AddIndex(driver, client, database, tableName, indexName, "similarity=cosine", 3, 1, 3);
45+
PrintResults(SearchItems(client, tableName, {1.0, 0.0, 0.0}, "CosineSimilarity", 3, indexName));
46+
} catch (const std::exception& e) {
47+
std::cerr << "Execution failed: " << e.what() << std::endl;
48+
}
49+
50+
driver.Stop(true);
51+
}
52+
53+
// Entry point: parses the four required command-line options
// (-e/--endpoint, -d/--database, --table, --index) and runs the example.
int main(int argc, char** argv) {
    std::string endpoint;
    std::string database;
    std::string tableName;
    std::string indexName;

    auto opts = NLastGetopt::TOpts::Default();
    opts.SetFreeArgsMin(0);

    opts.AddLongOption('e', "endpoint", "YDB endpoint")
        .Required()
        .RequiredArgument("HOST:PORT")
        .StoreResult(&endpoint);
    opts.AddLongOption('d', "database", "YDB database")
        .Required()
        .RequiredArgument("DATABASE")
        .StoreResult(&database);
    opts.AddLongOption("table", "table name")
        .Required()
        .RequiredArgument("TABLE")
        .StoreResult(&tableName);
    opts.AddLongOption("index", "index name")
        .Required()
        .RequiredArgument("INDEX")
        .StoreResult(&indexName);

    // Constructing the parse result performs the parsing and fills the
    // variables registered through StoreResult above.
    NLastGetopt::TOptsParseResult parsedArgs(&opts, argc, argv);

    VectorExample(endpoint, database, tableName, indexName);
    return 0;
}
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#include "vector_index.h"
2+
3+
#include <format>
4+
5+
6+
// Drops the table `tableName` if it exists and reports it on stdout.
//
// Throws (via NYdb::NStatusHelpers::ThrowOnError) when the query does not
// succeed after the client's retries.
void DropVectorTable(NYdb::NQuery::TQueryClient& client, const std::string& tableName)
{
    // The name is backtick-quoted, exactly as in the CREATE/UPSERT/ALTER
    // statements of this file, so that every statement addresses the table the
    // same way. The text is built once, outside the retried lambda.
    const std::string query = std::format("DROP TABLE IF EXISTS `{}`", tableName);

    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync([&query](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync();
    }));

    std::cout << "Vector table dropped: " << tableName << std::endl;
}
14+
15+
// Creates the table `tableName` (if it does not exist yet) with the columns
// id (Utf8, primary key), document (Utf8) and embedding (String), then reports
// it on stdout.
//
// Throws (via NYdb::NStatusHelpers::ThrowOnError) when the query does not
// succeed after the client's retries.
void CreateVectorTable(NYdb::NQuery::TQueryClient& client, const std::string& tableName)
{
    const std::string createQuery = std::format(R"(
        CREATE TABLE IF NOT EXISTS `{}` (
            id Utf8,
            document Utf8,
            embedding String,
            PRIMARY KEY (id)
        ))", tableName);

    auto runCreate = [&createQuery](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(createQuery, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync();
    };
    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync(runCreate));

    std::cout << "Vector table created: " << tableName << std::endl;
}
31+
32+
// Upserts `items` into the table `tableName` in a single serializable
// read-write transaction and prints how many items were written.
//
// The items travel as one `$items` parameter (a list of structs); each
// embedding is sent as List<Float> and converted inside the query with
// Knn::ToBinaryStringFloat before being stored in the `embedding` column.
//
// An empty `items` is a no-op: no parameter is built and no query is sent.
//
// Throws (via NYdb::NStatusHelpers::ThrowOnError) when the query does not
// succeed after the client's retries.
void InsertItems(
    NYdb::NQuery::TQueryClient& client,
    const std::string& tableName,
    const std::vector<TItem>& items)
{
    if (items.empty()) {
        // Nothing to write: avoid building an element-less list parameter and
        // spending a round trip on an UPSERT that cannot change anything.
        std::cout << items.size() << " items inserted" << std::endl;
        return;
    }

    const std::string query = std::format(R"(
        DECLARE $items AS List<Struct<
            id: Utf8,
            document: Utf8,
            embedding: List<Float>
        >>;

        UPSERT INTO `{0}`
        (
            id,
            document,
            embedding
        )
        SELECT
            id,
            document,
            Untag(Knn::ToBinaryStringFloat(embedding), "FloatVector"),
        FROM AS_TABLE($items);
    )", tableName);

    NYdb::TParamsBuilder paramsBuilder;
    auto& valueBuilder = paramsBuilder.AddParam("$items");
    valueBuilder.BeginList();
    for (const auto& item : items) {
        valueBuilder.AddListItem();
        valueBuilder.BeginStruct();
        valueBuilder.AddMember("id").Utf8(item.Id);
        valueBuilder.AddMember("document").Utf8(item.Document);
        valueBuilder.AddMember("embedding").BeginList();
        for (const auto& value : item.Embedding) {
            valueBuilder.AddListItem().Float(value);
        }
        valueBuilder.EndList();
        valueBuilder.EndStruct();
    }
    valueBuilder.EndList();
    valueBuilder.Build();

    // The parameters are built once and captured by value, so every retry
    // attempt sends the same payload.
    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync([params = paramsBuilder.Build(), &query](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(
            query,
            NYdb::NQuery::TTxControl::BeginTx(NYdb::NQuery::TTxSettings::SerializableRW()).CommitTx(),
            params).ExtractValueSync();
    }));

    std::cout << items.size() << " items inserted" << std::endl;
}
81+
82+
// Builds a `vector_kmeans_tree` index over the `embedding` column and
// publishes it under `indexName`.
//
// The work is done in two steps:
//   1. ALTER TABLE ... ADD INDEX creates the index under the temporary name
//      `<indexName>__temp` (through the query client);
//   2. the table client renames the temporary index to `indexName`, with
//      ReplaceDestination_ set so an existing index of that name is replaced.
//
// `strategy` is inserted verbatim into the WITH (...) clause (the caller in
// this example passes "similarity=cosine"); `dim`, `levels` and `clusters`
// fill vector_dimension, levels and clusters respectively.
//
// Throws (via NYdb::NStatusHelpers::ThrowOnError) when either step does not
// succeed after the client's retries.
void AddIndex(
    NYdb::TDriver& driver,
    NYdb::NQuery::TQueryClient& client,
    const std::string& database,
    const std::string& tableName,
    const std::string& indexName,
    const std::string& strategy,
    std::uint64_t dim,
    std::uint64_t levels,
    std::uint64_t clusters)
{
    const std::string addIndexQuery = std::format(R"(
        ALTER TABLE `{0}`
        ADD INDEX {1}__temp
        GLOBAL USING vector_kmeans_tree
        ON (embedding)
        WITH (
            {2},
            vector_type="Float",
            vector_dimension={3},
            levels={4},
            clusters={5}
        );
    )", tableName, indexName, strategy, dim, levels, clusters);

    auto runAddIndex = [&addIndexQuery](NYdb::NQuery::TSession session) {
        return session.ExecuteQuery(addIndexQuery, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync();
    };
    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync(runAddIndex));

    // Step 2: swap the freshly built index into place under its final name.
    const std::string tablePath = database + "/" + tableName;
    const std::string tempIndexName = indexName + "__temp";

    NYdb::NTable::TAlterTableSettings renameSettings;
    renameSettings.AppendRenameIndexes(NYdb::NTable::TRenameIndex{
        .SourceName_ = tempIndexName,
        .DestinationName_ = indexName,
        .ReplaceDestination_ = true
    });

    NYdb::NTable::TTableClient tableClient(driver);
    NYdb::NStatusHelpers::ThrowOnError(tableClient.RetryOperationSync([&](NYdb::NTable::TSession session) {
        return session.AlterTable(tablePath, renameSettings).ExtractValueSync();
    }));

    std::cout << "Table index `" << indexName << "` for table `" << tableName << "` added" << std::endl;
}
124+
125+
// Returns up to `limit` rows of `tableName` ranked against `embedding`.
//
// `strategy` is the name of the Knn:: function used for scoring (the caller in
// this example passes "CosineSimilarity"). Names ending in "Similarity" are
// ordered descending (larger is closer); everything else is ordered ascending.
// When `indexName` is set, the query reads through `VIEW <indexName>`.
//
// Rows whose score is NULL are skipped, because there is nothing to rank them
// by; a NULL id or document is returned as an empty string.
//
// Throws (via NYdb::NStatusHelpers::ThrowOnError) when the query does not
// succeed after the client's retries.
std::vector<TResultItem> SearchItems(
    NYdb::NQuery::TQueryClient& client,
    const std::string& tableName,
    const std::vector<float>& embedding,
    const std::string& strategy,
    std::uint64_t limit,
    const std::optional<std::string>& indexName)
{
    const std::string viewIndex = indexName ? "VIEW " + *indexName : "";
    const std::string sortOrder = strategy.ends_with("Similarity") ? "DESC" : "ASC";

    // The table name is backtick-quoted, matching the CREATE/UPSERT/ALTER
    // statements issued elsewhere in this file.
    const std::string query = std::format(R"(
        DECLARE $embedding as List<Float>;

        $TargetEmbedding = Knn::ToBinaryStringFloat($embedding);

        SELECT
            id,
            document,
            Knn::{2}(embedding, $TargetEmbedding) as score
        FROM `{0}` {1}
        ORDER BY score
        {3}
        LIMIT {4};
    )", tableName, viewIndex, strategy, sortOrder, limit);

    NYdb::TParamsBuilder paramsBuilder;
    auto& valueBuilder = paramsBuilder.AddParam("$embedding");
    valueBuilder.BeginList();
    for (auto value : embedding) {
        valueBuilder.AddListItem().Float(value);
    }
    valueBuilder.EndList().Build();

    std::vector<TResultItem> result;

    NYdb::NStatusHelpers::ThrowOnError(client.RetryQuerySync([params = paramsBuilder.Build(), &query, &result](NYdb::NQuery::TSession session) {
        auto execResult = session.ExecuteQuery(
            query,
            NYdb::NQuery::TTxControl::BeginTx(NYdb::NQuery::TTxSettings::SerializableRW()).CommitTx(),
            params).ExtractValueSync();
        if (!execResult.IsSuccess()) {
            return execResult;
        }

        // Start every successful attempt from a clean slate so the output can
        // never contain rows from an earlier attempt.
        result.clear();

        auto parser = execResult.GetResultSetParser(0);
        while (parser.TryNextRow()) {
            const auto id = parser.ColumnParser(0).GetOptionalUtf8();
            const auto document = parser.ColumnParser(1).GetOptionalUtf8();
            const auto score = parser.ColumnParser(2).GetOptionalFloat();

            // All three columns are optional; dereferencing an empty optional
            // is undefined behaviour, so NULLs are handled explicitly.
            if (!score) {
                continue;
            }

            result.push_back({
                .Id = id.value_or(std::string{}),
                .Document = document.value_or(std::string{}),
                .Score = *score
            });
        }
        return execResult;
    }));

    return result;
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#pragma once
2+
3+
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/driver/driver.h>
4+
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/query/client.h>
5+
#include <ydb/public/sdk/cpp/include/ydb-cpp-sdk/client/table/table.h>
6+
7+
#include <library/cpp/getopt/last_getopt.h>
8+
9+
// One document to be stored in the vector table.
struct TItem {
    std::string Id;                // written to the `id` column (the table's primary key)
    std::string Document;          // written to the `document` column
    std::vector<float> Embedding;  // vector components, sent to the server as List<Float>
};
14+
15+
// One row returned by a vector search.
struct TResultItem {
    std::string Id;        // value of the `id` column
    std::string Document;  // value of the `document` column
    // Value of the Knn:: scoring function for this row. Zero-initialized so a
    // default-constructed item never carries an indeterminate float; the type
    // stays an aggregate, so brace and designated initialization still work.
    float Score = 0.0f;
};
20+
21+
void DropVectorTable(NYdb::NQuery::TQueryClient& client, const std::string& tableName);
22+
23+
void CreateVectorTable(NYdb::NQuery::TQueryClient& client, const std::string& tableName);
24+
25+
void InsertItems(
26+
NYdb::NQuery::TQueryClient& client,
27+
const std::string& tableName,
28+
const std::vector<TItem>& items);
29+
30+
void AddIndex(
31+
NYdb::TDriver& driver,
32+
NYdb::NQuery::TQueryClient& client,
33+
const std::string& database,
34+
const std::string& tableName,
35+
const std::string& indexName,
36+
const std::string& strategy,
37+
std::uint64_t dim,
38+
std::uint64_t levels,
39+
std::uint64_t clusters);
40+
41+
std::vector<TResultItem> SearchItems(
42+
NYdb::NQuery::TQueryClient& client,
43+
const std::string& tableName,
44+
const std::vector<float>& embedding,
45+
const std::string& strategy,
46+
std::uint64_t limit,
47+
const std::optional<std::string>& indexName = std::nullopt);
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
PROGRAM()
2+
3+
SRCS(
4+
main.cpp
5+
vector_index.cpp
6+
)
7+
8+
PEERDIR(
9+
library/cpp/getopt
10+
ydb/public/sdk/cpp/src/client/query
11+
ydb/public/sdk/cpp/src/client/table
12+
ydb/public/sdk/cpp/src/client/helpers
13+
)
14+
15+
END()

ydb/public/sdk/cpp/examples/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ RECURSE(
88
topic_writer/transaction
99
ttl
1010
vector_index
11+
vector_index_builtin
1112
)

0 commit comments

Comments
 (0)