Skip to content

Commit 5c359a3

Browse files
committed
Moved commit "Create vector index in SchemeShard" from ydb repo
1 parent 0a27948 commit 5c359a3

File tree

4 files changed

+319
-18
lines changed

4 files changed

+319
-18
lines changed

include/ydb-cpp-sdk/client/table/table.h

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class ChangefeedDescription;
2323
class DescribeTableResult;
2424
class ExplicitPartitions;
2525
class GlobalIndexSettings;
26+
class VectorIndexSettings;
2627
class PartitioningSettings;
2728
class DateTypeColumnModeSettings;
2829
class TtlSettings;
@@ -190,6 +191,45 @@ struct TGlobalIndexSettings {
190191
void SerializeTo(Ydb::Table::GlobalIndexSettings& proto) const;
191192
};
192193

194+
struct TVectorIndexSettings {
195+
public:
196+
enum class EDistance {
197+
Cosine,
198+
Manhattan,
199+
Euclidean,
200+
201+
Unknown = std::numeric_limits<int>::max()
202+
};
203+
204+
enum class ESimilarity {
205+
Cosine,
206+
InnerProduct,
207+
208+
Unknown = std::numeric_limits<int>::max()
209+
};
210+
211+
enum class EVectorType {
212+
Float,
213+
Uint8,
214+
Int8,
215+
Bit,
216+
217+
Unknown = std::numeric_limits<int>::max()
218+
};
219+
using TMetric = std::variant<std::monostate, EDistance, ESimilarity>;
220+
221+
TMetric Metric;
222+
EVectorType VectorType;
223+
uint32_t VectorDimension;
224+
225+
template <typename TProto>
226+
static TVectorIndexSettings FromProto(const TProto& proto);
227+
228+
void SerializeTo(Ydb::Table::VectorIndexSettings& settings) const;
229+
230+
void Out(IOutputStream &o) const;
231+
};
232+
193233
//! Represents index description
194234
class TIndexDescription {
195235
friend class NYdb::TProtoAccessor;
@@ -200,20 +240,22 @@ class TIndexDescription {
200240
EIndexType type,
201241
const std::vector<std::string>& indexColumns,
202242
const std::vector<std::string>& dataColumns = {},
203-
const TGlobalIndexSettings& settings = {}
243+
const std::vector<TGlobalIndexSettings>& globalIndexSettings = {},
244+
const std::optional<TVectorIndexSettings>& vectorIndexSettings = {}
204245
);
205246

206247
TIndexDescription(
207248
const std::string& name,
208249
const std::vector<std::string>& indexColumns,
209250
const std::vector<std::string>& dataColumns = {},
210-
const TGlobalIndexSettings& settings = {}
251+
const std::vector<TGlobalIndexSettings>& globalIndexSettings = {}
211252
);
212253

213254
const std::string& GetIndexName() const;
214255
EIndexType GetIndexType() const;
215256
const std::vector<std::string>& GetIndexColumns() const;
216257
const std::vector<std::string>& GetDataColumns() const;
258+
const std::optional<TVectorIndexSettings>& GetVectorIndexSettings() const;
217259
uint64_t GetSizeBytes() const;
218260

219261
void SerializeTo(Ydb::Table::TableIndex& proto) const;
@@ -232,7 +274,8 @@ class TIndexDescription {
232274
EIndexType IndexType_;
233275
std::vector<std::string> IndexColumns_;
234276
std::vector<std::string> DataColumns_;
235-
TGlobalIndexSettings GlobalIndexSettings_;
277+
std::vector<TGlobalIndexSettings> GlobalIndexSettings_;
278+
std::optional<TVectorIndexSettings> VectorIndexSettings_;
236279
uint64_t SizeBytes = 0;
237280
};
238281

@@ -606,6 +649,9 @@ class TTableDescription {
606649
// unique
607650
void AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);
608651
void AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);
652+
// vector KMeansTree
653+
void AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TVectorIndexSettings& vectorIndexSettings);
654+
void AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TVectorIndexSettings& vectorIndexSettings);
609655

610656
// default
611657
void AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);
@@ -825,6 +871,10 @@ class TTableBuilder {
825871
TTableBuilder& AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);
826872
TTableBuilder& AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);
827873

874+
// vector KMeansTree
875+
TTableBuilder& AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TVectorIndexSettings& vectorIndexSettings);
876+
TTableBuilder& AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TVectorIndexSettings& vectorIndexSettings);
877+
828878
// default
829879
TTableBuilder& AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);
830880
TTableBuilder& AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);

include/ydb-cpp-sdk/client/table/table_enum.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ enum class EIndexType {
2828
GlobalSync,
2929
GlobalAsync,
3030
GlobalUnique,
31+
GlobalVectorKMeansTree,
3132

3233
Unknown = std::numeric_limits<int>::max()
3334
};

src/client/table/out.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,69 @@ Y_DECLARE_OUT_SPEC(, NYdb::NTable::TCreateSessionResult, o, x) {
2323
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TDescribeTableResult, o, x) {
2424
return x.Out(o);
2525
}
26+
27+
// Writes the upper-case name of a distance metric to the stream.
// A value that matches none of the enumerators (for example one produced by a
// cast from an integer) is printed as "UNKNOWN"; the previous value-returning
// lambda flowed off its end in that case, which is undefined behavior.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EDistance, stream, value) {
    using EDistance = NYdb::NTable::TVectorIndexSettings::EDistance;

    const char* name = "UNKNOWN";
    // No `default:` label on purpose: the compiler can still warn when a new
    // enumerator is added without a matching case.
    switch (value) {
        case EDistance::Cosine:
            name = "COSINE";
            break;
        case EDistance::Manhattan:
            name = "MANHATTAN";
            break;
        case EDistance::Euclidean:
            name = "EUCLIDEAN";
            break;
        case EDistance::Unknown:
            break;
    }

    stream << name;
}
43+
44+
// Writes the upper-case name of a similarity metric to the stream.
// A value that matches none of the enumerators is printed as "UNKNOWN"; the
// previous value-returning lambda flowed off its end in that case, which is
// undefined behavior.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::ESimilarity, stream, value) {
    using ESimilarity = NYdb::NTable::TVectorIndexSettings::ESimilarity;

    const char* name = "UNKNOWN";
    // No `default:` label on purpose: the compiler can still warn when a new
    // enumerator is added without a matching case.
    switch (value) {
        case ESimilarity::Cosine:
            name = "COSINE";
            break;
        case ESimilarity::InnerProduct:
            name = "INNER_PRODUCT";
            break;
        case ESimilarity::Unknown:
            break;
    }

    stream << name;
}
58+
59+
// Writes the upper-case name of a vector type to the stream.
// A value that matches none of the enumerators is printed as "UNKNOWN"; the
// previous value-returning lambda flowed off its end in that case, which is
// undefined behavior.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EVectorType, stream, value) {
    using EVectorType = NYdb::NTable::TVectorIndexSettings::EVectorType;

    const char* name = "UNKNOWN";
    // No `default:` label on purpose: the compiler can still warn when a new
    // enumerator is added without a matching case.
    switch (value) {
        case EVectorType::Float:
            name = "FLOAT";
            break;
        case EVectorType::Uint8:
            name = "UINT8";
            break;
        case EVectorType::Int8:
            name = "INT8";
            break;
        case EVectorType::Bit:
            name = "BIT";
            break;
        case EVectorType::Unknown:
            break;
    }

    stream << name;
}
77+
78+
// Writes vector index settings as
//   { distance: <D>, vector_type: <T>, vector_dimension: <N> }
// or, for a similarity metric,
//   { similarity: <S>, vector_type: <T>, vector_dimension: <N> }
// When no metric is set (Metric holds std::monostate) the metric field is
// omitted and the output starts with "{ vector_type: ..." -- previously this
// case produced a stray leading comma ("{, vector_type: ...").
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings, stream, value) {
    using TSettings = NYdb::NTable::TVectorIndexSettings;

    stream << "{";

    // Text emitted in front of the next field: a single space for the first
    // field, ", " once something has already been printed.
    const char* separator = " ";

    if (const auto* distance = std::get_if<TSettings::EDistance>(&value.Metric)) {
        stream << separator << "distance: " << *distance;
        separator = ", ";
    } else if (const auto* similarity = std::get_if<TSettings::ESimilarity>(&value.Metric)) {
        stream << separator << "similarity: " << *similarity;
        separator = ", ";
    }

    stream << separator << "vector_type: " << value.VectorType;
    stream << ", vector_dimension: " << value.VectorDimension;

    stream << " }";
}

0 commit comments

Comments
 (0)