Skip to content

Commit bda8a52

Browse files
MBkkt authored and Gazizonoki committed
Moved commit "Adjust vector index settings" from ydb repo
1 parent b195618 commit bda8a52

File tree

4 files changed

+188
-190
lines changed

4 files changed

+188
-190
lines changed

include/ydb-cpp-sdk/client/table/table.h

Lines changed: 48 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ class DescribeTableResult;
2424
class ExplicitPartitions;
2525
class GlobalIndexSettings;
2626
class VectorIndexSettings;
27+
class KMeansTreeSettings;
2728
class PartitioningSettings;
2829
class DateTypeColumnModeSettings;
2930
class TtlSettings;
@@ -187,8 +188,7 @@ struct TExplicitPartitions {
187188

188189
FLUENT_SETTING_VECTOR(TValue, SplitPoints);
189190

190-
template <typename TProto>
191-
static TExplicitPartitions FromProto(const TProto& proto);
191+
static TExplicitPartitions FromProto(const Ydb::Table::ExplicitPartitions& proto);
192192

193193
void SerializeTo(Ydb::Table::ExplicitPartitions& proto) const;
194194
};
@@ -199,47 +199,67 @@ struct TGlobalIndexSettings {
199199
TPartitioningSettings PartitioningSettings;
200200
TUniformOrExplicitPartitions Partitions;
201201

202-
template <typename TProto>
203-
static TGlobalIndexSettings FromProto(const TProto& proto);
202+
static TGlobalIndexSettings FromProto(const Ydb::Table::GlobalIndexSettings& proto);
204203

205204
void SerializeTo(Ydb::Table::GlobalIndexSettings& proto) const;
206205
};
207206

208207
struct TVectorIndexSettings {
209208
public:
210-
enum class EDistance {
211-
Cosine,
209+
enum class EMetric {
210+
Unspecified = 0,
211+
InnerProduct,
212+
CosineSimilarity,
213+
CosineDistance,
212214
Manhattan,
213215
Euclidean,
216+
};
214217

215-
Unknown = std::numeric_limits<int>::max()
218+
enum class EVectorType {
219+
Unspecified = 0,
220+
Float,
221+
Uint8,
222+
Int8,
223+
Bit,
216224
};
217225

218-
enum class ESimilarity {
219-
Cosine,
220-
InnerProduct,
226+
EMetric Metric = EMetric::Unspecified;
227+
EVectorType VectorType = EVectorType::Unspecified;
228+
uint32_t VectorDimension = 0;
221229

222-
Unknown = std::numeric_limits<int>::max()
230+
static TVectorIndexSettings FromProto(const Ydb::Table::VectorIndexSettings& proto);
231+
232+
void SerializeTo(Ydb::Table::VectorIndexSettings& settings) const;
233+
234+
void Out(IOutputStream &o) const;
235+
};
236+
237+
struct TKMeansTreeSettings {
238+
public:
239+
enum class EMetric {
240+
Unspecified = 0,
241+
InnerProduct,
242+
CosineSimilarity,
243+
CosineDistance,
244+
Manhattan,
245+
Euclidean,
223246
};
224247

225248
enum class EVectorType {
249+
Unspecified = 0,
226250
Float,
227251
Uint8,
228252
Int8,
229253
Bit,
230-
231-
Unknown = std::numeric_limits<int>::max()
232254
};
233-
using TMetric = std::variant<std::monostate, EDistance, ESimilarity>;
234255

235-
TMetric Metric;
236-
EVectorType VectorType;
237-
uint32_t VectorDimension;
256+
TVectorIndexSettings Settings;
257+
uint32_t Clusters = 0;
258+
uint32_t Levels = 0;
238259

239-
template <typename TProto>
240-
static TVectorIndexSettings FromProto(const TProto& proto);
260+
static TKMeansTreeSettings FromProto(const Ydb::Table::KMeansTreeSettings& proto);
241261

242-
void SerializeTo(Ydb::Table::VectorIndexSettings& settings) const;
262+
void SerializeTo(Ydb::Table::KMeansTreeSettings& settings) const;
243263

244264
void Out(IOutputStream &o) const;
245265
};
@@ -255,7 +275,7 @@ class TIndexDescription {
255275
const std::vector<std::string>& indexColumns,
256276
const std::vector<std::string>& dataColumns = {},
257277
const std::vector<TGlobalIndexSettings>& globalIndexSettings = {},
258-
const std::optional<TVectorIndexSettings>& vectorIndexSettings = {}
278+
const std::variant<std::monostate, TKMeansTreeSettings>& specializedIndexSettings = {}
259279
);
260280

261281
TIndexDescription(
@@ -269,7 +289,7 @@ class TIndexDescription {
269289
EIndexType GetIndexType() const;
270290
const std::vector<std::string>& GetIndexColumns() const;
271291
const std::vector<std::string>& GetDataColumns() const;
272-
const std::optional<TVectorIndexSettings>& GetVectorIndexSettings() const;
292+
const std::variant<std::monostate, TKMeansTreeSettings>& GetVectorIndexSettings() const;
273293
uint64_t GetSizeBytes() const;
274294

275295
void SerializeTo(Ydb::Table::TableIndex& proto) const;
@@ -289,8 +309,8 @@ class TIndexDescription {
289309
std::vector<std::string> IndexColumns_;
290310
std::vector<std::string> DataColumns_;
291311
std::vector<TGlobalIndexSettings> GlobalIndexSettings_;
292-
std::optional<TVectorIndexSettings> VectorIndexSettings_;
293-
uint64_t SizeBytes = 0;
312+
std::variant<std::monostate, TKMeansTreeSettings> SpecializedIndexSettings_;
313+
uint64_t SizeBytes_ = 0;
294314
};
295315

296316
struct TRenameIndex {
@@ -665,8 +685,8 @@ class TTableDescription {
665685
void AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);
666686
void AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);
667687
// vector KMeansTree
668-
void AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TVectorIndexSettings& vectorIndexSettings);
669-
void AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TVectorIndexSettings& vectorIndexSettings);
688+
void AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TKMeansTreeSettings& indexSettings);
689+
void AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TKMeansTreeSettings& indexSettings);
670690

671691
// default
672692
void AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);
@@ -889,8 +909,8 @@ class TTableBuilder {
889909
TTableBuilder& AddUniqueSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);
890910

891911
// vector KMeansTree
892-
TTableBuilder& AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TVectorIndexSettings& vectorIndexSettings);
893-
TTableBuilder& AddVectorKMeansTreeSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TVectorIndexSettings& vectorIndexSettings);
912+
TTableBuilder& AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TKMeansTreeSettings& indexSettings);
913+
TTableBuilder& AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TKMeansTreeSettings& indexSettings);
894914

895915
// default
896916
TTableBuilder& AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);

src/api/protos/ydb_table.proto

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -61,19 +61,15 @@ message GlobalIndexSettings {
6161
}
6262

6363
message VectorIndexSettings {
64-
enum Distance {
65-
DISTANCE_UNSPECIFIED = 0;
66-
DISTANCE_COSINE = 1;
67-
DISTANCE_MANHATTAN = 2;
68-
DISTANCE_EUCLIDEAN = 3;
64+
enum Metric {
65+
METRIC_UNSPECIFIED = 0;
66+
SIMILARITY_INNER_PRODUCT = 1;
67+
SIMILARITY_COSINE = 2;
68+
DISTANCE_COSINE = 3;
69+
DISTANCE_MANHATTAN = 4;
70+
DISTANCE_EUCLIDEAN = 5;
6971
}
7072

71-
enum Similarity {
72-
SIMILARITY_UNSPECIFIED = 0;
73-
SIMILARITY_COSINE = 1;
74-
SIMILARITY_INNER_PRODUCT = 2;
75-
}
76-
7773
enum VectorType {
7874
VECTOR_TYPE_UNSPECIFIED = 0;
7975
VECTOR_TYPE_FLOAT = 1;
@@ -82,13 +78,19 @@ message VectorIndexSettings {
8278
VECTOR_TYPE_BIT = 4;
8379
}
8480

85-
oneof metric {
86-
Distance distance = 1;
87-
Similarity similarity = 2;
88-
}
89-
VectorType vector_type = 3;
81+
Metric metric = 1;
82+
83+
VectorType vector_type = 2;
84+
85+
uint32 vector_dimension = 3;
86+
}
9087

91-
uint32 vector_dimension = 4;
88+
message KMeansTreeSettings {
89+
VectorIndexSettings settings = 1;
90+
// average count of clusters on each level of tree, 0 -- means auto
91+
uint32 clusters = 2;
92+
// average count of levels in the tree, 0 -- means auto
93+
uint32 levels = 3;
9294
}
9395

9496
message GlobalIndex {
@@ -106,7 +108,7 @@ message GlobalUniqueIndex {
106108
message GlobalVectorKMeansTreeIndex {
107109
GlobalIndexSettings level_table_settings = 1;
108110
GlobalIndexSettings posting_table_settings = 2;
109-
VectorIndexSettings vector_settings = 3;
111+
KMeansTreeSettings vector_settings = 3;
110112
}
111113

112114
// Represent secondary index

src/client/table/out.cpp

Lines changed: 35 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -24,68 +24,58 @@ Y_DECLARE_OUT_SPEC(, NYdb::NTable::TDescribeTableResult, o, x) {
2424
return x.Out(o);
2525
}
2626

27-
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EDistance, stream, value) {
28-
auto convertDistance = [] (auto value) -> auto {
27+
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EMetric, stream, value) {
28+
auto convertDistance = [&] {
2929
switch (value) {
30-
case NYdb::NTable::TVectorIndexSettings::EDistance::Cosine:
31-
return "COSINE";
32-
case NYdb::NTable::TVectorIndexSettings::EDistance::Manhattan:
33-
return "MANHATTAN";
34-
case NYdb::NTable::TVectorIndexSettings::EDistance::Euclidean:
35-
return "EUCLIDEAN";
36-
case NYdb::NTable::TVectorIndexSettings::EDistance::Unknown:
37-
return "UNKNOWN";
30+
case NYdb::NTable::TVectorIndexSettings::EMetric::InnerProduct:
31+
return "similarity: inner_product";
32+
case NYdb::NTable::TVectorIndexSettings::EMetric::CosineSimilarity:
33+
return "similarity: cosine";
34+
case NYdb::NTable::TVectorIndexSettings::EMetric::CosineDistance:
35+
return "distance: cosine";
36+
case NYdb::NTable::TVectorIndexSettings::EMetric::Manhattan:
37+
return "distance: manhattan";
38+
case NYdb::NTable::TVectorIndexSettings::EMetric::Euclidean:
39+
return "distance: euclidean";
40+
case NYdb::NTable::TVectorIndexSettings::EMetric::Unspecified:
41+
return "metric: unspecified";
3842
}
3943
};
4044

41-
stream << convertDistance(value);
42-
}
43-
44-
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::ESimilarity, stream, value) {
45-
auto convertSimilarity = [] (auto value) -> auto {
46-
switch (value) {
47-
case NYdb::NTable::TVectorIndexSettings::ESimilarity::Cosine:
48-
return "COSINE";
49-
case NYdb::NTable::TVectorIndexSettings::ESimilarity::InnerProduct:
50-
return "INNER_PRODUCT";
51-
case NYdb::NTable::TVectorIndexSettings::ESimilarity::Unknown:
52-
return "UNKNOWN";
53-
}
54-
};
55-
56-
stream << convertSimilarity(value);
45+
stream << convertDistance();
5746
}
5847

5948
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EVectorType, stream, value) {
60-
auto convertVectorType = [] (auto value) -> auto {
49+
auto convertVectorType = [&] {
6150
switch (value) {
6251
case NYdb::NTable::TVectorIndexSettings::EVectorType::Float:
63-
return "FLOAT";
52+
return "float";
6453
case NYdb::NTable::TVectorIndexSettings::EVectorType::Uint8:
65-
return "UINT8";
54+
return "uint8";
6655
case NYdb::NTable::TVectorIndexSettings::EVectorType::Int8:
67-
return "INT8";
56+
return "int8";
6857
case NYdb::NTable::TVectorIndexSettings::EVectorType::Bit:
69-
return "BIT";
70-
case NYdb::NTable::TVectorIndexSettings::EVectorType::Unknown:
71-
return "UNKNOWN";
58+
return "bit";
59+
case NYdb::NTable::TVectorIndexSettings::EVectorType::Unspecified:
60+
return "unspecified";
7261
}
7362
};
7463

75-
stream << convertVectorType(value);
64+
stream << convertVectorType();
7665
}
7766

7867
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings, stream, value) {
79-
stream << "{";
80-
81-
if (const auto* distance = std::get_if<NYdb::NTable::TVectorIndexSettings::EDistance>(&value.Metric)) {
82-
stream << " distance: " << *distance << "";
83-
} else if (const auto* similarity = std::get_if<NYdb::NTable::TVectorIndexSettings::ESimilarity>(&value.Metric)) {
84-
stream << " similarity: " << *similarity << "";
85-
}
86-
87-
stream << ", vector_type: " << value.VectorType << "";
88-
stream << ", vector_dimension: " << value.VectorDimension << "";
68+
stream <<
69+
"{ " << value.Metric <<
70+
", vector_type: " << value.VectorType <<
71+
", vector_dimension: " << value.VectorDimension <<
72+
" }";
73+
}
8974

90-
stream << " }";
75+
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TKMeansTreeSettings, stream, value) {
76+
stream <<
77+
"{ settings: " << value.Settings <<
78+
", clusters: " << value.Clusters <<
79+
", levels: " << value.Levels <<
80+
" }";
9181
}

0 commit comments

Comments
 (0)