5
5
6
6
namespace NKikimr ::NDataShard::NKMeans {
7
7
8
- TTableRange CreateRangeFrom (const TUserTable& table, NTableIndex:: TClusterId parent, TCell& from, TCell& to) {
8
+ TTableRange CreateRangeFrom (const TUserTable& table, TClusterId parent, TCell& from, TCell& to) {
9
9
if (parent == 0 ) {
10
10
return table.GetTableRange ();
11
11
}
@@ -28,7 +28,26 @@ NTable::TLead CreateLeadFrom(const TTableRange& range) {
28
28
return lead;
29
29
}
30
30
31
- void AddRowMain2Build (TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row) {
31
+ void AddRowToLevel (TBufferData& buffer, TClusterId parent, TClusterId child, const TString& embedding, bool isPostingLevel) {
32
+ if (isPostingLevel) {
33
+ child = SetPostingParentFlag (child);
34
+ } else {
35
+ EnsureNoPostingParentFlag (child);
36
+ }
37
+
38
+ std::array<TCell, 2 > pk;
39
+ pk[0 ] = TCell::Make (parent);
40
+ pk[1 ] = TCell::Make (child);
41
+
42
+ std::array<TCell, 1 > data;
43
+ data[0 ] = TCell{embedding};
44
+
45
+ buffer.AddRow (TSerializedCellVec{pk}, TSerializedCellVec::Serialize (data));
46
+ }
47
+
48
+ void AddRowMainToBuild (TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row) {
49
+ EnsureNoPostingParentFlag (parent);
50
+
32
51
std::array<TCell, 1 > cells;
33
52
cells[0 ] = TCell::Make (parent);
34
53
auto pk = TSerializedCellVec::Serialize (cells);
@@ -37,9 +56,10 @@ void AddRowMain2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArra
37
56
TSerializedCellVec{key});
38
57
}
39
58
40
- void AddRowMain2Posting (TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row,
41
- ui32 dataPos)
59
+ void AddRowMainToPosting (TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos)
42
60
{
61
+ parent = SetPostingParentFlag (parent);
62
+
43
63
std::array<TCell, 1 > cells;
44
64
cells[0 ] = TCell::Make (parent);
45
65
auto pk = TSerializedCellVec::Serialize (cells);
@@ -48,9 +68,10 @@ void AddRowMain2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TAr
48
68
TSerializedCellVec{key});
49
69
}
50
70
51
- void AddRowBuild2Build (TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row,
52
- ui32 prefixColumns)
71
+ void AddRowBuildToBuild (TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 prefixColumns)
53
72
{
73
+ EnsureNoPostingParentFlag (parent);
74
+
54
75
std::array<TCell, 1 > cells;
55
76
cells[0 ] = TCell::Make (parent);
56
77
auto pk = TSerializedCellVec::Serialize (cells);
@@ -59,9 +80,10 @@ void AddRowBuild2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArr
59
80
TSerializedCellVec{key});
60
81
}
61
82
62
- void AddRowBuild2Posting (TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row,
63
- ui32 dataPos, ui32 prefixColumns)
83
+ void AddRowBuildToPosting (TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos, ui32 prefixColumns)
64
84
{
85
+ parent = SetPostingParentFlag (parent);
86
+
65
87
std::array<TCell, 1 > cells;
66
88
cells[0 ] = TCell::Make (parent);
67
89
auto pk = TSerializedCellVec::Serialize (cells);
@@ -70,45 +92,44 @@ void AddRowBuild2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TA
70
92
TSerializedCellVec{key});
71
93
}
72
94
73
- TTags MakeUploadTags (const TUserTable& table, const TProtoStringType& embedding,
74
- const google::protobuf::RepeatedPtrField<TProtoStringType>& data, ui32& embeddingPos,
75
- ui32& dataPos, NTable::TTag& embeddingTag)
95
+ TTags MakeScanTags (const TUserTable& table, const TProtoStringType& embedding,
96
+ const google::protobuf::RepeatedPtrField<TProtoStringType>& data, ui32& embeddingPos,
97
+ ui32& dataPos, NTable::TTag& embeddingTag)
76
98
{
77
99
auto tags = GetAllTags (table);
78
- TTags uploadTags ;
79
- uploadTags .reserve (1 + data.size ());
100
+ TTags result ;
101
+ result .reserve (1 + data.size ());
80
102
embeddingTag = tags.at (embedding);
81
103
if (auto it = std::find (data.begin (), data.end (), embedding); it != data.end ()) {
82
104
embeddingPos = it - data.begin ();
83
105
dataPos = 0 ;
84
106
} else {
85
- uploadTags .push_back (embeddingTag);
107
+ result .push_back (embeddingTag);
86
108
}
87
109
for (const auto & column : data) {
88
- uploadTags .push_back (tags.at (column));
110
+ result .push_back (tags.at (column));
89
111
}
90
- return uploadTags ;
112
+ return result ;
91
113
}
92
114
93
- std::shared_ptr<NTxProxy::TUploadTypes>
94
- MakeUploadTypes (const TUserTable& table, NKikimrTxDataShard::EKMeansState uploadState,
95
- const TProtoStringType& embedding, const google::protobuf::RepeatedPtrField<TProtoStringType>& data,
96
- ui32 prefixColumns)
115
+ std::shared_ptr<NTxProxy::TUploadTypes> MakeOutputTypes (const TUserTable& table, NKikimrTxDataShard::EKMeansState uploadState,
116
+ const TProtoStringType& embedding, const google::protobuf::RepeatedPtrField<TProtoStringType>& data,
117
+ ui32 prefixColumns)
97
118
{
98
119
auto types = GetAllTypes (table);
99
120
100
- auto uploadTypes = std::make_shared<NTxProxy::TUploadTypes>();
101
- uploadTypes ->reserve (1 + 1 + std::min ((table.KeyColumnTypes .size () - prefixColumns) + data.size (), types.size ()));
121
+ auto result = std::make_shared<NTxProxy::TUploadTypes>();
122
+ result ->reserve (1 + 1 + std::min ((table.KeyColumnTypes .size () - prefixColumns) + data.size (), types.size ()));
102
123
103
124
Ydb::Type type;
104
125
type.set_type_id (NTableIndex::ClusterIdType);
105
- uploadTypes ->emplace_back (NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type);
126
+ result ->emplace_back (NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type);
106
127
107
128
auto addType = [&](const auto & column) {
108
129
auto it = types.find (column);
109
130
if (it != types.end ()) {
110
131
NScheme::ProtoFromTypeInfo (it->second, type);
111
- uploadTypes ->emplace_back (it->first , type);
132
+ result ->emplace_back (it->first , type);
112
133
types.erase (it);
113
134
}
114
135
};
@@ -133,7 +154,7 @@ MakeUploadTypes(const TUserTable& table, NKikimrTxDataShard::EKMeansState upload
133
154
Y_ASSERT (false );
134
155
135
156
}
136
- return uploadTypes ;
157
+ return result ;
137
158
}
138
159
139
160
}
0 commit comments