Skip to content

Commit b717948

Browse files
authored
Integrity trails helper (#8964)
1 parent 4238641 commit b717948

File tree

2 files changed

+282
-0
lines changed

2 files changed

+282
-0
lines changed
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
#include <library/cpp/json/json_value.h>
2+
#include <library/cpp/json/json_reader.h>
3+
#include <util/string/cast.h>
4+
#include <fstream>
5+
#include <sstream>
6+
#include <ydb/library/yql/parser/pg_wrapper/interface/type_desc.h>
7+
#include <ydb/core/scheme/scheme_tablecell.h>
8+
#define USE_CURRENT_UDF_ABI_VERSION true
9+
#include <ydb/core/tx/datashard/datashard_integrity_trails.h>
10+
#include <ydb/library/dynumber/dynumber.h>
11+
#include <ydb/library/yql/core/sql_types/simple_types.h>
12+
13+
using namespace NKikimr;
14+
15+
#define IF_TYPE(typeName) \
16+
else if (type == #typeName) { \
17+
resolved = NScheme::NTypeIds::typeName; \
18+
}
19+
20+
std::optional<NScheme::TTypeId> ResolveType(std::string typeAlias) {
21+
auto type = NYql::LookupSimpleTypeBySqlAlias(typeAlias, true);
22+
23+
if (!type) {
24+
return {};
25+
}
26+
27+
std::optional<NScheme::TTypeId> resolved = {};
28+
29+
if (false) {}
30+
IF_TYPE(Bool)
31+
IF_TYPE(Int8)
32+
IF_TYPE(Uint8)
33+
IF_TYPE(Int16)
34+
IF_TYPE(Uint16)
35+
IF_TYPE(Int32)
36+
IF_TYPE(Uint32)
37+
IF_TYPE(Int64)
38+
IF_TYPE(Uint64)
39+
IF_TYPE(Double)
40+
IF_TYPE(Float)
41+
IF_TYPE(String)
42+
IF_TYPE(Utf8)
43+
IF_TYPE(Yson)
44+
IF_TYPE(Json)
45+
IF_TYPE(Uuid)
46+
IF_TYPE(Date)
47+
IF_TYPE(Datetime)
48+
IF_TYPE(Timestamp)
49+
IF_TYPE(Interval)
50+
IF_TYPE(Decimal)
51+
IF_TYPE(DyNumber)
52+
IF_TYPE(JsonDocument)
53+
IF_TYPE(Date32)
54+
IF_TYPE(Datetime64)
55+
IF_TYPE(Timestamp64)
56+
IF_TYPE(Interval64)
57+
58+
return resolved;
59+
}
60+
61+
#define EXTRACT_VAL(cellType, protoType, cppType) \
62+
case NScheme::NTypeIds::cellType : { \
63+
cppType v = FromString<cppType>(val); \
64+
cell = TCell((const char*)&v, sizeof(v)); \
65+
break; \
66+
}
67+
68+
std::optional<TCell> ParseCell(std::string type, std::string val) {
69+
auto typeId = ResolveType(type);
70+
71+
std::optional<TCell> cell = {};
72+
73+
if (!typeId) {
74+
return {};
75+
}
76+
77+
switch (*typeId) {
78+
EXTRACT_VAL(Bool, bool, ui8);
79+
EXTRACT_VAL(Int8, int32, i8);
80+
EXTRACT_VAL(Uint8, uint32, ui8);
81+
EXTRACT_VAL(Int16, int32, i16);
82+
EXTRACT_VAL(Uint16, uint32, ui16);
83+
EXTRACT_VAL(Int32, int32, i32);
84+
EXTRACT_VAL(Uint32, uint32, ui32);
85+
EXTRACT_VAL(Int64, int64, i64);
86+
EXTRACT_VAL(Uint64, uint64, ui64);
87+
EXTRACT_VAL(Float, float, float);
88+
EXTRACT_VAL(Double, double, double);
89+
EXTRACT_VAL(Date, uint32, ui16);
90+
EXTRACT_VAL(Datetime, uint32, ui32);
91+
EXTRACT_VAL(Timestamp, uint64, ui64);
92+
EXTRACT_VAL(Interval, int64, i64);
93+
EXTRACT_VAL(Date32, int32, i32);
94+
EXTRACT_VAL(Datetime64, int64, i64);
95+
EXTRACT_VAL(Timestamp64, int64, i64);
96+
EXTRACT_VAL(Interval64, int64, i64);
97+
case NScheme::NTypeIds::Json :
98+
case NScheme::NTypeIds::Utf8 : {
99+
cell = TCell(val.data(), val.size());
100+
break;
101+
}
102+
case NScheme::NTypeIds::DyNumber : {
103+
const auto dyNumber = NDyNumber::ParseDyNumberString(val);
104+
if (!dyNumber.Defined()) {
105+
return {};
106+
}
107+
cell = TCell(dyNumber->data(), dyNumber->size());
108+
break;
109+
}
110+
case NScheme::NTypeIds::Yson :
111+
case NScheme::NTypeIds::String : {
112+
cell = TCell(val.data(), val.size());
113+
break;
114+
}
115+
case NScheme::NTypeIds::Decimal :
116+
case NScheme::NTypeIds::Uuid : {
117+
char uuid[16];
118+
cell = TCell(uuid, sizeof(uuid));
119+
break;
120+
}
121+
default:
122+
return {};
123+
};
124+
125+
return cell;
126+
}
127+
128+
/**
 * Extracts the primary key column names from the scheme description.
 *
 * Reads the "primary_key" field of `jsonValue`, which must be an array of strings.
 *
 * @param jsonValue parsed scheme JSON.
 * @return the column names in array order; an empty vector (after printing a
 *         message to Cerr) when the field is not an array or any element is
 *         not a string.
 */
std::vector<std::string> ReadPK(NJson::TJsonValue& jsonValue) {
    const auto& primaryKey = jsonValue["primary_key"];

    if (!primaryKey.IsArray()) {
        Cerr << "Scheme parsing error, primary_key is not an array" << Endl;
        return {};
    }

    std::vector<std::string> keyColumns;

    for (const auto& element : primaryKey.GetArray()) {
        if (!element.IsString()) {
            Cerr << "Scheme parsing error, primary key array element is not a string" << Endl;
            return {};
        }

        keyColumns.push_back(element.GetString());
    }

    return keyColumns;
}
150+
151+
/**
 * Builds a column-name -> type-alias map from the scheme description.
 *
 * Reads the "columns" field of `jsonValue`, which must be an array of objects.
 * Each object supplies the column name in "name" and the type alias either in
 * "type.type_id" or, failing that, in "type.optional_type.item.type_id".
 *
 * @param jsonValue parsed scheme JSON.
 * @return the mapping; an empty map (after printing a message to Cerr) when the
 *         description is malformed: "columns" is not an array, a column is not
 *         an object, its name is missing or not a non-empty string, or no type
 *         id can be found. If a name repeats, the last occurrence wins.
 */
std::map<std::string, std::string> ReadColumnMapping(NJson::TJsonValue& jsonValue) {
    auto& columnsField = jsonValue["columns"];

    if (!columnsField.IsArray()) {
        Cerr << "Scheme parsing error, columns is not an array" << Endl;
        return {};
    }

    std::map<std::string, std::string> colToType;

    for (const auto& column : columnsField.GetArray()) {
        if (!column.IsMap()) {
            Cerr << "Scheme parsing error, column is not an object" << Endl;
            return {};
        }

        // Without this check a column lacking a usable name would be silently
        // registered under the empty key.
        const auto& nameField = column["name"];

        if (!nameField.IsString() || nameField.GetString().empty()) {
            Cerr << "Scheme parsing error, column name is not a non-empty string" << Endl;
            return {};
        }

        const auto& typeField = column["type"];

        std::string typeId;

        if (typeField.Has("type_id")) {
            typeId = typeField["type_id"].GetString();
        } else if (typeField.Has("optional_type")) {
            typeId = typeField["optional_type"]["item"]["type_id"].GetString();
        }

        if (typeId.empty()) {
            Cerr << "Scheme parsing error, type_id is not found" << Endl;
            return {};
        }

        colToType[nameField.GetString()] = typeId;
    }

    return colToType;
}
192+
193+
int main(int argc, char* argv[]) {
194+
if (argc < 3) {
195+
Cerr << "Usage: path-to-scheme.json key-column1-value ... key-columnN-value" << Endl;
196+
return 1;
197+
}
198+
199+
std::string path = argv[1];
200+
201+
std::vector<std::string> values;
202+
203+
for (int i = 2; i < argc; ++i) {
204+
values.push_back(argv[i]);
205+
}
206+
207+
std::stringstream buffer;
208+
209+
std::ifstream fileStream(path);
210+
buffer << fileStream.rdbuf();
211+
212+
std::string json = buffer.str();
213+
214+
NJson::TJsonValue jsonValue;
215+
216+
if (!NJson::ReadJsonTree(json, &jsonValue)) {
217+
Cerr << "Failed to parse JSON" << Endl;
218+
return 1;
219+
}
220+
221+
std::vector<std::string> pk = ReadPK(jsonValue);
222+
223+
if (pk.empty()) {
224+
Cerr << "Primary key is empty" << Endl;
225+
return 1;
226+
}
227+
228+
std::map<std::string, std::string> colToType = ReadColumnMapping(jsonValue);
229+
230+
if (colToType.empty()) {
231+
Cerr << "Column mapping is empty" << Endl;
232+
return 1;
233+
}
234+
235+
if (values.size() != pk.size()) {
236+
Cerr << "Key's columns count doesn't match scheme" << Endl;
237+
238+
return 1;
239+
}
240+
241+
TVector<TCell> arr(pk.size());
242+
243+
for (size_t i = 0; i < values.size(); ++i) {
244+
auto col = pk[i];
245+
auto type = colToType[col];
246+
auto cell = ParseCell(colToType[pk[i]], values[i]);
247+
248+
if (!cell) {
249+
Cerr << "Unexpected type " << type << " of column " << col << Endl;
250+
251+
return 1;
252+
}
253+
254+
arr[i] = *cell;
255+
}
256+
257+
TSerializedCellVec vec(arr);
258+
259+
Cout << "Obfuscated key: " << Endl;
260+
261+
TStringStream output;
262+
263+
NDataIntegrity::WriteTablePoint(vec.GetCells(), output);
264+
265+
Cout << output.Str() << Endl;
266+
267+
return 0;
268+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Build description for the helper program whose only listed source is main.cpp.
# NOTE(review): PEERDIR presumably names the library directories the program
# depends on (ya.make semantics) — confirm against the build system docs.
PROGRAM()
2+
3+
SRCS(
4+
main.cpp
5+
)
6+
7+
PEERDIR(
8+
ydb/core/engine
9+
ydb/core/scheme
10+
ydb/library/yql/public/udf/service/stub
11+
ydb/library/yql/sql/pg_dummy
12+
)
13+
14+
END()

0 commit comments

Comments
 (0)