1
+ #include < ydb/library/yql/providers/common/schema/parser/yql_type_parser.h>
1
2
#include < ydb/library/yql/public/udf/udf_version.h>
2
3
#include < ydb/library/yql/public/purecalc/purecalc.h>
3
4
#include < ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
@@ -23,6 +24,12 @@ NYT::TNode CreateTypeNode(const TString& fieldType) {
23
24
.Add (fieldType);
24
25
}
25
26
27
+ NYT::TNode CreateOptionalTypeNode (const TString& fieldType) {
28
+ return NYT::TNode::CreateList ()
29
+ .Add (" OptionalType" )
30
+ .Add (CreateTypeNode (fieldType));
31
+ }
32
+
26
33
void AddField (NYT::TNode& node, const TString& fieldName, const TString& fieldType) {
27
34
node.Add (
28
35
NYT::TNode::CreateList ()
@@ -31,18 +38,29 @@ void AddField(NYT::TNode& node, const TString& fieldName, const TString& fieldTy
31
38
);
32
39
}
33
40
34
- void AddOptionalField (NYT::TNode& node, const TString& fieldName, const TString& fieldType) {
35
- node.Add (NYT::TNode::CreateList ()
36
- .Add (fieldName)
37
- .Add (NYT::TNode::CreateList ().Add (" OptionalType" ).Add (CreateTypeNode (fieldType)))
41
+ void AddTypedField (NYT::TNode& node, const TString& fieldName, const TString& fieldTypeYson) {
42
+ NYT::TNode parsedType;
43
+ Y_ENSURE (NYql::NCommon::ParseYson (parsedType, fieldTypeYson, Cerr), " Invalid field type" );
44
+
45
+ // TODO: remove this when the re-parsing is removed from pq read actor
46
+ if (parsedType == CreateTypeNode (" Json" )) {
47
+ parsedType = CreateTypeNode (" String" );
48
+ } else if (parsedType == CreateOptionalTypeNode (" Json" )) {
49
+ parsedType = CreateOptionalTypeNode (" String" );
50
+ }
51
+
52
+ node.Add (
53
+ NYT::TNode::CreateList ()
54
+ .Add (fieldName)
55
+ .Add (parsedType)
38
56
);
39
57
}
40
58
41
- NYT::TNode MakeInputSchema (const TVector<TString>& columns) {
59
+ NYT::TNode MakeInputSchema (const TVector<TString>& columns, const TVector<TString>& types ) {
42
60
auto structMembers = NYT::TNode::CreateList ();
43
61
AddField (structMembers, OffsetFieldName, " Uint64" );
44
- for (const auto & col : columns) {
45
- AddOptionalField (structMembers, col, " String " );
62
+ for (size_t i = 0 ; i < columns. size (); ++i ) {
63
+ AddTypedField (structMembers, columns[i], types[i] );
46
64
}
47
65
return NYT::TNode::CreateList ().Add (" StructType" ).Add (std::move (structMembers));
48
66
}
@@ -68,7 +86,7 @@ class TFilterInputSpec : public NYql::NPureCalc::TInputSpecBase {
68
86
TVector<NYT::TNode> Schemas;
69
87
};
70
88
71
- class TFilterInputConsumer : public NYql ::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&>> {
89
+ class TFilterInputConsumer : public NYql ::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&>> {
72
90
public:
73
91
TFilterInputConsumer (
74
92
const TFilterInputSpec& spec,
@@ -106,15 +124,15 @@ class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<const T
106
124
}
107
125
}
108
126
109
- void OnObject (std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&> values) override {
127
+ void OnObject (std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&> values) override {
110
128
Y_ENSURE (FieldsPositions.size () == values.second .size ());
111
129
112
130
NKikimr::NMiniKQL::TThrowingBindTerminator bind;
113
131
with_lock (Worker->GetScopedAlloc ()) {
114
132
auto & holderFactory = Worker->GetGraph ().GetHolderFactory ();
115
133
116
134
// TODO: use blocks here
117
- for (size_t rowId = 0 ; rowId < values.second .front (). size (); ++rowId) {
135
+ for (size_t rowId = 0 ; rowId < values.second .front ()-> size (); ++rowId) {
118
136
NYql::NUdf::TUnboxedValue* items = nullptr ;
119
137
120
138
NYql::NUdf::TUnboxedValue result = Cache.NewArray (
@@ -126,13 +144,15 @@ class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<const T
126
144
127
145
size_t fieldId = 0 ;
128
146
for (const auto & column : values.second ) {
129
- items[FieldsPositions[fieldId++]] = column[rowId].data () // Check that std::string_view was initialized in json_parser
130
- ? NKikimr::NMiniKQL::MakeString (column[rowId]).MakeOptional ()
131
- : NKikimr::NUdf::TUnboxedValuePod ();
147
+ items[FieldsPositions[fieldId++]] = column->at (rowId);
132
148
}
133
149
134
150
Worker->Push (std::move (result));
135
151
}
152
+
153
+ // Clear the cache after each object because
154
+ // values are allocated on another allocator and should be released
155
+ Cache.Clear ();
136
156
}
137
157
}
138
158
@@ -216,7 +236,7 @@ struct NYql::NPureCalc::TInputSpecTraits<TFilterInputSpec> {
216
236
static constexpr bool IsPartial = false ;
217
237
static constexpr bool SupportPushStreamMode = true ;
218
238
219
- using TConsumerType = THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&>>>;
239
+ using TConsumerType = THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&>>>;
220
240
221
241
static TConsumerType MakeConsumer (
222
242
const TFilterInputSpec& spec,
@@ -243,13 +263,19 @@ class TJsonFilter::TImpl {
243
263
TImpl (const TVector<TString>& columns,
244
264
const TVector<TString>& types,
245
265
const TString& whereFilter,
246
- TCallback callback)
247
- : Sql(GenerateSql(columns, types, whereFilter)) {
248
- auto factory = NYql::NPureCalc::MakeProgramFactory (NYql::NPureCalc::TProgramFactoryOptions ());
266
+ TCallback callback,
267
+ std::shared_ptr<NKikimr::NMiniKQL::TScopedAlloc> alloc)
268
+ : Sql(GenerateSql(whereFilter)) {
269
+ Y_ENSURE (columns.size () == types.size (), " Number of columns and types should by equal" );
270
+ auto factory = NYql::NPureCalc::MakeProgramFactory (
271
+ NYql::NPureCalc::TProgramFactoryOptions ().SetScopedAlloc (std::move (alloc))
272
+ );
249
273
274
+ // Program should be stateless because input values
275
+ // are allocated on another allocator and should be released
250
276
LOG_ROW_DISPATCHER_DEBUG (" Creating program..." );
251
277
Program = factory->MakePushStreamProgram (
252
- TFilterInputSpec (MakeInputSchema (columns)),
278
+ TFilterInputSpec (MakeInputSchema (columns, types )),
253
279
TFilterOutputSpec (MakeOutputSchema ()),
254
280
Sql,
255
281
NYql::NPureCalc::ETranslationMode::SQL
@@ -258,7 +284,7 @@ class TJsonFilter::TImpl {
258
284
LOG_ROW_DISPATCHER_DEBUG (" Program created" );
259
285
}
260
286
261
- void Push (const TVector<ui64>& offsets, const TVector<TVector<std::string_view> >& values) {
287
+ void Push (const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >& values) {
262
288
Y_ENSURE (values, " Expected non empty schema" );
263
289
InputConsumer->OnObject (std::make_pair (offsets, values));
264
290
}
@@ -268,29 +294,9 @@ class TJsonFilter::TImpl {
268
294
}
269
295
270
296
private:
271
- TString GenerateSql (const TVector<TString>& columnNames, const TVector<TString>& columnTypes, const TString& whereFilter) {
297
+ TString GenerateSql (const TString& whereFilter) {
272
298
TStringStream str;
273
- str << " $fields = SELECT " ;
274
- Y_ABORT_UNLESS (columnNames.size () == columnTypes.size ());
275
- str << OffsetFieldName << " , " ;
276
- for (size_t i = 0 ; i < columnNames.size (); ++i) {
277
- TString columnType = columnTypes[i];
278
- TString columnName = NFq::EncloseAndEscapeString (columnNames[i], ' `' );
279
- if (columnType == " Json" ) {
280
- columnType = " String" ;
281
- } else if (columnType == " Optional<Json>" ) {
282
- columnType = " Optional<String>" ;
283
- }
284
-
285
- if (columnType.StartsWith (" Optional" )) {
286
- str << " IF(" << columnName << " IS NOT NULL, Unwrap(CAST(" << columnName << " as " << columnType << " )), NULL)" ;
287
- } else {
288
- str << " Unwrap(CAST(" << columnName << " as " << columnType << " ))" ;
289
- }
290
- str << " as " << columnName << ((i != columnNames.size () - 1 ) ? " ," : " " );
291
- }
292
- str << " FROM Input;\n " ;
293
- str << " $filtered = SELECT * FROM $fields " << whereFilter << " ;\n " ;
299
+ str << " $filtered = SELECT * FROM Input " << whereFilter << " ;\n " ;
294
300
295
301
str << " SELECT " << OffsetFieldName << " , Unwrap(Json::SerializeJson(Yson::From(RemoveMembers(TableRow(), [\" " << OffsetFieldName;
296
302
str << " \" ])))) as data FROM $filtered" ;
@@ -300,22 +306,23 @@ class TJsonFilter::TImpl {
300
306
301
307
private:
302
308
THolder<NYql::NPureCalc::TPushStreamProgram<TFilterInputSpec, TFilterOutputSpec>> Program;
303
- THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<TVector<std::string_view> >&>>> InputConsumer;
309
+ THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >&>>> InputConsumer;
304
310
const TString Sql;
305
311
};
306
312
307
313
// Creates the filter by constructing the private implementation (TImpl) with
// the given column names, column types, filter text, result callback and
// shared allocator. All arguments are passed straight through to TImpl.
TJsonFilter::TJsonFilter(
    const TVector<TString>& columns,
    const TVector<TString>& types,
    const TString& whereFilter,
    TCallback callback,
    std::shared_ptr<NKikimr::NMiniKQL::TScopedAlloc> alloc)
    // callback and alloc are taken by value, so move them on instead of copying.
    : Impl(std::make_unique<TJsonFilter::TImpl>(columns, types, whereFilter, std::move(callback), std::move(alloc))) {
}
314
321
315
322
// Defined in this translation unit (where TImpl is defined) rather than
// implicitly, so that the Impl member can destroy a complete TImpl.
TJsonFilter::~TJsonFilter() = default;
317
324
318
- void TJsonFilter::Push (const TVector<ui64>& offsets, const TVector<TVector<std::string_view> >& values) {
325
+ void TJsonFilter::Push (const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector* >& values) {
319
326
Impl->Push (offsets, values);
320
327
}
321
328
@@ -327,8 +334,9 @@ std::unique_ptr<TJsonFilter> NewJsonFilter(
327
334
const TVector<TString>& columns,
328
335
const TVector<TString>& types,
329
336
const TString& whereFilter,
330
- TCallback callback) {
331
- return std::unique_ptr<TJsonFilter>(new TJsonFilter (columns, types, whereFilter, callback));
337
+ TCallback callback,
338
+ std::shared_ptr<NKikimr::NMiniKQL::TScopedAlloc> alloc) {
339
+ return std::unique_ptr<TJsonFilter>(new TJsonFilter (columns, types, whereFilter, callback, std::move (alloc)));
332
340
}
333
341
334
342
} // namespace NFq
0 commit comments