Skip to content

Commit a9441cb

Browse files
authored
YQ-3869 RD added limit for parser buffer size (#11627)
1 parent 3e28059 commit a9441cb

File tree

8 files changed

+320
-207
lines changed

8 files changed

+320
-207
lines changed

ydb/core/fq/libs/config/protos/row_dispatcher.proto

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ message TRowDispatcherCoordinatorConfig {
1515
}
1616

1717
// Settings of the JSON parser: batch size, batch creation timeout and buffer cell limit.
message TJsonParserConfig {
18-
uint64 BatchSizeBytes = 1;
18+
uint64 BatchSizeBytes = 1; // default 1 MiB
1919
uint64 BatchCreationTimeoutMs = 2;
20+
uint64 BufferCellCount = 3; // limit on (number of rows) * (number of columns), default 10^6
2021
}
2122

2223
message TRowDispatcherConfig {

ydb/core/fq/libs/row_dispatcher/json_filter.cpp

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,17 @@ NYT::TNode MakeOutputSchema() {
7171
return NYT::TNode::CreateList().Add("StructType").Add(std::move(structMembers));
7272
}
7373

74+
// Non-owning view of one batch of rows passed to the filter input consumer.
// Both vector members are references, so the referenced vectors must stay
// alive for as long as this object is used.
struct TInputType {
75+
const TVector<ui64>& Offsets; // per-row offsets; read through GetOffset()
76+
const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& Values; // one pointer per column, each column holds one value per row
77+
const ui64 RowsOffset; // offset of first value
78+
const ui64 NumberRows; // number of rows in this batch
79+
80+
// Returns the offset of row `rowId` of this batch, i.e. Offsets[rowId + RowsOffset].
// No explicit range check is done in this method.
ui64 GetOffset(ui64 rowId) const {
81+
return Offsets[rowId + RowsOffset];
82+
}
83+
};
84+
7485
class TFilterInputSpec : public NYql::NPureCalc::TInputSpecBase {
7586
public:
7687
TFilterInputSpec(const NYT::TNode& schema)
@@ -85,7 +96,7 @@ class TFilterInputSpec : public NYql::NPureCalc::TInputSpecBase {
8596
TVector<NYT::TNode> Schemas;
8697
};
8798

88-
class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>&>> {
99+
class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<TInputType> {
89100
public:
90101
TFilterInputConsumer(
91102
const TFilterInputSpec& spec,
@@ -123,36 +134,38 @@ class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<const T
123134
}
124135
}
125136

126-
void OnObject(std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>&> values) override {
127-
Y_ENSURE(FieldsPositions.size() == values.second.size());
137+
// Feeds one batch into the worker: for every row builds an array holding the
// row offset (at OffsetPosition) plus one item per column (at FieldsPositions)
// and pushes it to Worker. The number of columns must match FieldsPositions.
void OnObject(TInputType input) override {
138+
Y_ENSURE(FieldsPositions.size() == input.Values.size());
128139

129140
NKikimr::NMiniKQL::TThrowingBindTerminator bind;
130141
with_lock (Worker->GetScopedAlloc()) {
142+
// Cleanup is deferred to scope exit, so it also runs when the loop below
// is left early (e.g. by an exception).
Y_DEFER {
143+
// Clear the cache after each object because the
144+
// values are allocated on another allocator and should be released
145+
Cache.Clear();
146+
Worker->GetGraph().Invalidate();
147+
};
148+
131149
auto& holderFactory = Worker->GetGraph().GetHolderFactory();
132150

133151
// TODO: use blocks here
134-
for (size_t rowId = 0; rowId < values.second.front()->size(); ++rowId) {
152+
for (size_t rowId = 0; rowId < input.NumberRows; ++rowId) {
135153
NYql::NUdf::TUnboxedValue* items = nullptr;
136154

137155
NYql::NUdf::TUnboxedValue result = Cache.NewArray(
138156
holderFactory,
139-
static_cast<ui32>(values.second.size() + 1),
157+
static_cast<ui32>(input.Values.size() + 1), // one item per column plus one for the offset
140158
items);
141159

142-
items[OffsetPosition] = NYql::NUdf::TUnboxedValuePod(values.first[rowId]);
160+
items[OffsetPosition] = NYql::NUdf::TUnboxedValuePod(input.GetOffset(rowId));
143161

144162
size_t fieldId = 0;
145-
for (const auto& column : values.second) {
163+
// NOTE(review): columns are indexed by rowId directly, while the offset above goes
// through GetOffset(), which adds RowsOffset - presumably each column already
// starts at the first row of this batch; confirm against the caller.
for (const auto column : input.Values) {
146164
items[FieldsPositions[fieldId++]] = column->at(rowId);
147165
}
148166

149167
Worker->Push(std::move(result));
150168
}
151-
152-
// Clear cache after each object because
153-
// values allocated on another allocator and should be released
154-
Cache.Clear();
155-
Worker->GetGraph().Invalidate();
156169
}
157170
}
158171

@@ -236,7 +249,7 @@ struct NYql::NPureCalc::TInputSpecTraits<TFilterInputSpec> {
236249
static constexpr bool IsPartial = false;
237250
static constexpr bool SupportPushStreamMode = true;
238251

239-
using TConsumerType = THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>&>>>;
252+
using TConsumerType = THolder<NYql::NPureCalc::IConsumer<TInputType>>;
240253

241254
static TConsumerType MakeConsumer(
242255
const TFilterInputSpec& spec,
@@ -282,9 +295,9 @@ class TJsonFilter::TImpl {
282295
LOG_ROW_DISPATCHER_DEBUG("Program created");
283296
}
284297

285-
void Push(const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>& values) {
298+
// Wraps the arguments into a TInputType (which stores `offsets` and `values`
// by reference) and passes it to the input consumer.
void Push(const TVector<ui64>& offsets, const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 rowsOffset, ui64 numberRows) {
286299
// Presumably fails when `values` is empty (see the message) - confirm TVector's bool conversion.
Y_ENSURE(values, "Expected non empty schema");
287-
InputConsumer->OnObject(std::make_pair(offsets, values));
300+
InputConsumer->OnObject({.Offsets = offsets, .Values = values, .RowsOffset = rowsOffset, .NumberRows = numberRows});
288301
}
289302

290303
TString GetSql() const {
@@ -305,7 +318,7 @@ class TJsonFilter::TImpl {
305318

306319
private:
307320
THolder<NYql::NPureCalc::TPushStreamProgram<TFilterInputSpec, TFilterOutputSpec>> Program;
308-
THolder<NYql::NPureCalc::IConsumer<std::pair<const TVector<ui64>&, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>&>>> InputConsumer;
321+
THolder<NYql::NPureCalc::IConsumer<TInputType>> InputConsumer;
309322
const TString Sql;
310323
};
311324

@@ -322,8 +335,8 @@ TJsonFilter::TJsonFilter(
322335
// Empty destructor defined in the .cpp file.
// NOTE(review): presumably kept out of line so that TImpl is a complete type here - confirm.
TJsonFilter::~TJsonFilter() {
323336
}
324337

325-
void TJsonFilter::Push(const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>& values) {
326-
Impl->Push(offsets, values);
338+
// Public entry point: forwards all arguments unchanged to the implementation object (Impl).
void TJsonFilter::Push(const TVector<ui64>& offsets, const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 rowsOffset, ui64 numberRows) {
339+
Impl->Push(offsets, values, rowsOffset, numberRows);
327340
}
328341

329342
TString TJsonFilter::GetSql() {

ydb/core/fq/libs/row_dispatcher/json_filter.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
#include "common.h"
44

5-
#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
6-
#include <yql/essentials/public/udf/udf_data_type.h>
75
#include <yql/essentials/public/udf/udf_value.h>
86

97
namespace NFq {
@@ -23,7 +21,7 @@ class TJsonFilter {
2321

2422
~TJsonFilter();
2523

26-
void Push(const TVector<ui64>& offsets, const TVector<const NKikimr::NMiniKQL::TUnboxedValueVector*>& values);
24+
void Push(const TVector<ui64>& offsets, const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 rowsOffset, ui64 numberRows);
2725
TString GetSql();
2826

2927
private:

0 commit comments

Comments
 (0)