Skip to content

Commit 3de3ff5

Browse files
authored
Rework filters pushdown with multiusage (#8360)
1 parent 9c8e625 commit 3de3ff5

File tree

4 files changed

+117
-55
lines changed

4 files changed

+117
-55
lines changed

ydb/library/yql/core/common_opt/yql_co_finalizers.cpp

Lines changed: 108 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,22 @@ void FilterPushdownWithMultiusage(const TExprNode::TPtr& node, TNodeOnNodeOwnedM
7575
Sort(immediateParents, [](const TExprNode* left, const TExprNode* right) { return CompareNodes(*left, *right) < 0; });
7676
}
7777

78-
TVector<const TExprNode*> parentFilters;
79-
TExprNodeList parentFilterLambdas;
80-
TExprNodeList parentValueLambdas;
81-
size_t likelyCount = 0;
82-
for (auto parent : immediateParents) {
78+
struct TConsumerInfo {
79+
const TExprNode* OriginalFlatMap = nullptr;
80+
const TTypeAnnotationNode* OriginalRowType = nullptr;
81+
TExprNode::TPtr FilterLambda;
82+
TExprNode::TPtr ValueLambda;
83+
TExprNode::TPtr PushdownLambda;
84+
TString ColumnName;
85+
};
86+
87+
TVector<TConsumerInfo> consumers;
88+
bool hasOrdered = false;
89+
size_t pushdownCount = 0;
90+
const auto inputStructType = node->GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>();
91+
const auto genColumnNames = GenNoClashColumns(*inputStructType, "_yql_filter_pushdown", immediateParents.size());
92+
for (size_t i = 0; i < immediateParents.size(); ++i) {
93+
const TExprNode* parent = immediateParents[i];
8394
while (skipNodes.contains(parent->Content())) {
8495
auto newParent = optCtx.GetParentIfSingle(*parent);
8596
if (newParent) {
@@ -92,60 +103,112 @@ void FilterPushdownWithMultiusage(const TExprNode::TPtr& node, TNodeOnNodeOwnedM
92103
return;
93104
}
94105

106+
if (TCoOrderedFlatMap::Match(parent)) {
107+
hasOrdered = true;
108+
}
109+
95110
TCoFlatMapBase parentFlatMap(parent);
96111
if (auto cond = parentFlatMap.Lambda().Body().Maybe<TCoConditionalValueBase>()) {
97-
if (IsDepended(parentFlatMap.Lambda().Ref(), *node)) {
112+
const TCoArgument lambdaArg = parentFlatMap.Lambda().Args().Arg(0);
113+
auto pred = cond.Cast().Predicate();
114+
if (pred.Maybe<TCoLikely>() ||
115+
(pred.Maybe<TCoAnd>() && AnyOf(pred.Ref().ChildrenList(), [](const auto& p) { return p->IsCallable("Likely"); })) ||
116+
!IsStrict(pred.Ptr()) ||
117+
HasDependsOn(pred.Ptr(), lambdaArg.Ptr()) ||
118+
IsDepended(parentFlatMap.Lambda().Ref(), *node))
119+
{
98120
return;
99121
}
100-
likelyCount += bool(cond.Cast().Predicate().Maybe<TCoLikely>());
101-
auto pos = cond.Cast().Predicate().Pos();
102-
parentFilterLambdas.push_back(ctx.NewLambda(pos,
103-
ctx.NewArguments(pos, { parentFlatMap.Lambda().Args().Arg(0).Ptr() }),
104-
cond.Cast().Predicate().Ptr()));
105-
parentValueLambdas.push_back(ctx.NewLambda(pos,
106-
ctx.NewArguments(pos, { parentFlatMap.Lambda().Args().Arg(0).Ptr() }),
107-
cond.Cast().Value().Ptr()));
108-
parentFilters.push_back(parent);
122+
123+
TExprNodeList andPredicates;
124+
if (pred.Maybe<TCoAnd>()) {
125+
andPredicates = pred.Ref().ChildrenList();
126+
} else {
127+
andPredicates.push_back(pred.Ptr());
128+
}
129+
130+
TExprNodeList pushdownPreds;
131+
TExprNodeList restPreds;
132+
for (auto& p : andPredicates) {
133+
if (TCoMember::Match(p.Get()) && p->Child(0) == lambdaArg.Raw()) {
134+
restPreds.push_back(p);
135+
} else {
136+
pushdownPreds.push_back(p);
137+
}
138+
}
139+
140+
const TPositionHandle pos = pred.Pos();
141+
consumers.emplace_back();
142+
TConsumerInfo& consumer = consumers.back();
143+
consumer.OriginalFlatMap = parent;
144+
consumer.OriginalRowType = lambdaArg.Ref().GetTypeAnn();
145+
consumer.ColumnName = genColumnNames[i];
146+
if (!pushdownPreds.empty()) {
147+
++pushdownCount;
148+
restPreds.push_back(
149+
ctx.Builder(pos)
150+
.Callable("Member")
151+
.Add(0, lambdaArg.Ptr())
152+
.Atom(1, consumer.ColumnName)
153+
.Seal()
154+
.Build());
155+
auto restPred = ctx.NewCallable(pos, "And", std::move(restPreds));
156+
auto pushdownPred = ctx.NewCallable(pos, "And", std::move(pushdownPreds));
157+
158+
consumer.FilterLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, { lambdaArg.Ptr() }), std::move(restPred));
159+
consumer.PushdownLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, { lambdaArg.Ptr() }), std::move(pushdownPred));
160+
} else {
161+
consumer.FilterLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, { lambdaArg.Ptr() }), pred.Ptr());
162+
}
163+
consumer.ValueLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, { lambdaArg.Ptr() }), cond.Cast().Value().Ptr());
109164
} else {
110165
return;
111166
}
112167
}
113-
YQL_ENSURE(parentFilterLambdas.size() > 1);
114-
if (likelyCount == parentFilters.size()) {
168+
169+
if (!pushdownCount) {
115170
return;
116171
}
117172

118-
YQL_CLOG(DEBUG, Core) << "Pushdown " << parentFilters.size() << " filters to common parent " << node->Content();
173+
YQL_CLOG(DEBUG, Core) << "Pushdown predicate from " << pushdownCount << " filters (out of total " << consumers.size() << ") to common parent " << node->Content();
119174

120-
const auto inputStructType = node->GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>();
121-
const auto genColumnNames = GenNoClashColumns(*inputStructType, "_yql_filter_pushdown", parentFilterLambdas.size());
175+
YQL_ENSURE(consumers.size() > 1);
176+
YQL_ENSURE(consumers.size() == immediateParents.size());
122177

123178
TExprNode::TPtr mapArg = ctx.NewArgument(node->Pos(), "row");
124179
TExprNode::TPtr mapBody = mapArg;
125180
TExprNode::TPtr filterArg = ctx.NewArgument(node->Pos(), "row");
126181
TExprNodeList filterPreds;
127-
for (size_t i = 0; i < parentFilterLambdas.size(); ++i) {
128-
TString memberName = genColumnNames[i];
129-
mapBody = ctx.Builder(mapBody->Pos())
130-
.Callable("AddMember")
131-
.Add(0, mapBody)
132-
.Atom(1, memberName)
133-
.Apply(2, parentFilterLambdas[i])
134-
.With(0, mapArg)
182+
for (size_t i = 0; i < consumers.size(); ++i) {
183+
const TConsumerInfo& consumer = consumers[i];
184+
if (consumer.PushdownLambda) {
185+
mapBody = ctx.Builder(mapBody->Pos())
186+
.Callable("AddMember")
187+
.Add(0, mapBody)
188+
.Atom(1, consumer.ColumnName)
189+
.Apply(2, consumer.PushdownLambda)
190+
.With(0)
191+
.Callable("CastStruct")
192+
.Add(0, mapArg)
193+
.Add(1, ExpandType(mapArg->Pos(), *consumer.OriginalRowType, ctx))
194+
.Seal()
195+
.Done()
196+
.Seal()
135197
.Seal()
136-
.Seal()
137-
.Build();
198+
.Build();
199+
}
200+
138201
filterPreds.push_back(ctx.Builder(node->Pos())
139-
.Callable("Member")
140-
.Add(0, filterArg)
141-
.Atom(1, memberName)
202+
.Apply(consumer.FilterLambda)
203+
// CastStruct is not needed here, since FilterLambda is AND over column references
204+
.With(0, filterArg)
142205
.Seal()
143206
.Build());
144207
}
145208

146209
auto newNode = ctx.Builder(node->Pos())
147-
.Callable("OrderedFilter")
148-
.Callable(0, "OrderedMap")
210+
.Callable(hasOrdered ? "OrderedFilter" : "Filter")
211+
.Callable(0, hasOrdered ? "OrderedMap" : "Map")
149212
.Add(0, node)
150213
.Add(1, ctx.NewLambda(node->Pos(), ctx.NewArguments(node->Pos(), { mapArg }), std::move(mapBody)))
151214
.Seal()
@@ -156,12 +219,15 @@ void FilterPushdownWithMultiusage(const TExprNode::TPtr& node, TNodeOnNodeOwnedM
156219
for (size_t i = 0; i < immediateParents.size(); ++i) {
157220
const TExprNode* curr = immediateParents[i];
158221
TExprNode::TPtr resultNode = newNode;
159-
while (curr != parentFilters[i]) {
222+
const TConsumerInfo& consumer = consumers[i];
223+
while (curr != consumer.OriginalFlatMap) {
160224
if (curr->IsCallable("AssumeColumnOrder")) {
161225
resultNode = ctx.ChangeChild(*ctx.RenameNode(*curr, "AssumeColumnOrderPartial"), 0, std::move(resultNode));
162226
} else if (curr->IsCallable("ExtractMembers")) {
163227
TExprNodeList columns = curr->Child(1)->ChildrenList();
164-
columns.push_back(ctx.NewAtom(curr->Child(1)->Pos(), genColumnNames[i]));
228+
if (consumer.PushdownLambda) {
229+
columns.push_back(ctx.NewAtom(curr->Child(1)->Pos(), consumer.ColumnName));
230+
}
165231
resultNode = ctx.ChangeChildren(*curr, { resultNode, ctx.NewList(curr->Child(1)->Pos(), std::move(columns)) });
166232
} else {
167233
resultNode = ctx.ChangeChild(*curr, 0, std::move(resultNode));
@@ -173,24 +239,20 @@ void FilterPushdownWithMultiusage(const TExprNode::TPtr& node, TNodeOnNodeOwnedM
173239
TCoFlatMapBase flatMap(curr);
174240
TCoConditionalValueBase cond = flatMap.Lambda().Body().Cast<TCoConditionalValueBase>();
175241
TExprNode::TPtr input = flatMap.Input().Ptr();
176-
const TTypeAnnotationNode* originalType = input->GetTypeAnn()->Cast<TListExprType>()->GetItemType();
177-
toOptimize[parentFilters[i]] = ctx.Builder(curr->Pos())
242+
toOptimize[consumer.OriginalFlatMap] = ctx.Builder(curr->Pos())
178243
.Callable(flatMap.CallableName())
179244
.Add(0, resultNode)
180245
.Lambda(1)
181246
.Param("row")
182247
.Callable(cond.CallableName())
183-
.Callable(0, "Likely")
184-
.Callable(0, "Member")
185-
.Arg(0, "row")
186-
.Atom(1, genColumnNames[i])
187-
.Seal()
248+
.Apply(0, consumer.FilterLambda)
249+
.With(0, "row")
188250
.Seal()
189-
.Apply(1, parentValueLambdas[i])
251+
.Apply(1, consumer.ValueLambda)
190252
.With(0)
191253
.Callable("CastStruct")
192254
.Arg(0, "row")
193-
.Add(1, ExpandType(curr->Pos(), *originalType, ctx))
255+
.Add(1, ExpandType(curr->Pos(), *consumer.OriginalRowType, ctx))
194256
.Seal()
195257
.Done()
196258
.Seal()

ydb/library/yql/tests/sql/dq_file/part4/canondata/result.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,9 +1842,9 @@
18421842
],
18431843
"test.test[optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug]": [
18441844
{
1845-
"checksum": "a0ef0943fe79bc1c0de40935257d84c4",
1846-
"size": 14771,
1847-
"uri": "https://{canondata_backend}/1031349/f562047a0458cc3f13d0bd9bc809240f0048d755/resource.tar.gz#test.test_optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug_/opt.yql_patched"
1845+
"checksum": "89224897f5a2a3634b8cb1b265cc7de3",
1846+
"size": 14753,
1847+
"uri": "https://{canondata_backend}/1597364/ff1a50fe9e62a1774e3bedb2b760dab09c6ff93c/resource.tar.gz#test.test_optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug_/opt.yql_patched"
18481848
}
18491849
],
18501850
"test.test[optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Plan]": [

ydb/library/yql/tests/sql/hybrid_file/part8/canondata/result.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,9 +1877,9 @@
18771877
],
18781878
"test.test[optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug]": [
18791879
{
1880-
"checksum": "467188d975bb48ebdef28538e86a61b9",
1881-
"size": 21237,
1882-
"uri": "https://{canondata_backend}/1937424/686478058d4a01eccc043c84a5d7653887ac4a2b/resource.tar.gz#test.test_optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug_/opt.yql_patched"
1880+
"checksum": "413787bd2d3c990ac10835ff27261236",
1881+
"size": 21219,
1882+
"uri": "https://{canondata_backend}/1946324/f14625f6090841e4d498eb3efcc87665d3617c25/resource.tar.gz#test.test_optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug_/opt.yql_patched"
18831883
}
18841884
],
18851885
"test.test[optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Plan]": [

ydb/library/yql/tests/sql/yt_native_file/part4/canondata/result.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,9 +1466,9 @@
14661466
],
14671467
"test.test[optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug]": [
14681468
{
1469-
"checksum": "e59f0d78f1ee9082e2b5410b8d98e452",
1470-
"size": 19543,
1471-
"uri": "https://{canondata_backend}/1936273/b976f2b9c94b54732c7e957f2c0f8282d84e2456/resource.tar.gz#test.test_optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug_/opt.yql"
1469+
"checksum": "d7a09af4b7a7e959659a6257a995686c",
1470+
"size": 19525,
1471+
"uri": "https://{canondata_backend}/1777230/b9bbb92ea2e6c35553447e66f769394067f10f63/resource.tar.gz#test.test_optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Debug_/opt.yql"
14721472
}
14731473
],
14741474
"test.test[optimizers-yql-18408_filter_multiusage_pushdown-default.txt-Plan]": [

0 commit comments

Comments
 (0)