Skip to content

Commit 0c1c043

Browse files
loochekblinkov
authored andcommitted
KeyFilter2 pushdown fix
commit_hash:a2fbba8fa0771d8186ad495cd8ccae8731c8aa44
1 parent 12bac2c commit 0c1c043

File tree

11 files changed

+240
-4
lines changed

11 files changed

+240
-4
lines changed

yt/yql/providers/yt/common/yql_configuration.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,6 @@ constexpr bool DEFAULT_ENABLE_DQ_WRITE_CONSTRAINTS = false;
116116
constexpr bool DEFAULT_USE_QL_FILTER = false;
117117
constexpr bool DEFAULT_PRUNE_QL_FILTER_LAMBDA = true;
118118

119+
constexpr bool DEFAULT_DROP_UNUSED_KEYS_FROM_KEY_FILTER = false;
120+
119121
} // NYql

yt/yql/providers/yt/common/yql_yt_settings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ TYtConfiguration::TYtConfiguration(TTypeAnnotationContext& typeCtx)
528528
REGISTER_SETTING(*this, JobBlockOutput).Parser([](const TString& v) { return FromString<EBlockOutputMode>(v); });
529529
REGISTER_SETTING(*this, _EnableYtDqProcessWriteConstraints);
530530
REGISTER_SETTING(*this, CompactForDistinct);
531+
REGISTER_SETTING(*this, DropUnusedKeysFromKeyFilter);
531532
}
532533

533534
EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings) {

yt/yql/providers/yt/common/yql_yt_settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ struct TYtSettings {
302302
NCommon::TConfSetting<TSet<NUdf::EDataSlot>, false> JobBlockOutputSupportedDataTypes;
303303
NCommon::TConfSetting<bool, false> _EnableYtDqProcessWriteConstraints;
304304
NCommon::TConfSetting<bool, false> CompactForDistinct;
305+
NCommon::TConfSetting<bool, false> DropUnusedKeysFromKeyFilter;
305306
};
306307

307308
EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings);

yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class TYtPhysicalOptProposalTransformer : public TOptimizeTransformerBase {
158158

159159
TMaybe<bool> CanFuseLambdas(const NNodes::TCoLambda& innerLambda, const NNodes::TCoLambda& outerLambda, TExprContext& ctx) const;
160160

161+
NNodes::TExprBase RebuildKeyFilterAfterPushDown(NNodes::TExprBase filter, size_t usedKeysCount, TExprContext& ctx) const;
161162

162163
private:
163164
const TYtState::TPtr State_;

yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,60 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushMergeLimitToInput(T
7272
.Done();
7373
}
7474

75+
TExprBase TYtPhysicalOptProposalTransformer::RebuildKeyFilterAfterPushDown(TExprBase filter, size_t usedKeysCount, TExprContext& ctx) const {
76+
auto origBoundTupleType = filter.Ref().GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TTupleExprType>()->GetItems()[0]->Cast<TTupleExprType>();
77+
auto origBoundTupleKeyCount = origBoundTupleType->GetSize() - 1;
78+
79+
auto origBoundTupleArg = ctx.NewArgument(filter.Pos(), "boundTuple");
80+
TExprNode::TListType newBoundTupleItems;
81+
for (size_t i = 0; i < usedKeysCount; i++) {
82+
newBoundTupleItems.push_back(
83+
Build<TCoNth>(ctx, filter.Pos())
84+
.Tuple(origBoundTupleArg)
85+
.Index(ctx.NewAtom(filter.Pos(), i))
86+
.Done()
87+
.Ptr()
88+
);
89+
}
90+
newBoundTupleItems.push_back(
91+
Build<TCoNth>(ctx, filter.Pos())
92+
.Tuple(origBoundTupleArg)
93+
.Index(ctx.NewAtom(filter.Pos(), origBoundTupleKeyCount))
94+
.Done()
95+
.Ptr()
96+
);
97+
98+
auto handleBoundTuple = Build<TCoLambda>(ctx, filter.Pos())
99+
.Args({origBoundTupleArg})
100+
.Body<TExprList>()
101+
.Add(std::move(newBoundTupleItems))
102+
.Build()
103+
.Done();
104+
105+
return Build<TCoMap>(ctx, filter.Pos())
106+
.Input(filter)
107+
.Lambda<TCoLambda>()
108+
.Args({"boundTuple"})
109+
.Body<TExprList>()
110+
.Add<TExprApplier>()
111+
.Apply(handleBoundTuple)
112+
.With<TCoNth>(0)
113+
.Tuple("boundTuple")
114+
.Index(ctx.NewAtom(filter.Pos(), 0))
115+
.Build()
116+
.Build()
117+
.Add<TExprApplier>()
118+
.Apply(handleBoundTuple)
119+
.With<TCoNth>(0)
120+
.Tuple("boundTuple")
121+
.Index(ctx.NewAtom(filter.Pos(), 1))
122+
.Build()
123+
.Build()
124+
.Build()
125+
.Build()
126+
.Done();
127+
}
128+
75129
TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExprBase node, TExprContext& ctx) const {
76130
if (node.Ref().HasResult() && node.Ref().GetResult().Type() != TExprNode::World) {
77131
return node;
@@ -172,6 +226,9 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExp
172226
const auto kfColumns = GetKeyFilterColumns(section, kfType);
173227
YQL_ENSURE(!kfColumns.empty());
174228
for (auto path: section.Paths()) {
229+
TYtPathInfo pathInfo(path);
230+
auto pathRowSpec = pathInfo.Table->RowSpec;
231+
175232
if (auto maybeOp = getInnerOpForUpdate(path, kfColumns)) {
176233
auto innerOp = maybeOp.Cast();
177234
if (kfType == EYtSettingType::KeyFilter2) {
@@ -203,10 +260,46 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExp
203260
}
204261

205262
auto innerOpSection = innerOp.Input().Item(0);
206-
auto updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos())
207-
.InitFrom(innerOpSection)
208-
.Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *NYql::KeepOnlySettings(section.Settings().Ref(), EYtSettingType::KeyFilter | EYtSettingType::KeyFilter2, ctx), ctx))
209-
.Done();
263+
TExprNode::TPtr updatedSection;
264+
if (kfType == EYtSettingType::KeyFilter2 && State_->Configuration->DropUnusedKeysFromKeyFilter.Get().GetOrElse(DEFAULT_DROP_UNUSED_KEYS_FROM_KEY_FILTER)) {
265+
for (auto innerOpPath: innerOpSection.Paths()) {
266+
TYtPathInfo innerOpPathInfo(innerOpPath);
267+
auto innerOpPathRowSpec = innerOpPathInfo.Table->RowSpec;
268+
269+
YQL_ENSURE(kfColumns.size() <= innerOpPathRowSpec->SortedBy.size());
270+
for (size_t i = 0; i < kfColumns.size(); i++) {
271+
YQL_ENSURE(innerOpPathRowSpec->SortedBy[i] == pathRowSpec->SortedBy[i]);
272+
}
273+
}
274+
275+
TExprNode::TListType rebuiltKeyFilters;
276+
for (auto filter : keyFilters) {
277+
YQL_ENSURE(filter->ChildrenSize() == 2);
278+
auto rebuiltFilter = RebuildKeyFilterAfterPushDown(TExprBase(filter->HeadPtr()), kfColumns.size(), ctx);
279+
rebuiltKeyFilters.push_back(Build<TCoNameValueTuple>(ctx, innerOpSection.Settings().Pos())
280+
.Name().Build("keyFilter2")
281+
.Value<TExprList>()
282+
.Add(rebuiltFilter)
283+
.Add(filter->Child(1))
284+
.Build()
285+
.Done()
286+
.Ptr()
287+
);
288+
}
289+
290+
updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos())
291+
.InitFrom(innerOpSection)
292+
.Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *ctx.NewList(innerOpSection.Settings().Pos(), std::move(rebuiltKeyFilters)), ctx))
293+
.Done()
294+
.Ptr();
295+
296+
} else {
297+
updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos())
298+
.InitFrom(innerOpSection)
299+
.Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *NYql::KeepOnlySettings(section.Settings().Ref(), EYtSettingType::KeyFilter | EYtSettingType::KeyFilter2, ctx), ctx))
300+
.Done()
301+
.Ptr();
302+
}
210303

211304
auto updatedSectionList = Build<TYtSectionList>(ctx, innerOp.Input().Pos()).Add(updatedSection).Done();
212305
auto updatedInnerOp = ctx.ChangeChild(innerOp.Ref(), TYtTransientOpBase::idx_Input, updatedSectionList.Ptr());
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"key1"="1";"key2"="aaa";"key3"=1};
2+
{"key1"="2";"key2"="aab";"key3"=2};
3+
{"key1"="3";"key2"=#;"key3"=3};
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
{
2+
"_yql_row_spec" = {
3+
"Type" = [
4+
"StructType";
5+
[
6+
[
7+
"key1";
8+
[
9+
"DataType";
10+
"String";
11+
];
12+
];
13+
[
14+
"key2";
15+
[
16+
"OptionalType";
17+
[
18+
"DataType";
19+
"String";
20+
];
21+
];
22+
];
23+
[
24+
"key3";
25+
[
26+
"DataType";
27+
"Int32";
28+
];
29+
];
30+
];
31+
];
32+
"SortDirections" = [
33+
1;
34+
1;
35+
1;
36+
];
37+
"SortMembers" = [
38+
"key1";
39+
"key2";
40+
"key3";
41+
];
42+
"SortedBy" = [
43+
"key1";
44+
"key2";
45+
"key3";
46+
];
47+
"SortedByTypes" = [
48+
[
49+
"DataType";
50+
"String";
51+
];
52+
[
53+
"OptionalType";
54+
[
55+
"DataType";
56+
"String";
57+
];
58+
];
59+
[
60+
"DataType";
61+
"Int32";
62+
];
63+
];
64+
}
65+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"key1"="4";"key2"="aad";};
2+
{"key1"="5";"key2"="aae";};
3+
{"key1"="6";"key2"="aaf";};
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"_yql_row_spec" = {
3+
"Type" = [
4+
"StructType";
5+
[
6+
[
7+
"key1";
8+
[
9+
"DataType";
10+
"String";
11+
];
12+
];
13+
[
14+
"key2";
15+
[
16+
"DataType";
17+
"String";
18+
];
19+
];
20+
];
21+
];
22+
"SortDirections" = [
23+
1;
24+
1;
25+
];
26+
"SortMembers" = [
27+
"key1";
28+
"key2";
29+
];
30+
"SortedBy" = [
31+
"key1";
32+
"key2";
33+
];
34+
"SortedByTypes" = [
35+
[
36+
"DataType";
37+
"String";
38+
];
39+
[
40+
"DataType";
41+
"String";
42+
];
43+
];
44+
}
45+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
in Input1 yql-19420-input1.txt
2+
in Input2 yql-19420-input2.txt

0 commit comments

Comments
 (0)