Skip to content

Commit 806d771

Browse files
Added statistics inference for DqJoins (#11994)
1 parent c5837d4 commit 806d771

File tree

7 files changed

+251
-184
lines changed

7 files changed

+251
-184
lines changed

ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json

Lines changed: 77 additions & 77 deletions
Large diffs are not rendered by default.

ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json

Lines changed: 66 additions & 66 deletions
Large diffs are not rendered by default.

ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,39 +7,39 @@
77
"args":
88
[
99
{
10-
"op_name":"InnerJoin (Grace)",
10+
"op_name":"TableFullScan",
11+
"table":"partsupp"
12+
},
13+
{
14+
"op_name":"InnerJoin (MapJoin)",
1115
"args":
1216
[
1317
{
1418
"op_name":"TableFullScan",
15-
"table":"partsupp"
19+
"table":"supplier"
1620
},
1721
{
1822
"op_name":"InnerJoin (MapJoin)",
1923
"args":
2024
[
2125
{
22-
"op_name":"TableFullScan",
23-
"table":"supplier"
26+
"op_name":"TableLookup",
27+
"table":"region"
2428
},
2529
{
26-
"op_name":"InnerJoin (MapJoin)",
27-
"args":
28-
[
29-
{
30-
"op_name":"TableLookup",
31-
"table":"region"
32-
},
33-
{
34-
"op_name":"TableFullScan",
35-
"table":"nation"
36-
}
37-
]
30+
"op_name":"TableFullScan",
31+
"table":"nation"
3832
}
3933
]
4034
}
4135
]
42-
},
36+
}
37+
]
38+
},
39+
{
40+
"op_name":"InnerJoin (Grace)",
41+
"args":
42+
[
4343
{
4444
"op_name":"InnerJoin (Grace)",
4545
"args":
@@ -73,12 +73,12 @@
7373
]
7474
}
7575
]
76+
},
77+
{
78+
"op_name":"TableFullScan",
79+
"table":"part"
7680
}
7781
]
78-
},
79-
{
80-
"op_name":"TableFullScan",
81-
"table":"part"
8282
}
8383
]
8484
}

ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,39 +7,39 @@
77
"args":
88
[
99
{
10-
"op_name":"InnerJoin (Grace)",
10+
"op_name":"TableFullScan",
11+
"table":"partsupp"
12+
},
13+
{
14+
"op_name":"InnerJoin (MapJoin)",
1115
"args":
1216
[
1317
{
1418
"op_name":"TableFullScan",
15-
"table":"partsupp"
19+
"table":"supplier"
1620
},
1721
{
1822
"op_name":"InnerJoin (MapJoin)",
1923
"args":
2024
[
2125
{
2226
"op_name":"TableFullScan",
23-
"table":"supplier"
27+
"table":"nation"
2428
},
2529
{
26-
"op_name":"InnerJoin (MapJoin)",
27-
"args":
28-
[
29-
{
30-
"op_name":"TableFullScan",
31-
"table":"nation"
32-
},
33-
{
34-
"op_name":"TableFullScan",
35-
"table":"region"
36-
}
37-
]
30+
"op_name":"TableFullScan",
31+
"table":"region"
3832
}
3933
]
4034
}
4135
]
42-
},
36+
}
37+
]
38+
},
39+
{
40+
"op_name":"InnerJoin (Grace)",
41+
"args":
42+
[
4343
{
4444
"op_name":"InnerJoin (Grace)",
4545
"args":
@@ -73,12 +73,12 @@
7373
]
7474
}
7575
]
76+
},
77+
{
78+
"op_name":"TableFullScan",
79+
"table":"part"
7680
}
7781
]
78-
},
79-
{
80-
"op_name":"TableFullScan",
81-
"table":"part"
8282
}
8383
]
8484
}

ydb/library/yql/dq/opt/dq_opt_stat.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,69 @@ void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationCo
369369
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for GraceJoin: " << resStats->ToString();
370370
}
371371

372+
/**
373+
* Infer statistics for DqJoin
374+
* DqJoin is an intermediate join representation in Dq
375+
*/
376+
void InferStatisticsForDqJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints) {
377+
auto inputNode = TExprBase(input);
378+
auto join = inputNode.Cast<TDqJoin>();
379+
380+
auto leftArg = join.LeftInput();
381+
auto rightArg = join.RightInput();
382+
383+
auto leftStats = typeCtx->GetStats(leftArg.Raw());
384+
auto rightStats = typeCtx->GetStats(rightArg.Raw());
385+
386+
if (!leftStats || !rightStats) {
387+
return;
388+
}
389+
390+
auto joinAlgo = FromString<EJoinAlgoType>(join.JoinAlgo().StringValue());
391+
if (joinAlgo == EJoinAlgoType::Undefined) {
392+
return;
393+
}
394+
395+
auto leftLabels = InferLabels(leftStats, join.LeftJoinKeyNames());
396+
auto rightLabels = InferLabels(rightStats, join.RightJoinKeyNames());
397+
398+
leftStats = ApplyCardinalityHints(leftStats, leftLabels, hints);
399+
rightStats = ApplyCardinalityHints(rightStats, rightLabels, hints);
400+
401+
TVector<TJoinColumn> leftJoinKeys;
402+
TVector<TJoinColumn> rightJoinKeys;
403+
404+
for (size_t i=0; i<join.LeftJoinKeyNames().Size(); i++) {
405+
auto alias = ExtractAlias(join.LeftJoinKeyNames().Item(i).StringValue());
406+
auto attrName = RemoveAliases(join.LeftJoinKeyNames().Item(i).StringValue());
407+
leftJoinKeys.push_back(TJoinColumn(alias, attrName));
408+
}
409+
for (size_t i=0; i<join.RightJoinKeyNames().Size(); i++) {
410+
auto alias = ExtractAlias(join.RightJoinKeyNames().Item(i).StringValue());
411+
auto attrName = RemoveAliases(join.RightJoinKeyNames().Item(i).StringValue());
412+
rightJoinKeys.push_back(TJoinColumn(alias, attrName));
413+
}
414+
415+
auto unionOfLabels = UnionLabels(leftLabels, rightLabels);
416+
417+
auto resStats = std::make_shared<TOptimizerStatistics>(
418+
ctx.ComputeJoinStats(
419+
*leftStats,
420+
*rightStats,
421+
leftJoinKeys,
422+
rightJoinKeys,
423+
joinAlgo,
424+
ConvertToJoinKind(join.JoinType().StringValue()),
425+
FindCardHint(unionOfLabels, hints)
426+
)
427+
);
428+
429+
resStats->Labels = std::make_shared<TVector<TString>>();
430+
resStats->Labels->insert(resStats->Labels->begin(), unionOfLabels.begin(), unionOfLabels.end());
431+
typeCtx->SetStats(join.Raw(), resStats);
432+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for DqJoin: " << resStats->ToString();
433+
}
434+
372435
/**
373436
* Infer statistics for DqSource
374437
*

ydb/library/yql/dq/opt/dq_opt_stat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContex
1919
void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
2020
void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {});
2121
void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {});
22+
void InferStatisticsForDqJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {});
2223
void InferStatisticsForAsList(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
2324
bool InferStatisticsForListParam(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
2425

ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ bool TDqStatisticsTransformerBase::BeforeLambdas(const TExprNode::TPtr& input, T
6565
else if(TCoGraceJoinCore::Match(input.Get())) {
6666
InferStatisticsForGraceJoin(input, TypeCtx, Pctx, CardinalityHints);
6767
}
68+
else if (TDqJoin::Match(input.Get())) {
69+
InferStatisticsForDqJoin(input, TypeCtx, Pctx, CardinalityHints);
70+
}
6871

6972
// Do nothing in case of EquiJoin, otherwise the EquiJoin rule won't fire
7073
else if(TCoEquiJoin::Match(input.Get())){

0 commit comments

Comments
 (0)