Skip to content

Commit 8053e6d

Browse files
Added Query Hints for the Optimizer (#7629)
1 parent b7a4f95 commit 8053e6d

File tree

124 files changed

+2599
-1699
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

124 files changed

+2599
-1699
lines changed

ydb/core/kqp/opt/kqp_opt.h

Lines changed: 34 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#include <ydb/core/kqp/provider/yql_kikimr_expr_nodes.h>
55
#include <ydb/core/kqp/provider/yql_kikimr_provider.h>
66
#include <ydb/core/kqp/provider/yql_kikimr_settings.h>
7+
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>
78
#include <ydb/library/yql/utils/log/log.h>
89

910
namespace NKikimr::NKqp::NOpt {
@@ -30,17 +31,45 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
3031
int JoinsCount{};
3132
int EquiJoinsCount{};
3233
std::shared_ptr<NJson::TJsonValue> OverrideStatistics{};
34+
std::shared_ptr<NYql::TCardinalityHints> CardinalityHints{};
35+
std::shared_ptr<NYql::TJoinAlgoHints> JoinAlgoHints{};
36+
37+
std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() {
38+
if (Config->OptOverrideStatistics.Get()) {
39+
if (!OverrideStatistics) {
40+
auto jsonValue = new NJson::TJsonValue();
41+
NJson::ReadJsonTree(*Config->OptOverrideStatistics.Get(), jsonValue, true);
42+
OverrideStatistics = std::shared_ptr<NJson::TJsonValue>(jsonValue);
43+
}
44+
return OverrideStatistics;
3345

34-
std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() const {
35-
if (Config->OverrideStatistics.Get()) {
36-
auto jsonValue = new NJson::TJsonValue();
37-
NJson::ReadJsonTree(*Config->OverrideStatistics.Get(), jsonValue, true);
38-
return std::shared_ptr<NJson::TJsonValue>(jsonValue);
3946
} else {
4047
return std::shared_ptr<NJson::TJsonValue>();
4148
}
4249
}
4350

51+
/**
 * Returns a copy of the cardinality hints built from the OptCardinalityHints
 * setting.
 *
 * The hints object is constructed from the setting's value on the first call
 * and kept in CardinalityHints for later calls. When the setting is absent, a
 * default-constructed hints object is returned instead.
 */
NYql::TCardinalityHints GetCardinalityHints() {
    // No setting: nothing to build or cache.
    if (!Config->OptCardinalityHints.Get()) {
        return NYql::TCardinalityHints();
    }

    // Lazily build the cached instance from the setting's value.
    if (!CardinalityHints) {
        CardinalityHints = std::make_shared<NYql::TCardinalityHints>(*Config->OptCardinalityHints.Get());
    }

    return *CardinalityHints;
}
61+
62+
/**
 * Returns a copy of the join-algorithm hints built from the OptJoinAlgoHints
 * setting.
 *
 * The hints object is constructed from the setting's value on the first call
 * and kept in JoinAlgoHints for later calls. When the setting is absent, a
 * default-constructed hints object is returned instead.
 */
NYql::TJoinAlgoHints GetJoinAlgoHints() {
    // No setting: nothing to build or cache.
    if (!Config->OptJoinAlgoHints.Get()) {
        return NYql::TJoinAlgoHints();
    }

    // Lazily build the cached instance from the setting's value.
    if (!JoinAlgoHints) {
        JoinAlgoHints = std::make_shared<NYql::TJoinAlgoHints>(*Config->OptJoinAlgoHints.Get());
    }

    return *JoinAlgoHints;
}
72+
4473
bool IsDataQuery() const {
4574
return QueryCtx->Type == NYql::EKikimrQueryType::Dml;
4675
}

ydb/core/kqp/opt/kqp_query_plan.cpp

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1993,6 +1993,58 @@ NJson::TJsonValue ReconstructQueryPlanRec(const NJson::TJsonValue& plan,
19931993
result["Stats"] = plan.GetMapSafe().at("Stats");
19941994
}
19951995

1996+
if (plan.GetMapSafe().at("Node Type") == "TableLookupJoin" && plan.GetMapSafe().contains("Table")) {
1997+
result["Node Type"] = "LookupJoin";
1998+
NJson::TJsonValue newOps;
1999+
NJson::TJsonValue op;
2000+
2001+
op["Name"] = "LookupJoin";
2002+
op["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns");
2003+
2004+
newOps.AppendValue(op);
2005+
result["Operators"] = newOps;
2006+
2007+
NJson::TJsonValue newPlans;
2008+
2009+
NJson::TJsonValue lookupPlan;
2010+
lookupPlan["Node Type"] = "TableLookup";
2011+
lookupPlan["PlanNodeType"] = "TableLookup";
2012+
2013+
NJson::TJsonValue lookupOps;
2014+
NJson::TJsonValue lookupOp;
2015+
2016+
lookupOp["Name"] = "TableLookup";
2017+
lookupOp["Columns"] = plan.GetMapSafe().at("Columns");
2018+
lookupOp["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns");
2019+
lookupOp["Table"] = plan.GetMapSafe().at("Table");
2020+
2021+
if (plan.GetMapSafe().contains("E-Cost")) {
2022+
lookupOp["E-Cost"] = plan.GetMapSafe().at("E-Cost");
2023+
}
2024+
if (plan.GetMapSafe().contains("E-Rows")) {
2025+
lookupOp["E-Rows"] = plan.GetMapSafe().at("E-Rows");
2026+
}
2027+
if (plan.GetMapSafe().contains("E-Size")) {
2028+
lookupOp["E-Size"] = plan.GetMapSafe().at("E-Size");
2029+
}
2030+
2031+
lookupOps.AppendValue(lookupOp);
2032+
lookupPlan["Operators"] = lookupOps;
2033+
2034+
newPlans.AppendValue(ReconstructQueryPlanRec(
2035+
plan.GetMapSafe().at("Plans").GetArraySafe()[0],
2036+
0,
2037+
planIndex,
2038+
precomputes,
2039+
nodeCounter));
2040+
2041+
newPlans.AppendValue(lookupPlan);
2042+
2043+
result["Plans"] = newPlans;
2044+
2045+
return result;
2046+
}
2047+
19962048
if (!plan.GetMapSafe().contains("Operators")) {
19972049
NJson::TJsonValue planInputs;
19982050

ydb/core/kqp/opt/kqp_statistics_transformer.cpp

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo
8181
keyColumns,
8282
inputStats->ColumnStatistics);
8383

84-
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table, nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", byteSize: " << stats->ByteSize;
84+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table" << stats->ToString();
8585

8686
typeCtx->SetStats(input.Get(), stats);
8787
}
@@ -90,14 +90,14 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo
9090
* Infer statistics for KQP table
9191
*/
9292
void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx,
93-
const TKqpOptimizeContext& kqpCtx) {
93+
TKqpOptimizeContext& kqpCtx) {
9494

9595
auto inputNode = TExprBase(input);
9696
auto readTable = inputNode.Cast<TKqpTable>();
9797
auto path = readTable.Path();
9898

9999
const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path.Value());
100-
if (!tableData.Metadata->StatsLoaded && !kqpCtx.Config->OverrideStatistics.Get()) {
100+
if (!tableData.Metadata->StatsLoaded && !kqpCtx.Config->OptOverrideStatistics.Get()) {
101101
return;
102102
}
103103

@@ -107,7 +107,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
107107

108108
auto keyColumns = TIntrusivePtr<TOptimizerStatistics::TKeyColumns>(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames));
109109
auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns);
110-
if (kqpCtx.Config->OverrideStatistics.Get()) {
110+
if (kqpCtx.Config->OptOverrideStatistics.Get()) {
111111
stats = OverrideStatistics(*stats, path.Value(), kqpCtx.GetOverrideStatistics());
112112
}
113113
if (stats->ColumnStatistics) {
@@ -116,7 +116,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
116116
}
117117
}
118118

119-
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", byteSize: " << stats->ByteSize << ", nKeyColumns: " << stats->KeyColumns->Data.size();
119+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ": " << stats->ToString();
120120

121121
typeCtx->SetStats(input.Get(), stats);
122122
}
@@ -141,14 +141,17 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation
141141
}
142142
auto byteSize = inputStats->ByteSize * (nAttrs / (double) inputStats->Ncols);
143143

144-
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(
144+
auto res = std::make_shared<TOptimizerStatistics>(
145145
EStatisticsType::BaseTable,
146146
inputStats->Nrows,
147147
nAttrs,
148148
byteSize,
149149
0,
150150
inputStats->KeyColumns,
151-
inputStats->ColumnStatistics));
151+
inputStats->ColumnStatistics);
152+
153+
typeCtx->SetStats(input.Get(), res);
154+
152155
}
153156

154157
/**
@@ -280,8 +283,7 @@ void InferStatisticsForReadTableIndexRanges(const TExprNode::TPtr& input, TTypeA
280283

281284
typeCtx->SetStats(input.Get(), stats);
282285

283-
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for index: nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", nKeyColumns: " << stats->KeyColumns->Data.size();
284-
286+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for index: " << stats->ToString();
285287
}
286288

287289
/***
@@ -306,21 +308,22 @@ void InferStatisticsForResultBinding(const TExprNode::TPtr& input, TTypeAnnotati
306308
std::from_chars(bindingNoStr.data(), bindingNoStr.data() + bindingNoStr.size(), bindingNo);
307309
std::from_chars(resultNoStr.data(), resultNoStr.data() + resultNoStr.size(), resultNo);
308310

309-
typeCtx->SetStats(param.Name().Raw(), txStats[bindingNo][resultNo]);
310-
typeCtx->SetStats(inputNode.Raw(), txStats[bindingNo][resultNo]);
311+
auto resStats = txStats[bindingNo][resultNo];
312+
typeCtx->SetStats(param.Name().Raw(), resStats);
313+
typeCtx->SetStats(inputNode.Raw(), resStats);
311314
}
312315
}
313316
}
314317

315318
void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx,
316-
const TKqpOptimizeContext& kqpCtx) {
319+
TKqpOptimizeContext& kqpCtx) {
317320
auto inputNode = TExprBase(input);
318321
if (auto wrapBase = inputNode.Maybe<TDqSourceWrapBase>()) {
319322
if (auto maybeS3DataSource = wrapBase.Cast().DataSource().Maybe<TS3DataSource>()) {
320323
auto s3DataSource = maybeS3DataSource.Cast();
321324
if (s3DataSource.Name()) {
322325
auto path = s3DataSource.Name().Cast().StringValue();
323-
if (kqpCtx.Config->OverrideStatistics.Get() && path) {
326+
if (kqpCtx.Config->OptOverrideStatistics.Get() && path) {
324327
auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, 0.0, 0, 0, 0.0, TIntrusivePtr<TOptimizerStatistics::TKeyColumns>());
325328
stats = OverrideStatistics(*stats, path, kqpCtx.GetOverrideStatistics());
326329
if (stats->ByteSize == 0.0) {

ydb/core/kqp/opt/kqp_statistics_transformer.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,13 @@ using namespace NOpt;
2929
class TKqpStatisticsTransformer : public NYql::NDq::TDqStatisticsTransformerBase {
3030

3131
const TKikimrConfiguration::TPtr& Config;
32-
const TKqpOptimizeContext& KqpCtx;
32+
TKqpOptimizeContext& KqpCtx;
3333
TVector<TVector<std::shared_ptr<TOptimizerStatistics>>> TxStats;
3434

3535
public:
3636
TKqpStatisticsTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typeCtx,
3737
const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx) :
38-
TDqStatisticsTransformerBase(&typeCtx, pctx),
38+
TDqStatisticsTransformerBase(&typeCtx, pctx, kqpCtx->GetCardinalityHints()),
3939
Config(config),
4040
KqpCtx(*kqpCtx) {}
4141

ydb/core/kqp/opt/logical/kqp_opt_log.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,9 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
153153
*opt, [](auto& rels, auto label, auto node, auto stat) {
154154
rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), stat, node));
155155
},
156-
KqpCtx.EquiJoinsCount);
156+
KqpCtx.EquiJoinsCount,
157+
KqpCtx.GetCardinalityHints(),
158+
KqpCtx.GetJoinAlgoHints());
157159
DumpAppliedRule("OptimizeEquiJoinWithCosts", node.Ptr(), output.Ptr(), ctx);
158160
return output;
159161
}

ydb/core/kqp/provider/yql_kikimr_settings.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,9 @@ TKikimrConfiguration::TKikimrConfiguration() {
8181
REGISTER_SETTING(*this, OptEnablePredicateExtract);
8282
REGISTER_SETTING(*this, OptEnableOlapPushdown);
8383
REGISTER_SETTING(*this, OptEnableOlapProvideComputeSharding);
84-
REGISTER_SETTING(*this, OverrideStatistics);
84+
REGISTER_SETTING(*this, OptOverrideStatistics);
85+
REGISTER_SETTING(*this, OptCardinalityHints);
86+
REGISTER_SETTING(*this, OptJoinAlgoHints);
8587
REGISTER_SETTING(*this, OverridePlanner);
8688
REGISTER_SETTING(*this, UseGraceJoinCoreForMap);
8789

ydb/core/kqp/provider/yql_kikimr_settings.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,11 +50,14 @@ struct TKikimrSettings {
5050
NCommon::TConfSetting<bool, false> UseLlvm;
5151
NCommon::TConfSetting<bool, false> EnableLlvm;
5252
NCommon::TConfSetting<NDq::EHashJoinMode, false> HashJoinMode;
53-
NCommon::TConfSetting<TString, false> OverrideStatistics;
5453
NCommon::TConfSetting<ui64, false> EnableSpillingNodes;
5554
NCommon::TConfSetting<TString, false> OverridePlanner;
5655
NCommon::TConfSetting<bool, false> UseGraceJoinCoreForMap;
5756

57+
NCommon::TConfSetting<TString, false> OptOverrideStatistics;
58+
NCommon::TConfSetting<TString, false> OptCardinalityHints;
59+
NCommon::TConfSetting<TString, false> OptJoinAlgoHints;
60+
5861
/* Disable optimizer rules */
5962
NCommon::TConfSetting<bool, false> OptDisableTopSort;
6063
NCommon::TConfSetting<bool, false> OptDisableSqlInToJoin;

0 commit comments

Comments
 (0)