Skip to content

Commit 89b94f9

Browse files
Fix Olap Filter selectivity computation (#9021)
Co-authored-by: Pavel Ivanov <pudge1000-7@ydb.tech>
1 parent 326f57a commit 89b94f9

File tree

13 files changed

+220
-44
lines changed

13 files changed

+220
-44
lines changed

ydb/core/kqp/opt/kqp_query_plan.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,11 @@ class TxPlanSerializer {
11391139
{"gt", ">"},
11401140
{"gte", ">="}
11411141
};
1142+
THashSet<TString> strRegexp = {
1143+
"string_contains",
1144+
"starts_with",
1145+
"ends_with"
1146+
};
11421147
TString compSign = TString(listPtr->Child(0)->Content());
11431148
if (strComp.contains(compSign)) {
11441149
TString attr = TString(listPtr->Child(1)->Content());
@@ -1148,6 +1153,8 @@ class TxPlanSerializer {
11481153
}
11491154

11501155
return Sprintf("%s %s %s", attr.c_str(), strComp[compSign].c_str(), value.c_str());
1156+
} else if (strRegexp.contains(compSign)) {
1157+
return compSign;
11511158
}
11521159
}
11531160
}

ydb/core/kqp/opt/kqp_statistics_transformer.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,14 +380,19 @@ class TKqpOlapPredicateSelectivityComputer: public TPredicateSelectivityComputer
380380
.Struct(rowArg)
381381
.Name().Build(attr)
382382
.Done();
383-
383+
384384
auto value = TExprBase(listPtr->ChildPtr(2));
385+
if (listPtr->ChildPtr(2)->ChildrenSize() >= 2 && listPtr->ChildPtr(2)->ChildPtr(0)->Content() == "just") {
386+
value = TExprBase(listPtr->ChildPtr(2)->ChildPtr(1));
387+
}
385388
if (OlapCompSigns.contains(compSign)) {
386389
resSelectivity = this->ComputeComparisonSelectivity(member, value);
387390
} else if (compSign == "eq") {
388391
resSelectivity = this->ComputeEqualitySelectivity(member, value);
389392
} else if (compSign == "neq") {
390393
resSelectivity = 1 - this->ComputeEqualitySelectivity(member, value);
394+
} else if (RegexpSigns.contains(compSign)) {
395+
return 0.5;
391396
}
392397
}
393398
}
@@ -408,6 +413,12 @@ class TKqpOlapPredicateSelectivityComputer: public TPredicateSelectivityComputer
408413
{"gt"},
409414
{"gte"}
410415
};
416+
417+
THashSet<TString> RegexpSigns = {
418+
"string_contains",
419+
"starts_with",
420+
"ends_with"
421+
};
411422
};
412423

413424
void InferStatisticsForOlapFilter(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
@@ -422,13 +433,28 @@ void InferStatisticsForOlapFilter(const TExprNode::TPtr& input, TTypeAnnotationC
422433

423434
double selectivity = TKqpOlapPredicateSelectivityComputer(inputStats).Compute(filter.Condition());
424435

425-
auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns );
436+
auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns, inputStats->ColumnStatistics );
426437
outputStats.Labels = inputStats->Labels;
427438
outputStats.Selectivity *= selectivity;
428439

440+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for OLAP Filter: " << outputStats.ToString();
441+
442+
429443
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(std::move(outputStats)) );
430444
}
431445

446+
// Propagates statistics to an OLAP table read node: looks up the statistics
// stored in the type context for the body of the read's Process() lambda and,
// if any are found, registers that same statistics object for the read node.
void InferStatisticsForOlapRead(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
447+
auto inputNode = TExprBase(input);
448+
auto olapRead = inputNode.Cast<TKqpReadOlapTableRangesBase>();
449+
450+
auto process = olapRead.Process();
451+
// Statistics previously recorded for the lambda body (may be absent).
auto lambdaStats = typeCtx->GetStats(process.Body().Raw());
452+
// Without statistics for the body this function sets nothing on the read node.
if (lambdaStats) {
453+
YQL_CLOG(TRACE, CoreDq) << "Infer statistics for OLAP table: " << lambdaStats->ToString();
454+
typeCtx->SetStats(input.Get(), lambdaStats);
455+
}
456+
}
457+
432458
void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx,
433459
TKqpOptimizeContext& kqpCtx) {
434460
auto inputNode = TExprBase(input);
@@ -543,6 +569,8 @@ bool TKqpStatisticsTransformer::AfterLambdasSpecific(const TExprNode::TPtr& inpu
543569
bool matched = true;
544570
if (TKqpPhysicalTx::Match(input.Get())) {
545571
AppendTxStats(input, TypeCtx, TxStats);
572+
} else if (TKqpReadOlapTableRangesBase::Match(input.Get())) {
573+
InferStatisticsForOlapRead(input, TypeCtx);
546574
} else {
547575
matched = false;
548576
}

ydb/core/kqp/ut/common/kqp_ut_common.cpp

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,26 +1344,30 @@ void WaitForZeroSessions(const NKqp::TKqpCounters& counters) {
13441344
UNIT_ASSERT_C(count, "Unable to wait for proper active session count, it looks like cancelation doesn`t work");
13451345
}
13461346

1347-
/*
 * Reduces a plan subtree to the operators that matter for join-order checks.
 * The decision is based on the "Name" of the first entry in "Operators":
 *   - name contains "Join" or "Union" (or "Filter" when params.IncludeFilters
 *     is set): the node is kept, every child in "Plans" is simplified
 *     recursively, opt["Plans"] is overwritten with the simplified children
 *     and opt is returned;
 *   - name contains "Table": the node is returned unchanged;
 *   - otherwise (including when there is no "Operators" key): the node is
 *     dropped and the result is the simplification of its first child plan.
 * Note that opt is taken by non-const reference and is modified in place.
 */
NJson::TJsonValue SimplifyPlan(NJson::TJsonValue& opt) {
1347+
NJson::TJsonValue SimplifyPlan(NJson::TJsonValue& opt, const TGetPlanParams& params) {
13481348
if (auto ops = opt.GetMapSafe().find("Operators"); ops != opt.GetMapSafe().end()) {
13491349
/* Only the first operator of the node decides how the node is treated. */
auto opName = ops->second.GetArraySafe()[0].GetMapSafe().at("Name").GetStringSafe();
1350-
if (opName.find("Join") != TString::npos || opName.find("Union") != TString::npos ) {
1350+
if (
1351+
opName.find("Join") != TString::npos ||
1352+
opName.find("Union") != TString::npos ||
1353+
(opName.find("Filter") != TString::npos && params.IncludeFilters)
1354+
) {
13511355
NJson::TJsonValue newChildren;
13521356

13531357
/* Each child is copied (auto c) before being simplified recursively. */
for (auto c : opt.GetMapSafe().at("Plans").GetArraySafe()) {
1354-
newChildren.AppendValue(SimplifyPlan(c));
1358+
newChildren.AppendValue(SimplifyPlan(c, params));
13551359
}
13561360

13571361
opt["Plans"] = newChildren;
13581362
return opt;
13591363
}
1360-
else if (opName.find("Table") != TString::npos ) {
1364+
else if (opName.find("Table") != TString::npos) {
13611365
return opt;
13621366
}
13631367
}
13641368

13651369
/* Node is neither kept nor a table: skip it and continue with its first child. */
auto firstPlan = opt.GetMapSafe().at("Plans").GetArraySafe()[0];
1366-
return SimplifyPlan(firstPlan);
1370+
return SimplifyPlan(firstPlan, params);
13671371
}
13681372

13691373
bool JoinOrderAndAlgosMatch(const NJson::TJsonValue& opt, const NJson::TJsonValue& ref) {
@@ -1398,7 +1402,7 @@ bool JoinOrderAndAlgosMatch(const NJson::TJsonValue& opt, const NJson::TJsonValu
13981402
bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference){
13991403
NJson::TJsonValue optRoot;
14001404
NJson::ReadJsonTree(optimized, &optRoot, true);
1401-
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"));
1405+
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"), {});
14021406

14031407
NJson::TJsonValue refRoot;
14041408
NJson::ReadJsonTree(reference, &refRoot, true);
@@ -1407,11 +1411,18 @@ bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference){
14071411
}
14081412

14091413
/* Temporary solution to canonize tests */
1410-
NJson::TJsonValue GetDetailedJoinOrderImpl(const NJson::TJsonValue& opt) {
1414+
NJson::TJsonValue GetDetailedJoinOrderImpl(const NJson::TJsonValue& opt, const TGetPlanParams& params) {
14111415
NJson::TJsonValue res;
14121416

1417+
if (!opt.GetMapSafe().contains("Plans") && !params.IncludeTables) {
1418+
return res;
1419+
}
1420+
14131421
auto op = opt.GetMapSafe().at("Operators").GetArraySafe()[0];
14141422
res["op_name"] = op.GetMapSafe().at("Name").GetStringSafe();
1423+
if (params.IncludeOptimizerEstimation && op.GetMapSafe().contains("E-Rows")) {
1424+
res["e-size"] = op.GetMapSafe().at("E-Rows").GetStringSafe();
1425+
}
14151426

14161427

14171428
if (!opt.GetMapSafe().contains("Plans")) {
@@ -1420,17 +1431,17 @@ NJson::TJsonValue GetDetailedJoinOrderImpl(const NJson::TJsonValue& opt) {
14201431
}
14211432

14221433
auto subplans = opt.GetMapSafe().at("Plans").GetArraySafe();
1423-
for (size_t i = 0; i< subplans.size(); ++i) {
1424-
res["args"].AppendValue(GetDetailedJoinOrderImpl(subplans[i]));
1434+
for (size_t i = 0; i < subplans.size(); ++i) {
1435+
res["args"].AppendValue(GetDetailedJoinOrderImpl(subplans[i], params));
14251436
}
14261437
return res;
14271438
}
14281439

1429-
/*
 * Parses the JSON plan text, simplifies the subtree stored under
 * "SimplifiedPlan" with SimplifyPlan, and converts the simplified tree with
 * GetDetailedJoinOrderImpl. In the updated version params is forwarded to both
 * steps and the plan is simplified once (the removed lines simplified it twice).
 */
NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan) {
1440+
NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan, const TGetPlanParams& params) {
14301441
NJson::TJsonValue optRoot;
14311442
NJson::ReadJsonTree(deserializedPlan, &optRoot, true);
1432-
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"));
1433-
return GetDetailedJoinOrderImpl(SimplifyPlan(optRoot));
1443+
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"), params);
1444+
return GetDetailedJoinOrderImpl(optRoot, params);
14341445
}
14351446

14361447
NJson::TJsonValue GetJoinOrderImpl(const NJson::TJsonValue& opt) {
@@ -1452,8 +1463,8 @@ NJson::TJsonValue GetJoinOrderImpl(const NJson::TJsonValue& opt) {
14521463
/*
 * Parses the JSON plan text, simplifies the subtree stored under
 * "SimplifiedPlan" with SimplifyPlan, and converts the simplified tree with
 * GetJoinOrderImpl. The updated version passes default-constructed plan
 * parameters ({}) and simplifies once (the removed lines simplified twice).
 */
NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan) {
14531464
NJson::TJsonValue optRoot;
14541465
NJson::ReadJsonTree(deserializedPlan, &optRoot, true);
1455-
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"));
1456-
return GetJoinOrderImpl(SimplifyPlan(optRoot));
1466+
optRoot = SimplifyPlan(optRoot.GetMapSafe().at("SimplifiedPlan"), {});
1467+
return GetJoinOrderImpl(optRoot);
14571468
}
14581469

14591470

ydb/core/kqp/ut/common/kqp_ut_common.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,14 @@ void WaitForZeroSessions(const NKqp::TKqpCounters& counters);
365365

366366
bool JoinOrderAndAlgosMatch(const TString& optimized, const TString& reference);
367367

368+
/* Options for GetDetailedJoinOrder that select which parts of a plan appear in its result. */
struct TGetPlanParams {
369+
/* When true, SimplifyPlan also keeps operators whose name contains "Filter". */
bool IncludeFilters = false;
370+
/* When true, an operator's "E-Rows" value (if present) is reported under the "e-size" key. */
bool IncludeOptimizerEstimation = false;
371+
/* When false, plan nodes without a "Plans" entry yield an empty result in GetDetailedJoinOrderImpl. */
bool IncludeTables = true;
372+
};
373+
368374
/* Gets join order with details as: join algo, join type and scan type. */
369-
NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan);
375+
NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan, const TGetPlanParams& params = {});
370376

371377
/* Gets tables join order without details : only tables. */
372378
NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan);

0 commit comments

Comments
 (0)