Skip to content

Commit d2b896d

Browse files
authored
Don't lose 'any' flag after CBO. (#8674)
1 parent f3bb311 commit d2b896d

16 files changed

+289
-61
lines changed

ydb/core/kqp/ut/join/data/queries/any_join.sql

Whitespace-only changes.

ydb/core/kqp/ut/join/kqp_join_ut.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1892,7 +1892,7 @@ Y_UNIT_TEST_SUITE(KqpJoin) {
18921892
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString());
18931893
Cout << FormatResultSetYson(result.GetResultSet(0));
18941894
CompareYson(R"(
1895-
[[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["03"];#;["04"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["05"];#;["06"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["06"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]]
1895+
[[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]]
18961896
)", FormatResultSetYson(result.GetResultSet(0)));
18971897
}
18981898
}

ydb/library/yql/core/cbo/cbo_optimizer_new.cpp

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,20 +74,30 @@ void TRelOptimizerNode::Print(std::stringstream& stream, int ntabs) {
7474
stream << *Stats << "\n";
7575
}
7676

77-
TJoinOptimizerNode::TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right,
78-
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool nonReorderable) :
79-
IBaseOptimizerNode(JoinNodeType),
80-
LeftArg(left),
81-
RightArg(right),
82-
JoinConditions(joinConditions),
83-
JoinType(joinType),
84-
JoinAlgo(joinAlgo) {
85-
IsReorderable = !nonReorderable;
86-
for (auto [l,r] : joinConditions ) {
87-
LeftJoinKeys.push_back(l.AttributeName);
88-
RightJoinKeys.push_back(r.AttributeName);
89-
}
77+
// Builds a join node of the optimizer tree over the two given child nodes.
//
// Parameters:
//   left, right     - child nodes, stored as LeftArg / RightArg.
//   joinConditions  - set of (left column, right column) pairs, stored as JoinConditions.
//   joinType        - join kind, stored as JoinType.
//   joinAlgo        - join algorithm, stored as JoinAlgo.
//   leftAny         - 'ANY' flag of the left side, stored as LeftAny.
//   rightAny        - 'ANY' flag of the right side, stored as RightAny.
//   nonReorderable  - stored inverted: IsReorderable is set to !nonReorderable.
TJoinOptimizerNode::TJoinOptimizerNode(
78+
const std::shared_ptr<IBaseOptimizerNode>& left,
79+
const std::shared_ptr<IBaseOptimizerNode>& right,
80+
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
81+
const EJoinKind joinType,
82+
const EJoinAlgoType joinAlgo,
83+
bool leftAny,
84+
bool rightAny,
85+
bool nonReorderable
86+
) : IBaseOptimizerNode(JoinNodeType)
87+
, LeftArg(left)
88+
, RightArg(right)
89+
, JoinConditions(joinConditions)
90+
, JoinType(joinType)
91+
, JoinAlgo(joinAlgo)
92+
, LeftAny(leftAny)
93+
, RightAny(rightAny)
94+
, IsReorderable(!nonReorderable)
95+
{
96+
// Collect the attribute names of every join condition: the first column of each
// pair goes to LeftJoinKeys, the second to RightJoinKeys, in the set's iteration order.
for (const auto& [l,r] : joinConditions ) {
97+
LeftJoinKeys.push_back(l.AttributeName);
98+
RightJoinKeys.push_back(r.AttributeName);
9099
}
100+
}
91101

92102
TVector<TString> TJoinOptimizerNode::Labels() {
93103
auto res = LeftArg->Labels();
@@ -101,7 +111,14 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
101111
stream << " ";
102112
}
103113

104-
stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo) << ") ";
114+
stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo);
115+
if (LeftAny) {
116+
stream << ",LeftAny";
117+
}
118+
if (RightAny) {
119+
stream << ",RightAny";
120+
}
121+
stream << ") ";
105122

106123
for (auto c : JoinConditions){
107124
stream << c.first.RelName << "." << c.first.AttributeName

ydb/library/yql/core/cbo/cbo_optimizer_new.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,21 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode {
295295
TVector<TString> RightJoinKeys;
296296
EJoinKind JoinType;
297297
EJoinAlgoType JoinAlgo;
298+
/////////////////// An 'ANY' flag on a join side means that only one row per join key is kept from that side.
299+
bool LeftAny;
300+
bool RightAny;
301+
///////////////////
298302
bool IsReorderable;
299303

300304
TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
301305
const std::shared_ptr<IBaseOptimizerNode>& right,
302306
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
303307
const EJoinKind joinType,
304308
const EJoinAlgoType joinAlgo,
305-
bool nonReorderable=false);
309+
bool leftAny,
310+
bool rightAny,
311+
bool nonReorderable = false
312+
);
306313
virtual ~TJoinOptimizerNode() {}
307314
virtual TVector<TString> Labels();
308315
virtual void Print(std::stringstream& stream, int ntabs=0);

ydb/library/yql/dq/opt/dq_cbo_ut.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,16 @@ Y_UNIT_TEST(JoinSearch2Rels) {
5555
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
5656
joinConditions,
5757
InnerJoin,
58-
EJoinAlgoType::GraceJoin
58+
EJoinAlgoType::GraceJoin,
59+
true,
60+
false
5961
);
6062

6163
auto res = optimizer->JoinSearch(op);
6264
std::stringstream ss;
6365
res->Print(ss);
64-
TString expected = R"__(Join: (InnerJoin,MapJoin) b.1=a.1,
66+
Cout << ss.str() << '\n';
67+
TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1,
6568
Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA
6669
Rel: b
6770
Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA
@@ -93,8 +96,10 @@ Y_UNIT_TEST(JoinSearch3Rels) {
9396
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
9497
joinConditions,
9598
InnerJoin,
96-
EJoinAlgoType::GraceJoin
97-
);
99+
EJoinAlgoType::GraceJoin,
100+
false,
101+
false
102+
);
98103

99104
joinConditions.insert({
100105
NDq::TJoinColumn("a", "1"),
@@ -106,14 +111,17 @@ Y_UNIT_TEST(JoinSearch3Rels) {
106111
std::static_pointer_cast<IBaseOptimizerNode>(rel3),
107112
joinConditions,
108113
InnerJoin,
109-
EJoinAlgoType::GraceJoin
110-
);
114+
EJoinAlgoType::GraceJoin,
115+
true,
116+
false
117+
);
111118

112119
auto res = optimizer->JoinSearch(op2);
113120
std::stringstream ss;
114121
res->Print(ss);
122+
Cout << ss.str() << '\n';
115123

116-
TString expected = R"__(Join: (InnerJoin,MapJoin) a.1=b.1,a.1=c.1,
124+
TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1,a.1=c.1,
117125
Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA
118126
Join: (InnerJoin,MapJoin) b.1=a.1,
119127
Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA
@@ -223,7 +231,7 @@ void _DqOptimizeEquiJoinWithCosts(const std::function<IOptimizerNew*()>& optFact
223231
UNIT_ASSERT(equiJoin.Maybe<TCoEquiJoin>());
224232
auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr());
225233
auto expected = R"__((
226-
(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('"join_algo" '"MapJoin"))))
234+
(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin))))
227235
(return (EquiJoin '('() '"orders") '('() '"customer") $1 '()))
228236
)
229237
)__";

ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ class TConflictRulesCollector {
5757
private:
5858
auto GetLeftConflictsVisitor() {
5959
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) {
60-
if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
60+
if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType)) {
6161
ConflictRules_.emplace_back(
6262
SubtreeNodes_[child->RightArg],
6363
SubtreeNodes_[child->LeftArg]
6464
);
6565
}
6666

67-
if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
67+
if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType)) {
6868
ConflictRules_.emplace_back(
6969
SubtreeNodes_[child->LeftArg],
7070
SubtreeNodes_[child->RightArg]
@@ -77,18 +77,18 @@ class TConflictRulesCollector {
7777

7878
auto GetRightConflictsVisitor() {
7979
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) {
80-
if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
80+
if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType)) {
8181
ConflictRules_.emplace_back(
8282
SubtreeNodes_[child->LeftArg],
8383
SubtreeNodes_[child->RightArg]
8484
);
8585
}
8686

87-
if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
87+
if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType)) {
8888
ConflictRules_.emplace_back(
8989
SubtreeNodes_[child->RightArg],
9090
SubtreeNodes_[child->LeftArg]
91-
);
91+
);
9292
}
9393
};
9494

@@ -106,6 +106,20 @@ class TConflictRulesCollector {
106106
VisitJoinTree(childJoinNode->LeftArg, visitor);
107107
VisitJoinTree(childJoinNode->RightArg, visitor);
108108

109+
if (childJoinNode->LeftAny || !childJoinNode->IsReorderable) {
110+
ConflictRules_.emplace_back(
111+
SubtreeNodes_[childJoinNode->LeftArg],
112+
SubtreeNodes_[childJoinNode->RightArg]
113+
);
114+
}
115+
116+
if (childJoinNode->RightAny || !childJoinNode->IsReorderable) {
117+
ConflictRules_.emplace_back(
118+
SubtreeNodes_[childJoinNode->RightArg],
119+
SubtreeNodes_[childJoinNode->LeftArg]
120+
);
121+
}
122+
109123
visitor(childJoinNode);
110124
}
111125

ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ class TDPHypSolver {
8181
const std::shared_ptr<IBaseOptimizerNode>& left,
8282
const std::shared_ptr<IBaseOptimizerNode>& right,
8383
EJoinKind joinKind,
84+
bool leftAny,
85+
bool rightAny,
8486
bool isCommutative,
8587
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
8688
const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
@@ -409,6 +411,8 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
409411
const std::shared_ptr<IBaseOptimizerNode>& left,
410412
const std::shared_ptr<IBaseOptimizerNode>& right,
411413
EJoinKind joinKind,
414+
bool leftAny,
415+
bool rightAny,
412416
bool isCommutative,
413417
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
414418
const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
@@ -419,7 +423,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
419423
TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint
420424
) {
421425
double bestCost = std::numeric_limits<double>::infinity();
422-
EJoinAlgoType bestAlgo{};
426+
EJoinAlgoType bestAlgo = EJoinAlgoType::Undefined;
423427
bool bestJoinIsReversed = false;
424428

425429
for (auto joinAlgo : AllJoinAlgos) {
@@ -452,13 +456,13 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
452456
}
453457
}
454458

455-
Y_ENSURE(bestCost != std::numeric_limits<double>::infinity(), "No join was chosen!");
459+
Y_ENSURE(bestAlgo != EJoinAlgoType::Undefined, "No join was chosen!");
456460

457461
if (bestJoinIsReversed) {
458-
return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint);
462+
return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint);
459463
}
460464

461-
return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint);
465+
return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint);
462466
}
463467

464468
/*
@@ -489,6 +493,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS
489493
leftNodes,
490494
rightNodes,
491495
csgCmpEdge->JoinKind,
496+
csgCmpEdge->LeftAny,
497+
csgCmpEdge->RightAny,
492498
csgCmpEdge->IsCommutative,
493499
csgCmpEdge->JoinConditions,
494500
reversedEdge->JoinConditions,

0 commit comments

Comments
 (0)