Skip to content

Commit a7369f7

Browse files
authored
Delay making EquiJoin inputs unordered until CROSS JOIN is rewritten to inner (#10229)
1 parent 1120c1a commit a7369f7

File tree

23 files changed

+257
-199
lines changed

23 files changed

+257
-199
lines changed

ydb/library/yql/core/common_opt/yql_co_last.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,60 @@
11
#include "yql_co.h"
2+
#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
23
#include <ydb/library/yql/core/yql_opt_utils.h>
4+
#include <ydb/library/yql/utils/log/log.h>
35

46
namespace NYql {
57

8+
namespace {
9+
10+
using namespace NNodes;
11+
12+
std::unordered_set<ui32> GetUselessSortedJoinInputs(const TCoEquiJoin& equiJoin) {
13+
std::unordered_map<std::string_view, std::tuple<ui32, const TSortedConstraintNode*, const TChoppedConstraintNode*>> sorteds(equiJoin.ArgCount() - 2U);
14+
for (ui32 i = 0U; i + 2U < equiJoin.ArgCount(); ++i) {
15+
if (const auto joinInput = equiJoin.Arg(i).Cast<TCoEquiJoinInput>(); joinInput.Scope().Ref().IsAtom()) {
16+
const auto sorted = joinInput.List().Ref().GetConstraint<TSortedConstraintNode>();
17+
const auto chopped = joinInput.List().Ref().GetConstraint<TChoppedConstraintNode>();
18+
if (sorted || chopped)
19+
sorteds.emplace(joinInput.Scope().Ref().Content(), std::make_tuple(i, sorted, chopped));
20+
}
21+
}
22+
23+
for (std::vector<const TExprNode*> joinTreeNodes(1U, equiJoin.Arg(equiJoin.ArgCount() - 2).Raw()); !joinTreeNodes.empty();) {
24+
const auto joinTree = joinTreeNodes.back();
25+
joinTreeNodes.pop_back();
26+
27+
if (!joinTree->Child(1)->IsAtom())
28+
joinTreeNodes.emplace_back(joinTree->Child(1));
29+
30+
if (!joinTree->Child(2)->IsAtom())
31+
joinTreeNodes.emplace_back(joinTree->Child(2));
32+
33+
if (!joinTree->Head().IsAtom("Cross")) {
34+
std::unordered_map<std::string_view, TPartOfConstraintBase::TSetType> tableJoinKeys;
35+
for (const auto keys : {joinTree->Child(3), joinTree->Child(4)})
36+
for (ui32 i = 0U; i < keys->ChildrenSize(); i += 2)
37+
tableJoinKeys[keys->Child(i)->Content()].insert_unique(TPartOfConstraintBase::TPathType(1U, keys->Child(i + 1)->Content()));
38+
39+
for (const auto& [label, joinKeys]: tableJoinKeys) {
40+
if (const auto it = sorteds.find(label); sorteds.cend() != it) {
41+
const auto sorted = std::get<const TSortedConstraintNode*>(it->second);
42+
const auto chopped = std::get<const TChoppedConstraintNode*>(it->second);
43+
if (sorted && sorted->StartsWith(joinKeys) || chopped && chopped->Equals(joinKeys))
44+
sorteds.erase(it);
45+
}
46+
}
47+
}
48+
}
49+
50+
std::unordered_set<ui32> result(sorteds.size());
51+
for (const auto& sort : sorteds)
52+
result.emplace(std::get<ui32>(sort.second));
53+
return result;
54+
}
55+
56+
} // namespace
57+
658
void RegisterCoFinalCallables(TCallableOptimizerMap& map) {
759
map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
860
Y_UNUSED(optCtx);
@@ -11,8 +63,26 @@ void RegisterCoFinalCallables(TCallableOptimizerMap& map) {
1163
return TExprNode::TPtr();
1264
}
1365
}
66+
YQL_CLOG(DEBUG, Core) << "Replace " << node->Content() << " with Unordered";
1467
return ctx.RenameNode(*node, "Unordered");
1568
};
69+
70+
// EquiJoin handler: wraps the list of every input reported by
// GetUselessSortedJoinInputs into an Unordered callable, keeping the input's
// second element as is. Returns the node unchanged when no input is reported.
map["EquiJoin"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
    const auto uselessInputs = GetUselessSortedJoinInputs(TCoEquiJoin(node));
    if (uselessInputs.empty()) {
        return node;
    }

    YQL_CLOG(DEBUG, Core) << "Suppress order on " << uselessInputs.size() << ' ' << node->Content() << " inputs";

    auto args = node->ChildrenList();
    for (const auto inputIndex : uselessInputs) {
        // Keep a handle to the original input while its replacement is being built.
        const auto original = args[inputIndex];
        args[inputIndex] = ctx.Builder(original->Pos())
            .List()
                .Callable(0, "Unordered")
                    .Add(0, original->HeadPtr())
                .Seal()
                .Add(1, original->TailPtr())
            .Seal().Build();
    }
    return ctx.ChangeChildren(*node, std::move(args));
};
1686
}
1787

1888
}

ydb/library/yql/core/common_opt/yql_co_simple1.cpp

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -3415,50 +3415,6 @@ TExprNode::TPtr PullAssumeColumnOrderOverEquiJoin(const TExprNode::TPtr& node, T
34153415
return node;
34163416
}
34173417

3418-
std::unordered_set<ui32> GetUselessSortedJoinInputs(const TCoEquiJoin& equiJoin) {
3419-
std::unordered_map<std::string_view, std::tuple<ui32, const TSortedConstraintNode*, const TChoppedConstraintNode*>> sorteds(equiJoin.ArgCount() - 2U);
3420-
for (ui32 i = 0U; i + 2U < equiJoin.ArgCount(); ++i) {
3421-
if (const auto joinInput = equiJoin.Arg(i).Cast<TCoEquiJoinInput>(); joinInput.Scope().Ref().IsAtom()) {
3422-
const auto sorted = joinInput.List().Ref().GetConstraint<TSortedConstraintNode>();
3423-
const auto chopped = joinInput.List().Ref().GetConstraint<TChoppedConstraintNode>();
3424-
if (sorted || chopped)
3425-
sorteds.emplace(joinInput.Scope().Ref().Content(), std::make_tuple(i, sorted, chopped));
3426-
}
3427-
}
3428-
3429-
for (std::vector<const TExprNode*> joinTreeNodes(1U, equiJoin.Arg(equiJoin.ArgCount() - 2).Raw()); !joinTreeNodes.empty();) {
3430-
const auto joinTree = joinTreeNodes.back();
3431-
joinTreeNodes.pop_back();
3432-
3433-
if (!joinTree->Child(1)->IsAtom())
3434-
joinTreeNodes.emplace_back(joinTree->Child(1));
3435-
3436-
if (!joinTree->Child(2)->IsAtom())
3437-
joinTreeNodes.emplace_back(joinTree->Child(2));
3438-
3439-
if (!joinTree->Head().IsAtom("Cross")) {
3440-
std::unordered_map<std::string_view, TPartOfConstraintBase::TSetType> tableJoinKeys;
3441-
for (const auto keys : {joinTree->Child(3), joinTree->Child(4)})
3442-
for (ui32 i = 0U; i < keys->ChildrenSize(); i += 2)
3443-
tableJoinKeys[keys->Child(i)->Content()].insert_unique(TPartOfConstraintBase::TPathType(1U, keys->Child(i + 1)->Content()));
3444-
3445-
for (const auto& [label, joinKeys]: tableJoinKeys) {
3446-
if (const auto it = sorteds.find(label); sorteds.cend() != it) {
3447-
const auto sorted = std::get<const TSortedConstraintNode*>(it->second);
3448-
const auto chopped = std::get<const TChoppedConstraintNode*>(it->second);
3449-
if (sorted && sorted->StartsWith(joinKeys) || chopped && chopped->Equals(joinKeys))
3450-
sorteds.erase(it);
3451-
}
3452-
}
3453-
}
3454-
}
3455-
3456-
std::unordered_set<ui32> result(sorteds.size());
3457-
for (const auto& sort : sorteds)
3458-
result.emplace(std::get<ui32>(sort.second));
3459-
return result;
3460-
}
3461-
34623418
TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const THashSet<TStringBuf>& serializeUdfNames) {
34633419
auto apply = TExprBase(node).Cast<TCoApply>();
34643420

@@ -5240,20 +5196,6 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
52405196
return ret;
52415197
}
52425198

5243-
if (const auto indexes = GetUselessSortedJoinInputs(TCoEquiJoin(node)); !indexes.empty()) {
5244-
YQL_CLOG(DEBUG, Core) << "Suppress order on " << indexes.size() << ' ' << node->Content() << " inputs.";
5245-
auto children = node->ChildrenList();
5246-
for (const auto idx : indexes)
5247-
children[idx] = ctx.Builder(children[idx]->Pos())
5248-
.List()
5249-
.Callable(0, "Unordered")
5250-
.Add(0, children[idx]->HeadPtr())
5251-
.Seal()
5252-
.Add(1, children[idx]->TailPtr())
5253-
.Seal().Build();
5254-
return ctx.ChangeChildren(*node, std::move(children));
5255-
}
5256-
52575199
for (ui32 i = 0U; i < node->ChildrenSize() - 2U; ++i) {
52585200
if (IsListReorder(node->Child(i)->Head())) {
52595201
YQL_CLOG(DEBUG, Core) << node->Content() << " with " << node->Child(i)->Content();

ydb/library/yql/tests/sql/dq_file/part0/canondata/result.json

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3161,23 +3161,23 @@
31613161
"test.test[table_range-merge_non_strict--Results]": [],
31623162
"test.test[tpch-q13-default.txt-Analyze]": [
31633163
{
3164-
"checksum": "204d6e20f210a61980dbe3196c4b4ef1",
3165-
"size": 9004,
3166-
"uri": "https://{canondata_backend}/1600758/22da4752d5b07acd953afe521e2a80fdcf854b55/resource.tar.gz#test.test_tpch-q13-default.txt-Analyze_/plan.txt"
3164+
"checksum": "cad8e4312267dce2ab1121734418115f",
3165+
"size": 9011,
3166+
"uri": "https://{canondata_backend}/1937367/c99cfb4b780550a1a456fcf97cd04e4ab58600f3/resource.tar.gz#test.test_tpch-q13-default.txt-Analyze_/plan.txt"
31673167
}
31683168
],
31693169
"test.test[tpch-q13-default.txt-Debug]": [
31703170
{
3171-
"checksum": "d63a4440675f263392ba2cf4caa0585a",
3172-
"size": 7153,
3173-
"uri": "https://{canondata_backend}/1936273/8ea0dd0bb7dcfcb9060145c85aba7872eea15de5/resource.tar.gz#test.test_tpch-q13-default.txt-Debug_/opt.yql_patched"
3171+
"checksum": "ca31ce7ecccee4d4b174642b362dfec6",
3172+
"size": 7145,
3173+
"uri": "https://{canondata_backend}/1937367/c99cfb4b780550a1a456fcf97cd04e4ab58600f3/resource.tar.gz#test.test_tpch-q13-default.txt-Debug_/opt.yql_patched"
31743174
}
31753175
],
31763176
"test.test[tpch-q13-default.txt-Plan]": [
31773177
{
3178-
"checksum": "204d6e20f210a61980dbe3196c4b4ef1",
3179-
"size": 9004,
3180-
"uri": "https://{canondata_backend}/1600758/22da4752d5b07acd953afe521e2a80fdcf854b55/resource.tar.gz#test.test_tpch-q13-default.txt-Plan_/plan.txt"
3178+
"checksum": "cad8e4312267dce2ab1121734418115f",
3179+
"size": 9011,
3180+
"uri": "https://{canondata_backend}/1937367/c99cfb4b780550a1a456fcf97cd04e4ab58600f3/resource.tar.gz#test.test_tpch-q13-default.txt-Plan_/plan.txt"
31813181
}
31823182
],
31833183
"test.test[tpch-q13-default.txt-Results]": [],

ydb/library/yql/tests/sql/dq_file/part1/canondata/result.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,9 +1497,9 @@
14971497
],
14981498
"test.test[join-lookupjoin_bug7646_subst-off-Debug]": [
14991499
{
1500-
"checksum": "80f96e322ce4197cc94f5357dc221b20",
1501-
"size": 5815,
1502-
"uri": "https://{canondata_backend}/1931696/8efbe84ad728243c3e1c1cdb30d3b3f31d345567/resource.tar.gz#test.test_join-lookupjoin_bug7646_subst-off-Debug_/opt.yql_patched"
1500+
"checksum": "c758a9f64fd8d20747a30fd595daee3a",
1501+
"size": 5798,
1502+
"uri": "https://{canondata_backend}/1942671/db8798ead58a329e866fa250746caab9ca42a26c/resource.tar.gz#test.test_join-lookupjoin_bug7646_subst-off-Debug_/opt.yql_patched"
15031503
}
15041504
],
15051505
"test.test[join-lookupjoin_bug7646_subst-off-Plan]": [
@@ -1550,9 +1550,9 @@
15501550
],
15511551
"test.test[join-mapjoin_early_rewrite-off-Debug]": [
15521552
{
1553-
"checksum": "3b47530960ab8d86a2d408bc4cd4a4db",
1554-
"size": 2594,
1555-
"uri": "https://{canondata_backend}/1599023/53262e114e5fb21cb58c259e812c31e2f63afae0/resource.tar.gz#test.test_join-mapjoin_early_rewrite-off-Debug_/opt.yql_patched"
1553+
"checksum": "36bf39e140d41063087b180f8f0cfdfa",
1554+
"size": 2582,
1555+
"uri": "https://{canondata_backend}/1942671/db8798ead58a329e866fa250746caab9ca42a26c/resource.tar.gz#test.test_join-mapjoin_early_rewrite-off-Debug_/opt.yql_patched"
15561556
}
15571557
],
15581558
"test.test[join-mapjoin_early_rewrite-off-Plan]": [

ydb/library/yql/tests/sql/dq_file/part10/canondata/result.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,9 +1147,9 @@
11471147
],
11481148
"test.test[in-in_sorted_by_tuple--Debug]": [
11491149
{
1150-
"checksum": "25eac6930d6256ef93f7c557aa32f2ce",
1151-
"size": 6188,
1152-
"uri": "https://{canondata_backend}/1931696/3b66a4b8ee9789607df97fd1b710d3ca890dd9b2/resource.tar.gz#test.test_in-in_sorted_by_tuple--Debug_/opt.yql_patched"
1150+
"checksum": "95aaa80a7fec696a32886d16fb9379c7",
1151+
"size": 6169,
1152+
"uri": "https://{canondata_backend}/1946324/4e55210d05969637e5668c50c7fbdc1a61f108f5/resource.tar.gz#test.test_in-in_sorted_by_tuple--Debug_/opt.yql_patched"
11531153
}
11541154
],
11551155
"test.test[in-in_sorted_by_tuple--Plan]": [

ydb/library/yql/tests/sql/dq_file/part11/canondata/result.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,9 +1416,9 @@
14161416
],
14171417
"test.test[join-mapjoin_early_rewrite_star--Debug]": [
14181418
{
1419-
"checksum": "a60696099128912e5d91481015f44908",
1420-
"size": 3030,
1421-
"uri": "https://{canondata_backend}/1937424/567d7f4e2a03fd773183d9e7015f2f468ea57566/resource.tar.gz#test.test_join-mapjoin_early_rewrite_star--Debug_/opt.yql_patched"
1419+
"checksum": "84565151160f644ecc225f1477d59860",
1420+
"size": 3018,
1421+
"uri": "https://{canondata_backend}/1130705/173afc6d44db00f6f42767e88bce00b623a40335/resource.tar.gz#test.test_join-mapjoin_early_rewrite_star--Debug_/opt.yql_patched"
14221422
}
14231423
],
14241424
"test.test[join-mapjoin_early_rewrite_star--Plan]": [
@@ -1460,9 +1460,9 @@
14601460
],
14611461
"test.test[join-mergejoin_saves_output_sort_cross--Debug]": [
14621462
{
1463-
"checksum": "b6e62f8e97766e90796b4ee63e690606",
1464-
"size": 4553,
1465-
"uri": "https://{canondata_backend}/1936273/640ea425b9d5a6140c315077f2a83bba387482d8/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_cross--Debug_/opt.yql_patched"
1463+
"checksum": "83aeda517e251c905218e0bbbff42efb",
1464+
"size": 4538,
1465+
"uri": "https://{canondata_backend}/1130705/173afc6d44db00f6f42767e88bce00b623a40335/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_cross--Debug_/opt.yql_patched"
14661466
}
14671467
],
14681468
"test.test[join-mergejoin_saves_output_sort_cross--Plan]": [

ydb/library/yql/tests/sql/dq_file/part12/canondata/result.json

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,9 +1736,9 @@
17361736
],
17371737
"test.test[join-mapjoin_early_rewrite_sequence--Debug]": [
17381738
{
1739-
"checksum": "40f651523716fa79f06d4b85d77ad9c5",
1740-
"size": 4905,
1741-
"uri": "https://{canondata_backend}/1942415/b6b41eb77627490bfce387dccb1eea7766e2bb71/resource.tar.gz#test.test_join-mapjoin_early_rewrite_sequence--Debug_/opt.yql_patched"
1739+
"checksum": "fca35a6bb85333c80c06f8300bd362bf",
1740+
"size": 4876,
1741+
"uri": "https://{canondata_backend}/1937001/6a20500553ba8b1dbf218cdb9db234c852b93f8e/resource.tar.gz#test.test_join-mapjoin_early_rewrite_sequence--Debug_/opt.yql_patched"
17421742
}
17431743
],
17441744
"test.test[join-mapjoin_early_rewrite_sequence--Plan]": [
@@ -1758,9 +1758,9 @@
17581758
],
17591759
"test.test[join-mapjoin_early_rewrite_sequence-off-Debug]": [
17601760
{
1761-
"checksum": "538681da43ce6e4ec3ebea324a201e4a",
1762-
"size": 4112,
1763-
"uri": "https://{canondata_backend}/1942415/b6b41eb77627490bfce387dccb1eea7766e2bb71/resource.tar.gz#test.test_join-mapjoin_early_rewrite_sequence-off-Debug_/opt.yql_patched"
1761+
"checksum": "933e6aa8c7c4f97966e7173b433e1b57",
1762+
"size": 4083,
1763+
"uri": "https://{canondata_backend}/1937001/6a20500553ba8b1dbf218cdb9db234c852b93f8e/resource.tar.gz#test.test_join-mapjoin_early_rewrite_sequence-off-Debug_/opt.yql_patched"
17641764
}
17651765
],
17661766
"test.test[join-mapjoin_early_rewrite_sequence-off-Plan]": [
@@ -3477,16 +3477,16 @@
34773477
],
34783478
"test.test[tpch-q17-default.txt-Debug]": [
34793479
{
3480-
"checksum": "73519ac6ddad0593c46719ab996c12b1",
3481-
"size": 8174,
3482-
"uri": "https://{canondata_backend}/1031349/596c297595e75709124ce2ef96947a7ecc9a2056/resource.tar.gz#test.test_tpch-q17-default.txt-Debug_/opt.yql_patched"
3480+
"checksum": "defafd6f9612cbc308bd2320730c9dff",
3481+
"size": 7495,
3482+
"uri": "https://{canondata_backend}/1937001/6a20500553ba8b1dbf218cdb9db234c852b93f8e/resource.tar.gz#test.test_tpch-q17-default.txt-Debug_/opt.yql_patched"
34833483
}
34843484
],
34853485
"test.test[tpch-q17-default.txt-Plan]": [
34863486
{
3487-
"checksum": "c4a42e1b87512bc10a0af654874cc783",
3488-
"size": 10696,
3489-
"uri": "https://{canondata_backend}/1031349/596c297595e75709124ce2ef96947a7ecc9a2056/resource.tar.gz#test.test_tpch-q17-default.txt-Plan_/plan.txt"
3487+
"checksum": "f0de1d929a9ebdf2817117e56ae59070",
3488+
"size": 10246,
3489+
"uri": "https://{canondata_backend}/1937001/6a20500553ba8b1dbf218cdb9db234c852b93f8e/resource.tar.gz#test.test_tpch-q17-default.txt-Plan_/plan.txt"
34903490
}
34913491
],
34923492
"test.test[tpch-q17-default.txt-Results]": [],
@@ -3499,9 +3499,9 @@
34993499
],
35003500
"test.test[tpch-q22-default.txt-Debug]": [
35013501
{
3502-
"checksum": "c52c37022c2582ee3db03909ac26d44f",
3503-
"size": 7691,
3504-
"uri": "https://{canondata_backend}/1031349/596c297595e75709124ce2ef96947a7ecc9a2056/resource.tar.gz#test.test_tpch-q22-default.txt-Debug_/opt.yql_patched"
3502+
"checksum": "8d891adc2c453405a0ee69134e9de4b0",
3503+
"size": 7664,
3504+
"uri": "https://{canondata_backend}/1937001/6a20500553ba8b1dbf218cdb9db234c852b93f8e/resource.tar.gz#test.test_tpch-q22-default.txt-Debug_/opt.yql_patched"
35053505
}
35063506
],
35073507
"test.test[tpch-q22-default.txt-Plan]": [
@@ -3521,9 +3521,9 @@
35213521
],
35223522
"test.test[tpch-q3-default.txt-Debug]": [
35233523
{
3524-
"checksum": "d6e266b3f88d8ef61db77efbc3abc5ef",
3525-
"size": 8963,
3526-
"uri": "https://{canondata_backend}/1942415/b6b41eb77627490bfce387dccb1eea7766e2bb71/resource.tar.gz#test.test_tpch-q3-default.txt-Debug_/opt.yql_patched"
3524+
"checksum": "4206abf39a70fdda96443f02526ea417",
3525+
"size": 8948,
3526+
"uri": "https://{canondata_backend}/1937001/6a20500553ba8b1dbf218cdb9db234c852b93f8e/resource.tar.gz#test.test_tpch-q3-default.txt-Debug_/opt.yql_patched"
35273527
}
35283528
],
35293529
"test.test[tpch-q3-default.txt-Plan]": [

0 commit comments

Comments
 (0)