Skip to content

Commit 986e97e

Browse files
Removed a rule that turned Shuffle into Map breaking some TPCDS queries (#6982)
1 parent c0f743d commit 986e97e

File tree

9 files changed

+89
-92
lines changed

9 files changed

+89
-92
lines changed

ydb/core/kqp/opt/physical/kqp_opt_phy.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,6 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase {
8888
AddHandler(0, &TCoTake::Match, HNDL(PropagatePrecomuteTake<false>));
8989
AddHandler(0, &TCoFlatMap::Match, HNDL(PropagatePrecomuteFlatmap<false>));
9090

91-
AddHandler(0, &TDqCnHashShuffle::Match, HNDL(BuildHashShuffleByKeyStage));
92-
9391
AddHandler(0, &TCoAggregateCombine::Match, HNDL(ExpandAggregatePhase));
9492
AddHandler(0, &TCoAggregateCombineState::Match, HNDL(ExpandAggregatePhase));
9593
AddHandler(0, &TCoAggregateMergeState::Match, HNDL(ExpandAggregatePhase));
@@ -253,13 +251,6 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase {
253251
return output;
254252
}
255253

256-
TMaybeNode<TExprBase> BuildHashShuffleByKeyStage(TExprBase node, TExprContext& ctx) {
257-
auto output = DqBuildHashShuffleByKeyStage(node, ctx, {});
258-
DumpAppliedRule("BuildHashShuffleByKeyStage", node.Ptr(), output.Ptr(), ctx);
259-
return TExprBase(output);
260-
}
261-
262-
263254
TMaybeNode<TExprBase> ExpandAggregatePhase(TExprBase node, TExprContext& ctx) {
264255
auto output = ExpandAggregatePeepholeImpl(node.Ptr(), ctx, TypesCtx, KqpCtx.Config->HasOptUseFinalizeByKey(), false);
265256
DumpAppliedRule("ExpandAggregatePhase", node.Ptr(), output, ctx);
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
PRAGMA TablePathPrefix='/Root/test/ds';
2+
3+
-- NB: Subquerys
4+
$bla1 = (select distinct
5+
COALESCE(c_last_name,'') as c_last_name,
6+
COALESCE(c_first_name,'') as c_first_name,
7+
COALESCE(cast(d_date as date), cast(0 as Date)) as d_date
8+
from store_sales as store_sales
9+
cross join date_dim as date_dim
10+
cross join customer as customer
11+
where store_sales.ss_sold_date_sk = date_dim.d_date_sk
12+
and store_sales.ss_customer_sk = customer.c_customer_sk
13+
and d_month_seq between 1221 and 1221+11);
14+
15+
$bla2 = ((select distinct
16+
COALESCE(c_last_name,'') as c_last_name,
17+
COALESCE(c_first_name,'') as c_first_name,
18+
COALESCE(cast(d_date as date), cast(0 as Date)) as d_date
19+
from catalog_sales as catalog_sales
20+
cross join date_dim as date_dim
21+
cross join customer as customer
22+
where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
23+
and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
24+
and d_month_seq between 1221 and 1221+11)
25+
union all
26+
(select distinct
27+
COALESCE(c_last_name,'') as c_last_name,
28+
COALESCE(c_first_name,'') as c_first_name,
29+
COALESCE(cast(d_date as date), cast(0 as Date)) as d_date
30+
from web_sales as web_sales
31+
cross join date_dim as date_dim
32+
cross join customer as customer
33+
where web_sales.ws_sold_date_sk = date_dim.d_date_sk
34+
and web_sales.ws_bill_customer_sk = customer.c_customer_sk
35+
and d_month_seq between 1221 and 1221+11));
36+
37+
-- start query 1 in stream 0 using template query87.tpl and seed 1819994127
38+
select count(*)
39+
from $bla1 bla1 left only join $bla2 bla2 using (c_last_name, c_first_name, d_date)
40+
;
41+
42+
-- end query 1 in stream 0 using template query87.tpl

ydb/core/kqp/ut/join/kqp_join_order_ut.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,27 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
251251
ExplainJoinOrderTestDataQuery("queries/tpcds61.sql", StreamLookupJoin);
252252
}
253253

254+
void ExecuteJoinOrderTestDataQueryWithStats(const TString& queryPath, const TString& statsPath, bool useStreamLookupJoin) {
255+
auto kikimr = GetKikimrWithJoinSettings(useStreamLookupJoin, GetStatic(statsPath));
256+
auto db = kikimr.GetTableClient();
257+
auto session = db.CreateSession().GetValueSync().GetSession();
258+
259+
CreateSampleTable(session);
260+
261+
/* join with parameters */
262+
{
263+
const TString query = GetStatic(queryPath);
264+
265+
auto result = session.ExecuteDataQuery(query,TTxControl::BeginTx().CommitTx()).ExtractValueSync();
266+
267+
UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS);
268+
}
269+
}
270+
271+
Y_UNIT_TEST_TWIN(TPCDS87, StreamLookupJoin) {
272+
ExecuteJoinOrderTestDataQueryWithStats("queries/tpcds87.sql", "stats/tpcds1000s.json", StreamLookupJoin);
273+
}
274+
254275
Y_UNIT_TEST_TWIN(TPCDS88, StreamLookupJoin) {
255276
ExplainJoinOrderTestDataQuery("queries/tpcds88.sql", StreamLookupJoin);
256277
}
@@ -314,13 +335,13 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
314335
);
315336
}
316337

317-
/*
338+
/*
318339
Y_UNIT_TEST_TWIN(OverrideStatsTPCDS64, StreamLookupJoin) {
319340
JoinOrderTestWithOverridenStats(
320341
"queries/tpcds64.sql", "stats/tpcds1000s.json", "join_order/tpcds64_1000s.json", StreamLookupJoin
321342
);
322343
}
323-
*/
344+
*/
324345

325346
Y_UNIT_TEST_TWIN(OverrideStatsTPCDS78, StreamLookupJoin) {
326347
JoinOrderTestWithOverridenStats(

ydb/library/yql/dq/opt/dq_opt_phy.cpp

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,50 +1190,6 @@ TExprBase DqBuildShuffleStage(TExprBase node, TExprContext& ctx, IOptimizationCo
11901190
.Done();
11911191
}
11921192

1193-
NNodes::TExprBase DqBuildHashShuffleByKeyStage(NNodes::TExprBase node, TExprContext& ctx, const TParentsMap& /*parentsMap*/) {
1194-
if (!node.Maybe<TDqCnHashShuffle>()) {
1195-
return node;
1196-
}
1197-
auto cnHash = node.Cast<TDqCnHashShuffle>();
1198-
auto stage = cnHash.Output().Stage();
1199-
if (!stage.Program().Body().Maybe<TCoExtend>()) {
1200-
return node;
1201-
}
1202-
auto extend = stage.Program().Body().Cast<TCoExtend>();
1203-
TNodeSet nodes;
1204-
for (auto&& i : stage.Program().Args()) {
1205-
nodes.emplace(i.Raw());
1206-
}
1207-
for (auto&& i : extend) {
1208-
if (nodes.erase(i.Raw()) != 1) {
1209-
return node;
1210-
}
1211-
}
1212-
if (!nodes.empty()) {
1213-
return node;
1214-
}
1215-
TExprNode::TListType nodesTuple;
1216-
for (auto&& i : stage.Inputs()) {
1217-
if (!i.Maybe<TDqCnUnionAll>()) {
1218-
return node;
1219-
}
1220-
auto uAll = i.Cast<TDqCnUnionAll>();
1221-
nodesTuple.emplace_back(ctx.ChangeChild(node.Ref(), 0, uAll.Output().Ptr()));
1222-
}
1223-
auto stageCopy = ctx.ChangeChild(stage.Ref(), 0, ctx.NewList(node.Pos(), std::move(nodesTuple)));
1224-
auto output =
1225-
Build<TDqOutput>(ctx, node.Pos())
1226-
.Stage(stageCopy)
1227-
.Index().Build("0")
1228-
.Done();
1229-
auto outputCnMap =
1230-
Build<TDqCnMap>(ctx, node.Pos())
1231-
.Output(output)
1232-
.Done();
1233-
1234-
return TExprBase(outputCnMap);
1235-
}
1236-
12371193
TExprBase DqBuildFinalizeByKeyStage(TExprBase node, TExprContext& ctx,
12381194
const TParentsMap& parentsMap, bool allowStageMultiUsage)
12391195
{

ydb/library/yql/dq/opt/dq_opt_phy.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ NNodes::TExprBase DqBuildShuffleStage(NNodes::TExprBase node, TExprContext& ctx,
5555

5656
NNodes::TExprBase DqBuildFinalizeByKeyStage(NNodes::TExprBase node, TExprContext& ctx,
5757
const TParentsMap& parentsMap, bool allowStageMultiUsage = true);
58-
NNodes::TExprBase DqBuildHashShuffleByKeyStage(NNodes::TExprBase node, TExprContext& ctx, const TParentsMap& parentsMap);
5958

6059
NNodes::TExprBase DqBuildAggregationResultStage(NNodes::TExprBase node, TExprContext& ctx,
6160
IOptimizationContext& optCtx);

ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@
5151
"PlanNodeId": 8,
5252
"Plans": [
5353
{
54-
"Node Type": "Map",
54+
"KeyColumns": [
55+
"SearchPhrase"
56+
],
57+
"Node Type": "HashShuffle",
5558
"PlanNodeId": 7,
5659
"PlanNodeType": "Connection",
5760
"Plans": [
@@ -73,10 +76,7 @@
7376
"PlanNodeId": 6,
7477
"Plans": [
7578
{
76-
"KeyColumns": [
77-
"SearchPhrase"
78-
],
79-
"Node Type": "HashShuffle",
79+
"Node Type": "UnionAll",
8080
"PlanNodeId": 5,
8181
"PlanNodeType": "Connection",
8282
"Plans": [
@@ -279,10 +279,7 @@
279279
]
280280
},
281281
{
282-
"KeyColumns": [
283-
"SearchPhrase"
284-
],
285-
"Node Type": "HashShuffle",
282+
"Node Type": "UnionAll",
286283
"PlanNodeId": 3,
287284
"PlanNodeType": "Connection",
288285
"Plans": [

ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@
5151
"PlanNodeId": 8,
5252
"Plans": [
5353
{
54-
"Node Type": "Map",
54+
"KeyColumns": [
55+
"RegionID"
56+
],
57+
"Node Type": "HashShuffle",
5558
"PlanNodeId": 7,
5659
"PlanNodeType": "Connection",
5760
"Plans": [
@@ -73,10 +76,7 @@
7376
"PlanNodeId": 6,
7477
"Plans": [
7578
{
76-
"KeyColumns": [
77-
"RegionID"
78-
],
79-
"Node Type": "HashShuffle",
79+
"Node Type": "UnionAll",
8080
"PlanNodeId": 5,
8181
"PlanNodeType": "Connection",
8282
"Plans": [
@@ -136,10 +136,7 @@
136136
]
137137
},
138138
{
139-
"KeyColumns": [
140-
"RegionID"
141-
],
142-
"Node Type": "HashShuffle",
139+
"Node Type": "UnionAll",
143140
"PlanNodeId": 3,
144141
"PlanNodeType": "Connection",
145142
"Plans": [

ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@
5151
"PlanNodeId": 8,
5252
"Plans": [
5353
{
54-
"Node Type": "Map",
54+
"KeyColumns": [
55+
"SearchPhrase"
56+
],
57+
"Node Type": "HashShuffle",
5558
"PlanNodeId": 7,
5659
"PlanNodeType": "Connection",
5760
"Plans": [
@@ -73,10 +76,7 @@
7376
"PlanNodeId": 6,
7477
"Plans": [
7578
{
76-
"KeyColumns": [
77-
"SearchPhrase"
78-
],
79-
"Node Type": "HashShuffle",
79+
"Node Type": "UnionAll",
8080
"PlanNodeId": 5,
8181
"PlanNodeType": "Connection",
8282
"Plans": [
@@ -122,10 +122,7 @@
122122
]
123123
},
124124
{
125-
"KeyColumns": [
126-
"SearchPhrase"
127-
],
128-
"Node Type": "HashShuffle",
125+
"Node Type": "UnionAll",
129126
"PlanNodeId": 3,
130127
"PlanNodeType": "Connection",
131128
"Plans": [

ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@
5151
"PlanNodeId": 8,
5252
"Plans": [
5353
{
54-
"Node Type": "Map",
54+
"KeyColumns": [
55+
"RegionID"
56+
],
57+
"Node Type": "HashShuffle",
5558
"PlanNodeId": 7,
5659
"PlanNodeType": "Connection",
5760
"Plans": [
@@ -73,10 +76,7 @@
7376
"PlanNodeId": 6,
7477
"Plans": [
7578
{
76-
"KeyColumns": [
77-
"RegionID"
78-
],
79-
"Node Type": "HashShuffle",
79+
"Node Type": "UnionAll",
8080
"PlanNodeId": 5,
8181
"PlanNodeType": "Connection",
8282
"Plans": [
@@ -113,10 +113,7 @@
113113
]
114114
},
115115
{
116-
"KeyColumns": [
117-
"RegionID"
118-
],
119-
"Node Type": "HashShuffle",
116+
"Node Type": "UnionAll",
120117
"PlanNodeId": 3,
121118
"PlanNodeType": "Connection",
122119
"Plans": [

0 commit comments

Comments
 (0)