@@ -261,13 +261,13 @@ TExprNode::TPtr RenameJoinTree(TExprNode::TPtr joinTree, const THashMap<TString,
261
261
return ret;
262
262
}
263
263
264
- TExprNode::TPtr ReassembleJoinEquality (TExprNode::TPtr columns, const TStringBuf& upstreamLabel ,
264
+ TExprNode::TPtr ReassembleJoinEquality (TExprNode::TPtr columns, const THashSet< TStringBuf>& upstreamLabels ,
265
265
const THashMap<TString, TString>& upstreamTablesRename,
266
266
const THashMap<TString, TString>& upstreamColumnsBackRename, TExprContext& ctx)
267
267
{
268
268
TExprNode::TListType newChildren (columns->ChildrenList ());
269
269
for (ui32 i = 0 ; i < columns->ChildrenSize (); i += 2 ) {
270
- if (columns->Child (i)->Content () != upstreamLabel ) {
270
+ if (!upstreamLabels. contains ( columns->Child (i)->Content ()) ) {
271
271
continue ;
272
272
}
273
273
@@ -280,36 +280,38 @@ TExprNode::TPtr ReassembleJoinEquality(TExprNode::TPtr columns, const TStringBuf
280
280
upstreamTablesRename, ctx);
281
281
newChildren[i + 1 ] = ctx.NewAtom (columns->Pos (), part2);
282
282
} else {
283
- TStringBuf part1;
284
- TStringBuf part2;
285
- SplitTableName (column->Content (), part1, part2);
283
+ TStringBuf part1 = columns->Child (i)->Content ();
284
+ TStringBuf part2 = columns->Child (i + 1 )->Content ();
285
+
286
+ if (TString (column->Content ()).find (" ." ) != TString::npos) {
287
+ SplitTableName (column->Content (), part1, part2);
288
+ }
289
+
286
290
newChildren[i] = RenameJoinTable (columns->Pos (), ctx.NewAtom (columns->Pos (), part1),
287
291
upstreamTablesRename, ctx);
288
292
newChildren[i + 1 ] = ctx.NewAtom (columns->Pos (), part2);
289
-
290
- return nullptr ;
291
293
}
292
294
}
293
295
294
296
auto ret = ctx.ChangeChildren (*columns, std::move (newChildren));
295
297
return ret;
296
298
}
297
299
298
- TExprNode::TPtr FuseJoinTree (TExprNode::TPtr downstreamJoinTree, TExprNode::TPtr upstreamJoinTree, const TStringBuf& upstreamLabel ,
300
+ TExprNode::TPtr FuseJoinTree (TExprNode::TPtr downstreamJoinTree, TExprNode::TPtr upstreamJoinTree, const THashSet< TStringBuf>& upstreamLabels ,
299
301
const THashMap<TString, TString>& upstreamTablesRename, const THashMap<TString, TString>& upstreamColumnsBackRename,
300
302
TExprContext& ctx)
301
303
{
302
304
TExprNode::TPtr left;
303
305
if (downstreamJoinTree->Child (1 )->IsAtom ()) {
304
- if (downstreamJoinTree->Child (1 )->Content () != upstreamLabel ) {
306
+ if (!upstreamLabels. contains ( downstreamJoinTree->Child (1 )->Content ()) ) {
305
307
left = downstreamJoinTree->Child (1 );
306
308
}
307
309
else {
308
310
left = RenameJoinTree (upstreamJoinTree, upstreamTablesRename, ctx);
309
311
}
310
312
}
311
313
else {
312
- left = FuseJoinTree (downstreamJoinTree->Child (1 ), upstreamJoinTree, upstreamLabel , upstreamTablesRename,
314
+ left = FuseJoinTree (downstreamJoinTree->Child (1 ), upstreamJoinTree, upstreamLabels , upstreamTablesRename,
313
315
upstreamColumnsBackRename, ctx);
314
316
if (!left) {
315
317
return nullptr ;
@@ -318,14 +320,14 @@ TExprNode::TPtr FuseJoinTree(TExprNode::TPtr downstreamJoinTree, TExprNode::TPtr
318
320
319
321
TExprNode::TPtr right;
320
322
if (downstreamJoinTree->Child (2 )->IsAtom ()) {
321
- if (downstreamJoinTree->Child (2 )->Content () != upstreamLabel ) {
323
+ if (!upstreamLabels. contains ( downstreamJoinTree->Child (2 )->Content ()) ) {
322
324
right = downstreamJoinTree->Child (2 );
323
325
}
324
326
else {
325
327
right = RenameJoinTree (upstreamJoinTree, upstreamTablesRename, ctx);
326
328
}
327
329
} else {
328
- right = FuseJoinTree (downstreamJoinTree->Child (2 ), upstreamJoinTree, upstreamLabel , upstreamTablesRename,
330
+ right = FuseJoinTree (downstreamJoinTree->Child (2 ), upstreamJoinTree, upstreamLabels , upstreamTablesRename,
329
331
upstreamColumnsBackRename, ctx);
330
332
if (!right) {
331
333
return nullptr ;
@@ -335,9 +337,9 @@ TExprNode::TPtr FuseJoinTree(TExprNode::TPtr downstreamJoinTree, TExprNode::TPtr
335
337
TExprNode::TListType newChildren (downstreamJoinTree->ChildrenList ());
336
338
newChildren[1 ] = left;
337
339
newChildren[2 ] = right;
338
- newChildren[3 ] = ReassembleJoinEquality (downstreamJoinTree->Child (3 ), upstreamLabel , upstreamTablesRename,
340
+ newChildren[3 ] = ReassembleJoinEquality (downstreamJoinTree->Child (3 ), upstreamLabels , upstreamTablesRename,
339
341
upstreamColumnsBackRename, ctx);
340
- newChildren[4 ] = ReassembleJoinEquality (downstreamJoinTree->Child (4 ), upstreamLabel , upstreamTablesRename,
342
+ newChildren[4 ] = ReassembleJoinEquality (downstreamJoinTree->Child (4 ), upstreamLabels , upstreamTablesRename,
341
343
upstreamColumnsBackRename, ctx);
342
344
if (!newChildren[3 ] || !newChildren[4 ]) {
343
345
return nullptr ;
@@ -347,18 +349,37 @@ TExprNode::TPtr FuseJoinTree(TExprNode::TPtr downstreamJoinTree, TExprNode::TPtr
347
349
return ret;
348
350
}
349
351
350
- TExprNode::TPtr FuseEquiJoins (const TExprNode::TPtr& node, ui32 upstreamIndex, TExprContext& ctx) {
352
+ bool IsSuitableToFuseInputMultiLabels (TOptimizeContext &optCtx) { // Gate for fusing EquiJoin inputs that carry a list of labels: returns the result of IsOptimizerEnabled for the optimizer name held in optName.
353
+ YQL_ENSURE (optCtx.Types ); // optCtx.Types is dereferenced on the return line, so it is checked first.
354
+ static const char optName[] = " FuseEquiJoinsInputMultiLabels" ; // Declared as a static array because it is passed to IsOptimizerEnabled as a template argument.
355
+ return IsOptimizerEnabled<optName>(*optCtx.Types ); // NOTE(review): presumably looks the name up in the type context's optimizer flags — confirm against IsOptimizerEnabled.
356
+ }
357
+
358
+ TExprNode::TPtr FuseEquiJoins (const TExprNode::TPtr& node, ui32 upstreamIndex, TExprContext& ctx, TOptimizeContext &optCtx) {
351
359
ui32 downstreamInputs = node->ChildrenSize () - 2 ;
352
360
auto upstreamList = node->Child (upstreamIndex)->Child (0 );
353
361
auto upstreamLabel = node->Child (upstreamIndex)->Child (1 );
362
+ THashSet<TStringBuf> upstreamLabelsAssociatedByInputIndex;
354
363
THashSet<TStringBuf> downstreamLabels;
355
364
for (ui32 i = 0 ; i < downstreamInputs; ++i) {
356
365
auto label = node->Child (i)->Child (1 );
357
- if (!label->IsAtom ()) {
358
- return node;
366
+ if (auto list = TMaybeNode<TCoAtomList>(label)) {
367
+ if (!IsSuitableToFuseInputMultiLabels (optCtx)) {
368
+ return node;
369
+ }
370
+ for (auto labelAtom : list.Cast ()) {
371
+ auto label = labelAtom.Value ();
372
+ downstreamLabels.insert (label);
373
+ if (upstreamIndex == i) {
374
+ upstreamLabelsAssociatedByInputIndex.insert (label);
375
+ }
376
+ }
377
+ } else {
378
+ if (upstreamIndex == i) {
379
+ upstreamLabelsAssociatedByInputIndex.insert (label->Content ());
380
+ }
381
+ downstreamLabels.insert (label->Content ());
359
382
}
360
-
361
- downstreamLabels.insert (label->Content ());
362
383
}
363
384
364
385
THashMap<TString, TString> upstreamTablesRename; // rename of conflicted upstream tables
@@ -381,7 +402,18 @@ TExprNode::TPtr FuseEquiJoins(const TExprNode::TPtr& node, ui32 upstreamIndex, T
381
402
return node;
382
403
}
383
404
384
- if (downstreamLabels.contains (label->Content ())) {
405
+ if (upstreamLabelsAssociatedByInputIndex.size () == 1 && downstreamLabels.contains (label->Content ()) ||
406
+ // In the case of a multi-label input, we do not rename the labels associated with the upstream input index.
407
+ // For example:
408
+ // (let ej1 = (EquiJoin '(input1, 'a), '(input2, 'b), upstreamJoinTree, '()))
409
+ // (let ej2 = (EquiJoin '(ej1, '('a 'b)), '(input3, 'c), downstreamJoinTree, '())))
410
+ // Upstream labels: [a, b];
411
+ // Downstream labels: [a, b, c];
412
+ // [a, b] are not renamed because they are associated with the upstream input index.
413
+ // As a result we should get:
414
+ // (let ejFused = (EquiJoin '(input1, 'a), '(input2, 'b), '(input3, 'c), fusedJoinTree, '()))
415
+ (upstreamLabelsAssociatedByInputIndex.size () > 1 && downstreamLabels.contains (label->Content ()) &&
416
+ !upstreamLabelsAssociatedByInputIndex.contains (label->Content ()))) {
385
417
// fix conflict for labels
386
418
for (ui32 suffix = 1 ;; ++suffix) {
387
419
auto newName = TString::Join (label->Content (), " _" , ToString (suffix));
@@ -481,26 +513,35 @@ TExprNode::TPtr FuseEquiJoins(const TExprNode::TPtr& node, ui32 upstreamIndex, T
481
513
}
482
514
}
483
515
484
- for (auto & x : upstreamColumnsRename) {
485
- for (auto & y : x.second ) {
486
- TStringBuf part1;
487
- TStringBuf part2;
488
- SplitTableName (x.first , part1, part2);
489
- if (auto renamed = upstreamTablesRename.FindPtr (part1)) {
490
- part1 = *renamed;
491
- }
492
-
493
- settingsChildren.push_back (ctx.Builder (node->Pos ())
494
- .List ()
495
- .Atom (0 , " rename" )
496
- .Atom (1 , TString::Join (part1, " ." , part2))
497
- .Atom (2 , TString::Join (upstreamLabel->Content (), " ." , y))
498
- .Seal ()
499
- .Build ());
500
- }
501
- }
502
-
503
- auto joinTree = FuseJoinTree (downstreamJoinTree, upstreamJoinTree, upstreamLabel->Content (),
516
+ for (auto & x : upstreamColumnsRename) {
517
+ for (auto & y : x.second ) {
518
+ TStringBuf part1;
519
+ TStringBuf part2;
520
+ SplitTableName (x.first , part1, part2);
521
+ TStringBuf labelName = upstreamLabel->Content ();
522
+ if (upstreamLabelsAssociatedByInputIndex.size () > 1 ) {
523
+ if (upstreamLabelsAssociatedByInputIndex.contains (part1)) {
524
+ continue ;
525
+ } else {
526
+ labelName = part1;
527
+ }
528
+ }
529
+
530
+ if (auto renamed = upstreamTablesRename.FindPtr (part1)) {
531
+ part1 = *renamed;
532
+ }
533
+
534
+ settingsChildren.push_back (ctx.Builder (node->Pos ())
535
+ .List ()
536
+ .Atom (0 , " rename" )
537
+ .Atom (1 , TString::Join (part1, " ." , part2))
538
+ .Atom (2 , TString::Join (labelName, " ." , y))
539
+ .Seal ()
540
+ .Build ());
541
+ }
542
+ }
543
+
544
+ auto joinTree = FuseJoinTree (downstreamJoinTree, upstreamJoinTree, upstreamLabelsAssociatedByInputIndex,
504
545
upstreamTablesRename, upstreamColumnsBackRename, ctx);
505
546
if (!joinTree) {
506
547
return node;
@@ -514,6 +555,7 @@ TExprNode::TPtr FuseEquiJoins(const TExprNode::TPtr& node, ui32 upstreamIndex, T
514
555
return ret;
515
556
}
516
557
558
+
517
559
bool IsRenamingOrPassthroughFlatMap (const TCoFlatMapBase& flatMap, THashMap<TStringBuf, TStringBuf>& renames,
518
560
THashSet<TStringBuf>& outputMembers, bool & isIdentity)
519
561
{
@@ -2007,7 +2049,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) {
2007
2049
if (node->Child (i)->Child (0 )->IsCallable (" EquiJoin" ) &&
2008
2050
optCtx.IsSingleUsage (*node->Child (i)) &&
2009
2051
optCtx.IsSingleUsage (*node->Child (i)->Child (0 ))) {
2010
- auto ret = FuseEquiJoins (node, i, ctx);
2052
+ auto ret = FuseEquiJoins (node, i, ctx, optCtx );
2011
2053
if (ret != node) {
2012
2054
YQL_CLOG (DEBUG, Core) << " FuseEquiJoins" ;
2013
2055
return ret;
0 commit comments