@@ -28,12 +28,14 @@ class TBlockJoinState : public TBlockState {
28
28
public:
29
29
TBlockJoinState (TMemoryUsageInfo* memInfo, TComputationContext& ctx,
30
30
const TVector<TType*>& inputItems,
31
+ const TVector<ui32>& leftIOMap,
31
32
const TVector<TType*> outputItems,
32
33
NUdf::TUnboxedValue**const fields)
33
34
: TBlockState(memInfo, outputItems.size())
34
35
, InputWidth_(inputItems.size() - 1 )
35
36
, OutputWidth_(outputItems.size() - 1 )
36
37
, Inputs_(inputItems.size())
38
+ , LeftIOMap_(leftIOMap)
37
39
, InputsDescr_(ToValueDescr(inputItems))
38
40
{
39
41
const auto & pgBuilder = ctx.Builder ->GetPgBuilder ();
@@ -54,30 +56,37 @@ class TBlockJoinState : public TBlockState {
54
56
55
57
// Appends one output row built only from the current "left" input row.
// For every entry of LeftIOMap_, the item fetched via GetItem(LeftIOMap_[i])
// is passed to AddItem with position i, so LeftIOMap_[i] selects the source
// field for the i-th copied item (presumably input column -> output column;
// confirm against GetItem/AddItem, which are defined outside this view).
// The previous revision (lines marked '-') copied fields 0..InputWidth_-1
// one-to-one instead of going through the mapping.
void CopyRow () {
56
58
// Copy items from the "left" flow.
57
- for (size_t i = 0 ; i < InputWidth_; i++) {
58
- AddItem (GetItem (i), i);
59
+ // Use the mapping from input fields to output ones to
60
+ // produce a tight loop to copy row items.
61
+ for (size_t i = 0 ; i < LeftIOMap_.size (); i++) {
62
+ AddItem (GetItem (LeftIOMap_[i]), i);
59
63
}
60
64
// Account for the row that has just been appended.
OutputRows_++;
61
65
}
62
66
63
67
void MakeRow (const NUdf::TUnboxedValuePod& value) {
68
+ size_t builderIndex = 0 ;
64
69
// Copy items from the "left" flow.
65
- for (size_t i = 0 ; i < InputWidth_; i++) {
66
- AddItem (GetItem (i), i);
70
+ // Use the mapping from input fields to output ones to
71
+ // produce a tight loop to copy row items.
72
+ for (size_t i = 0 ; i < LeftIOMap_.size (); i++, builderIndex++) {
73
+ AddItem (GetItem (LeftIOMap_[i]), i);
67
74
}
68
75
// Convert and append items from the "right" dict.
76
+ // Since the keys are copied to the output only from the
77
+ // "left" flow, process all values unconditionally.
69
78
if constexpr (RightRequired) {
70
- for (size_t i = InputWidth_, j = 0 ; i < OutputWidth_; i++, j ++) {
71
- AddValue (value.GetElement (j ), i );
79
+ for (size_t i = 0 ; builderIndex < OutputWidth_; i++) {
80
+ AddValue (value.GetElement (i ), builderIndex++ );
72
81
}
73
82
} else {
74
83
if (value) {
75
- for (size_t i = InputWidth_, j = 0 ; i < OutputWidth_; i++, j ++) {
76
- AddValue (value.GetElement (j ), i );
84
+ for (size_t i = 0 ; builderIndex < OutputWidth_; i++) {
85
+ AddValue (value.GetElement (i ), builderIndex++ );
77
86
}
78
87
} else {
79
- for ( size_t i = InputWidth_; i < OutputWidth_; i++ ) {
80
- AddValue (value, i );
88
+ while (builderIndex < OutputWidth_) {
89
+ AddValue (value, builderIndex++ );
81
90
}
82
91
}
83
92
}
@@ -164,6 +173,7 @@ class TBlockJoinState : public TBlockState {
164
173
size_t InputWidth_;
165
174
size_t OutputWidth_;
166
175
TUnboxedValueVector Inputs_;
176
+ const TVector<ui32> LeftIOMap_;
167
177
const std::vector<arrow::ValueDescr> InputsDescr_;
168
178
TVector<std::unique_ptr<IBlockReader>> Readers_;
169
179
TVector<std::unique_ptr<IBlockItemConverter>> Converters_;
@@ -178,12 +188,13 @@ using TState = TBlockJoinState<RightRequired>;
178
188
public:
179
189
TBlockWideMapJoinWrapper (TComputationMutables& mutables,
180
190
const TVector<TType*>&& resultJoinItems, const TVector<TType*>&& leftFlowItems,
181
- TVector<ui32>&& leftKeyColumns,
191
+ const TVector<ui32>&& leftKeyColumns, const TVector<ui32>&& leftIOMap ,
182
192
IComputationWideFlowNode* flow, IComputationNode* dict)
183
193
: TBaseComputation(mutables, flow, EValueRepresentation::Boxed)
184
194
, ResultJoinItems_(std::move(resultJoinItems))
185
195
, LeftFlowItems_(std::move(leftFlowItems))
186
196
, LeftKeyColumns_(std::move(leftKeyColumns))
197
+ , LeftIOMap_(leftIOMap)
187
198
, Flow_(flow)
188
199
, Dict_(dict)
189
200
, WideFieldsIndex_(mutables.IncrementWideFieldsIndex(LeftFlowItems_.size()))
@@ -248,7 +259,8 @@ using TState = TBlockJoinState<RightRequired>;
248
259
}
249
260
250
261
// Creates the per-context join state and stores it into `state`.
// The TState object is built through ctx.HolderFactory from: the computation
// context, the left flow item types, the left input->output field mapping
// (LeftIOMap_, added in this revision), the result item types, and a pointer
// into ctx.WideFields offset by WideFieldsIndex_.
// NOTE(review): the TState constructor also takes a TMemoryUsageInfo*;
// presumably HolderFactory.Create supplies it — confirm.
void MakeState (TComputationContext& ctx, NUdf::TUnboxedValue& state) const {
251
- state = ctx.HolderFactory .Create <TState>(ctx, LeftFlowItems_, ResultJoinItems_, ctx.WideFields .data () + WideFieldsIndex_);
262
+ state = ctx.HolderFactory .Create <TState>(ctx, LeftFlowItems_, LeftIOMap_,
263
+ ResultJoinItems_, ctx.WideFields .data () + WideFieldsIndex_);
252
264
}
253
265
254
266
TState& GetState (NUdf::TUnboxedValue& state, TComputationContext& ctx) const {
@@ -267,6 +279,7 @@ using TState = TBlockJoinState<RightRequired>;
267
279
const TVector<TType*> ResultJoinItems_;
268
280
const TVector<TType*> LeftFlowItems_;
269
281
const TVector<ui32> LeftKeyColumns_;
282
+ const TVector<ui32> LeftIOMap_;
270
283
IComputationWideFlowNode* const Flow_;
271
284
IComputationNode* const Dict_;
272
285
ui32 WideFieldsIndex_;
@@ -280,12 +293,13 @@ using TState = TBlockJoinState<RightRequired>;
280
293
public:
281
294
TBlockWideMultiMapJoinWrapper (TComputationMutables& mutables,
282
295
const TVector<TType*>&& resultJoinItems, const TVector<TType*>&& leftFlowItems,
283
- TVector<ui32>&& leftKeyColumns,
296
+ const TVector<ui32>&& leftKeyColumns, const TVector<ui32>&& leftIOMap ,
284
297
IComputationWideFlowNode* flow, IComputationNode* dict)
285
298
: TBaseComputation(mutables, flow, EValueRepresentation::Boxed, EValueRepresentation::Boxed)
286
299
, ResultJoinItems_(std::move(resultJoinItems))
287
300
, LeftFlowItems_(std::move(leftFlowItems))
288
301
, LeftKeyColumns_(std::move(leftKeyColumns))
302
+ , LeftIOMap_(leftIOMap)
289
303
, Flow_(flow)
290
304
, Dict_(dict)
291
305
, WideFieldsIndex_(mutables.IncrementWideFieldsIndex(LeftFlowItems_.size()))
@@ -357,7 +371,8 @@ using TState = TBlockJoinState<RightRequired>;
357
371
}
358
372
359
373
// Creates the per-context join state for the multi-map join wrapper and
// stores it into `state`.
// The TState object is built through ctx.HolderFactory from: the computation
// context, the left flow item types, the left input->output field mapping
// (LeftIOMap_, added in this revision), the result item types, and a pointer
// into ctx.WideFields offset by WideFieldsIndex_.
// NOTE(review): the TState constructor also takes a TMemoryUsageInfo*;
// presumably HolderFactory.Create supplies it — confirm.
void MakeState (TComputationContext& ctx, NUdf::TUnboxedValue& state) const {
360
- state = ctx.HolderFactory .Create <TState>(ctx, LeftFlowItems_, ResultJoinItems_, ctx.WideFields .data () + WideFieldsIndex_);
374
+ state = ctx.HolderFactory .Create <TState>(ctx, LeftFlowItems_, LeftIOMap_,
375
+ ResultJoinItems_, ctx.WideFields .data () + WideFieldsIndex_);
361
376
}
362
377
363
378
TState& GetState (NUdf::TUnboxedValue& state, TComputationContext& ctx) const {
@@ -413,6 +428,7 @@ using TState = TBlockJoinState<RightRequired>;
413
428
const TVector<TType*> ResultJoinItems_;
414
429
const TVector<TType*> LeftFlowItems_;
415
430
const TVector<ui32> LeftKeyColumns_;
431
+ const TVector<ui32> LeftIOMap_;
416
432
IComputationWideFlowNode* const Flow_;
417
433
IComputationNode* const Dict_;
418
434
ui32 WideFieldsIndex_;
@@ -421,7 +437,7 @@ using TState = TBlockJoinState<RightRequired>;
421
437
} // namespace
422
438
423
439
IComputationNode* WrapBlockMapJoinCore (TCallable& callable, const TComputationNodeFactoryContext& ctx) {
424
- MKQL_ENSURE (callable.GetInputsCount () == 4 , " Expected 4 args" );
440
+ MKQL_ENSURE (callable.GetInputsCount () == 5 , " Expected 5 args" );
425
441
426
442
const auto joinType = callable.GetType ()->GetReturnType ();
427
443
MKQL_ENSURE (joinType->IsFlow (), " Expected WideFlow as a resulting stream" );
@@ -459,16 +475,42 @@ IComputationNode* WrapBlockMapJoinCore(TCallable& callable, const TComputationNo
459
475
Y_ENSURE (joinKind == EJoinKind::Inner || joinKind == EJoinKind::Left ||
460
476
joinKind == EJoinKind::LeftSemi || joinKind == EJoinKind::LeftOnly);
461
477
462
- const auto tupleLiteral = AS_VALUE (TTupleLiteral, callable.GetInput (3 ));
478
+ const auto keyColumnsLiteral = callable.GetInput (3 );
479
+ const auto keyColumnsTuple = AS_VALUE (TTupleLiteral, keyColumnsLiteral);
463
480
TVector<ui32> leftKeyColumns;
464
- leftKeyColumns.reserve (tupleLiteral ->GetValuesCount ());
465
- for (ui32 i = 0 ; i < tupleLiteral ->GetValuesCount (); i++) {
466
- const auto item = AS_VALUE (TDataLiteral, tupleLiteral ->GetValue (i));
481
+ leftKeyColumns.reserve (keyColumnsTuple ->GetValuesCount ());
482
+ for (ui32 i = 0 ; i < keyColumnsTuple ->GetValuesCount (); i++) {
483
+ const auto item = AS_VALUE (TDataLiteral, keyColumnsTuple ->GetValue (i));
467
484
leftKeyColumns.emplace_back (item->AsValue ().Get <ui32>());
468
485
}
469
486
// TODO: Handle multi keys.
470
487
Y_ENSURE (leftKeyColumns.size () == 1 );
471
488
489
+ const auto keyDropsLiteral = callable.GetInput (4 );
490
+ const auto keyDropsTuple = AS_VALUE (TTupleLiteral, keyDropsLiteral);
491
+ THashSet<ui32> leftKeyDrops;
492
+ leftKeyDrops.reserve (keyDropsTuple->GetValuesCount ());
493
+ for (ui32 i = 0 ; i < keyDropsTuple->GetValuesCount (); i++) {
494
+ const auto item = AS_VALUE (TDataLiteral, keyDropsTuple->GetValue (i));
495
+ leftKeyDrops.emplace (item->AsValue ().Get <ui32>());
496
+ }
497
+
498
+ const THashSet<ui32> leftKeySet (leftKeyColumns.cbegin (), leftKeyColumns.cend ());
499
+ for (const auto & drop : leftKeyDrops) {
500
+ MKQL_ENSURE (leftKeySet.contains (drop),
501
+ " Only key columns has to be specified in drop column set" );
502
+
503
+ }
504
+
505
+ TVector<ui32> leftIOMap;
506
+ // XXX: Mind the last wide item, containing block length.
507
+ for (size_t i = 0 ; i < leftFlowItems.size () - 1 ; i++) {
508
+ if (leftKeyDrops.contains (i)) {
509
+ continue ;
510
+ }
511
+ leftIOMap.push_back (i);
512
+ }
513
+
472
514
const auto flow = LocateNode (ctx.NodeLocator , callable, 0 );
473
515
const auto dict = LocateNode (ctx.NodeLocator , callable, 1 );
474
516
@@ -477,28 +519,34 @@ IComputationNode* WrapBlockMapJoinCore(TCallable& callable, const TComputationNo
477
519
case EJoinKind::Inner:
478
520
if (isMulti) {
479
521
return new TBlockWideMultiMapJoinWrapper<true >(ctx.Mutables ,
480
- std::move (joinItems), std::move (leftFlowItems), std::move (leftKeyColumns),
522
+ std::move (joinItems), std::move (leftFlowItems),
523
+ std::move (leftKeyColumns), std::move (leftIOMap),
481
524
static_cast <IComputationWideFlowNode*>(flow), dict);
482
525
}
483
526
return new TBlockWideMapJoinWrapper<false , true >(ctx.Mutables ,
484
- std::move (joinItems), std::move (leftFlowItems), std::move (leftKeyColumns),
527
+ std::move (joinItems), std::move (leftFlowItems),
528
+ std::move (leftKeyColumns), std::move (leftIOMap),
485
529
static_cast <IComputationWideFlowNode*>(flow), dict);
486
530
case EJoinKind::Left:
487
531
if (isMulti) {
488
532
return new TBlockWideMultiMapJoinWrapper<false >(ctx.Mutables ,
489
- std::move (joinItems), std::move (leftFlowItems), std::move (leftKeyColumns),
533
+ std::move (joinItems), std::move (leftFlowItems),
534
+ std::move (leftKeyColumns), std::move (leftIOMap),
490
535
static_cast <IComputationWideFlowNode*>(flow), dict);
491
536
}
492
537
return new TBlockWideMapJoinWrapper<false , false >(ctx.Mutables ,
493
- std::move (joinItems), std::move (leftFlowItems), std::move (leftKeyColumns),
538
+ std::move (joinItems), std::move (leftFlowItems),
539
+ std::move (leftKeyColumns), std::move (leftIOMap),
494
540
static_cast <IComputationWideFlowNode*>(flow), dict);
495
541
case EJoinKind::LeftSemi:
496
542
return new TBlockWideMapJoinWrapper<true , true >(ctx.Mutables ,
497
- std::move (joinItems), std::move (leftFlowItems), std::move (leftKeyColumns),
543
+ std::move (joinItems), std::move (leftFlowItems),
544
+ std::move (leftKeyColumns), std::move (leftIOMap),
498
545
static_cast <IComputationWideFlowNode*>(flow), dict);
499
546
case EJoinKind::LeftOnly:
500
547
return new TBlockWideMapJoinWrapper<true , false >(ctx.Mutables ,
501
- std::move (joinItems), std::move (leftFlowItems), std::move (leftKeyColumns),
548
+ std::move (joinItems), std::move (leftFlowItems),
549
+ std::move (leftKeyColumns), std::move (leftIOMap),
502
550
static_cast <IComputationWideFlowNode*>(flow), dict);
503
551
default :
504
552
MKQL_ENSURE (false , " BlockMapJoinCore doesn't support %s join type"
0 commit comments