@@ -55,14 +55,42 @@ const TRuntimeNode MakeDict(TProgramBuilder& pgmBuilder,
55
55
});
56
56
}
57
57
58
+ // XXX: Copy-pasted from program builder sources. Adjusted on demand.
59
+ const std::vector<TType*> ValidateBlockStreamType (const TType* streamType) {
60
+ const auto wideComponents = GetWideComponents (AS_TYPE (TStreamType, streamType));
61
+ Y_ENSURE (wideComponents.size () > 0 , " Expected at least one column" );
62
+ std::vector<TType*> items;
63
+ items.reserve (wideComponents.size ());
64
+ // XXX: Declare these variables outside the loop body to use for the last
65
+ // item (i.e. block length column) in the assertions below.
66
+ bool isScalar;
67
+ TType* itemType;
68
+ for (const auto & wideComponent : wideComponents) {
69
+ auto blockType = AS_TYPE (TBlockType, wideComponent);
70
+ isScalar = blockType->GetShape () == TBlockType::EShape::Scalar;
71
+ itemType = blockType->GetItemType ();
72
+ items.push_back (blockType);
73
+ }
74
+
75
+ Y_ENSURE (isScalar, " Last column should be scalar" );
76
+ Y_ENSURE (AS_TYPE (TDataType, itemType)->GetSchemeType () == NUdf::TDataType<ui64>::Id, " Expected Uint64" );
77
+ return items;
78
+ }
79
+
80
+ bool IsOptionalOrNull (const TType* type) {
81
+ return type->IsOptional () || type->IsNull () || type->IsPg ();
82
+ }
83
+
58
84
const TRuntimeNode BuildBlockJoin (TProgramBuilder& pgmBuilder, EJoinKind joinKind,
59
85
const TVector<ui32>& leftKeyColumns, const TVector<ui32>& leftKeyDrops,
60
86
TRuntimeNode& leftArg, TType* leftTuple, const TRuntimeNode& dictNode
61
87
) {
88
+ // 1. Make left argument node.
62
89
const auto tupleType = AS_TYPE (TTupleType, leftTuple);
63
90
const auto listTupleType = pgmBuilder.NewListType (leftTuple);
64
91
leftArg = pgmBuilder.Arg (listTupleType);
65
92
93
+ // 2. Make left wide stream node.
66
94
const auto leftWideStream = pgmBuilder.FromFlow (pgmBuilder.ExpandMap (pgmBuilder.ToFlow (leftArg),
67
95
[&](TRuntimeNode tupleNode) -> TRuntimeNode::TList {
68
96
TRuntimeNode::TList wide;
@@ -73,8 +101,53 @@ const TRuntimeNode BuildBlockJoin(TProgramBuilder& pgmBuilder, EJoinKind joinKin
73
101
return wide;
74
102
}));
75
103
104
+ // 3. Calculate the resulting join type.
105
+ const auto leftStreamItems = ValidateBlockStreamType (leftWideStream.GetStaticType ());
106
+ const THashSet<ui32> leftKeyDropsSet (leftKeyDrops.cbegin (), leftKeyDrops.cend ());
107
+ TVector<TType*> returnJoinItems;
108
+ for (size_t i = 0 ; i < leftStreamItems.size (); i++) {
109
+ if (leftKeyDropsSet.contains (i)) {
110
+ continue ;
111
+ }
112
+ returnJoinItems.push_back (leftStreamItems[i]);
113
+ }
114
+
115
+ const auto payloadType = AS_TYPE (TDictType, dictNode.GetStaticType ())->GetPayloadType ();
116
+ const auto payloadItemType = payloadType->IsList ()
117
+ ? AS_TYPE (TListType, payloadType)->GetItemType ()
118
+ : payloadType;
119
+ if (joinKind == EJoinKind::Inner || joinKind == EJoinKind::Left) {
120
+ // XXX: This is the contract ensured by the expression compiler and
121
+ // optimizers to ease the processing of the dict payload in wide context.
122
+ Y_ENSURE (payloadItemType->IsTuple (), " Dict payload has to be a Tuple" );
123
+ const auto payloadItems = AS_TYPE (TTupleType, payloadItemType)->GetElements ();
124
+ TVector<TType*> dictBlockItems;
125
+ dictBlockItems.reserve (payloadItems.size ());
126
+ for (const auto & payloadItem : payloadItems) {
127
+ MKQL_ENSURE (!payloadItem->IsBlock (), " Dict payload item has to be non-block" );
128
+ const auto itemType = joinKind == EJoinKind::Inner ? payloadItem
129
+ : IsOptionalOrNull (payloadItem) ? payloadItem
130
+ : pgmBuilder.NewOptionalType (payloadItem);
131
+ dictBlockItems.emplace_back (pgmBuilder.NewBlockType (itemType, TBlockType::EShape::Many));
132
+ }
133
+ // Block length column has to be the last column in wide block stream item,
134
+ // so all contents of the dict payload should be appended to the resulting
135
+ // wide type before the block size column.
136
+ const auto blockLenPos = std::prev (returnJoinItems.end ());
137
+ returnJoinItems.insert (blockLenPos, dictBlockItems.cbegin (), dictBlockItems.cend ());
138
+ } else {
139
+ // XXX: This is the contract ensured by the expression compiler and
140
+ // optimizers for join types that don't require the right (i.e. dict) part.
141
+ Y_ENSURE (payloadItemType->IsVoid (), " Dict payload has to be Void" );
142
+ }
143
+ TType* returnJoinType = pgmBuilder.NewStreamType (pgmBuilder.NewMultiType (returnJoinItems));
144
+
145
+ // 4. Build BlockMapJoinCore node.
76
146
const auto joinNode = pgmBuilder.BlockMapJoinCore (leftWideStream, dictNode, joinKind,
77
- leftKeyColumns, leftKeyDrops);
147
+ leftKeyColumns, leftKeyDrops,
148
+ returnJoinType);
149
+
150
+ // 5. Build the root node with list of tuples.
78
151
const auto joinItems = GetWideComponents (AS_TYPE (TStreamType, joinNode.GetStaticType ()));
79
152
const auto resultType = AS_TYPE (TTupleType, pgmBuilder.NewTupleType (joinItems));
80
153
0 commit comments