@@ -48,7 +48,7 @@ class TTableHistogramBuilderBtreeIndex {
48
48
{
49
49
}
50
50
51
- TString ToString () const noexcept {
51
+ TString ToString (const TKeyCellDefaults &keyDefaults ) const {
52
52
return TStringBuilder ()
53
53
<< " Part: " << Part->Label .ToString ()
54
54
<< " PageId: " << PageId
@@ -57,8 +57,8 @@ class TTableHistogramBuilderBtreeIndex {
57
57
<< " EndRowId: " << EndRowId
58
58
<< " BeginDataSize: " << BeginDataSize
59
59
<< " EndDataSize: " << EndDataSize
60
- << " BeginKey: " << BeginKey. Count ( )
61
- << " EndKey: " << EndKey. Count ( )
60
+ << " BeginKey: " << NFmt::Do (BeginKey, keyDefaults )
61
+ << " EndKey: " << NFmt::Do (EndKey, keyDefaults )
62
62
<< " State: " << (ui32)State;
63
63
}
64
64
@@ -130,15 +130,17 @@ class TTableHistogramBuilderBtreeIndex {
130
130
};
131
131
132
132
struct TEvent {
// A begin or end event of a node. In the added (+) lines the event no longer stores its own Key:
// GetKey() returns Node->BeginKey when IsBegin is true and Node->EndKey otherwise,
// and ToString() prints the IsBegin flag followed by the node's own description.
133
- TCellsIterable Key;
134
- bool IsBegin;
135
133
TNodeState* Node;
134
+ bool IsBegin;
136
135
137
- TString ToString () const noexcept {
136
+ TString ToString (const TKeyCellDefaults &keyDefaults ) const {
138
137
return TStringBuilder ()
139
- << Node->ToString ()
140
- << " IsBegin: " << IsBegin
141
- << " Key: " << Key.Count ();
138
+ << " IsBegin: " << IsBegin
139
+ << " " << Node->ToString (keyDefaults);
140
+ }
141
+
142
+ const TCellsIterable& GetKey () const {
143
+ return IsBegin ? Node->BeginKey : Node->EndKey ;
142
144
}
143
145
};
144
146
@@ -149,7 +151,7 @@ class TTableHistogramBuilderBtreeIndex {
149
151
return Compare (a, b) > 0 ;
150
152
}
151
153
152
- i8 Compare (const TEvent& a, const TEvent& b) const noexcept {
154
+ int Compare (const TEvent& a, const TEvent& b) const {
153
155
// events go in order:
154
156
// - Key = {}, IsBegin = true
155
157
// - ...
@@ -161,13 +163,16 @@ class TTableHistogramBuilderBtreeIndex {
161
163
// - ...
162
164
// - Key = {}, IsBegin = false
163
165
164
- if (a.Key && b.Key ) { // compare by keys
165
- auto cmp = CompareKeys (a.Key , b.Key , KeyDefaults);
166
+ // end goes before begin in order to
167
+ // close the previous node before opening the next one
168
+
169
+ if (a.GetKey () && b.GetKey ()) { // compare by keys
170
+ auto cmp = CompareKeys (a.GetKey (), b.GetKey (), KeyDefaults);
166
171
if (cmp != 0 ) {
167
172
return cmp;
168
173
}
169
174
// keys are the same, compare by begin flag, end events first:
170
- return Compare (a.IsBegin ? 1 : -1 , b.IsBegin ? 1 : -1 );
175
+ return Compare (a.IsBegin ? + 1 : -1 , b.IsBegin ? + 1 : -1 );
171
176
}
172
177
173
178
// category = -1 for Key = { }, IsBegin = true
@@ -177,14 +182,14 @@ class TTableHistogramBuilderBtreeIndex {
177
182
}
178
183
179
184
private:
180
- static i8 GetCategory (const TEvent& a) noexcept {
181
- if (a.Key ) {
185
+ static int GetCategory (const TEvent& a) {
// Ordering category of an event: 0 when its key tests true; otherwise
// -1 for a begin event and +1 for an end event.
186
+ if (a.GetKey () ) {
182
187
return 0 ;
183
188
}
184
189
return a.IsBegin ? -1 : +1 ;
185
190
}
186
191
187
- static i8 Compare (i8 a, i8 b) noexcept {
192
+ static int Compare (int a, int b) {
188
193
if (a < b) return -1 ;
189
194
if (a > b) return +1 ;
190
195
return 0 ;
@@ -226,6 +231,9 @@ class TTableHistogramBuilderBtreeIndex {
226
231
227
232
for (auto index : xrange (Subset.Flatten .size ())) {
228
233
auto & part = Subset.Flatten [index];
234
+ if (part.Slices ) {
235
+ LOG_BUILD_STATS (" slicing part " << part->Label << " : " << NFmt::Do (*part.Slices , KeyDefaults));
236
+ }
229
237
auto & meta = part->IndexPages .GetBTree ({});
230
238
TCellsIterable beginKey = EmptyKey;
231
239
if (part.Slices && part.Slices ->front ().FirstKey .GetCells ()) {
@@ -235,7 +243,7 @@ class TTableHistogramBuilderBtreeIndex {
235
243
if (part.Slices && part.Slices ->back ().LastKey .GetCells ()) {
236
244
endKey = MakeCellsIterableKey (part.Part .Get (), part.Slices ->back ().LastKey );
237
245
}
238
- LoadedStateNodes.emplace_back (part.Part .Get (), meta.GetPageId (), meta.LevelCount , 0 , meta.GetRowCount (), 0 , meta.GetDataSize (), beginKey, endKey);
246
+ LoadedStateNodes.emplace_back (part.Part .Get (), meta.GetPageId (), meta.LevelCount , 0 , meta.GetRowCount (), 0 , meta.GetTotalDataSize (), beginKey, endKey);
239
247
ready &= SlicePart (*part.Slices , LoadedStateNodes.back ());
240
248
}
241
249
@@ -261,13 +269,13 @@ class TTableHistogramBuilderBtreeIndex {
261
269
262
270
if (it == slices.end () || node.EndRowId <= it->BeginRowId () || it->EndRowId () <= node.BeginRowId ) {
263
271
// skip the node
264
- LOG_BUILD_STATS (" slicing node " << node.ToString () << " => skip" );
272
+ LOG_BUILD_STATS (" slicing node " << node.ToString (KeyDefaults ) << " => skip" );
265
273
return true ;
266
274
}
267
275
268
276
if (it->BeginRowId () <= node.BeginRowId && node.EndRowId <= it->EndRowId ()) {
269
277
// take the node
270
- LOG_BUILD_STATS (" slicing node " << node.ToString () << " => take" );
278
+ LOG_BUILD_STATS (" slicing node " << node.ToString (KeyDefaults ) << " => take" );
271
279
AddFutureEvents (node);
272
280
return true ;
273
281
}
@@ -278,17 +286,20 @@ class TTableHistogramBuilderBtreeIndex {
278
286
// can't split, decide by node.EndRowId - 1
279
287
// TODO: decide by non-empty slice and node intersection, but this requires size calculation changes too
280
288
if (it->Has (node.EndRowId - 1 )) {
281
- LOG_BUILD_STATS (" slicing node " << node.ToString () << " => take root" );
289
+ LOG_BUILD_STATS (" slicing node " << node.ToString (KeyDefaults) << " => take leaf" );
290
+ // the slice may start after node begin, shift the node begin to make it more sensible
291
+ node.BeginRowId = it->BeginRowId ();
292
+ node.BeginKey = MakeCellsIterableKey (node.Part , it->FirstKey );
282
293
AddFutureEvents (node);
283
294
} else {
284
- LOG_BUILD_STATS (" slicing node " << node.ToString () << " => skip root " );
295
+ LOG_BUILD_STATS (" slicing node " << node.ToString (KeyDefaults ) << " => skip leaf " );
285
296
}
286
297
return true ;
287
298
}
288
299
289
300
bool ready = true ;
290
301
291
- LOG_BUILD_STATS (" slicing node " << node.ToString () << " => split" );
302
+ LOG_BUILD_STATS (" slicing node " << node.ToString (KeyDefaults ) << " => split" );
292
303
const auto addNode = [&](TNodeState& child) {
293
304
ready &= SlicePart (slices, child);
294
305
};
@@ -341,10 +352,10 @@ class TTableHistogramBuilderBtreeIndex {
341
352
<< " openedSortedByRowCount: " << openedSortedByRowCount.size ()
342
353
<< " openedSortedByDataSize: " << openedSortedByDataSize.size ()
343
354
<< " FutureEvents: " << FutureEvents.size ()
344
- << " currentKeyPointer: " << currentKeyPointer.ToString ());
355
+ << " currentKeyPointer: " << currentKeyPointer.ToString (KeyDefaults ));
345
356
346
357
auto processEvent = [&](const TEvent& event) {
347
- LOG_BUILD_STATS (" processing event " << event.ToString ());
358
+ LOG_BUILD_STATS (" processing event " << event.ToString (KeyDefaults ));
348
359
Y_DEBUG_ABORT_UNLESS (NodeEventKeyGreater.Compare (event, currentKeyPointer) <= 0 , " Can't process future events" );
349
360
if (event.IsBegin ) {
350
361
if (event.Node ->Open (openedRowCount, openedDataSize)) {
@@ -370,7 +381,7 @@ class TTableHistogramBuilderBtreeIndex {
370
381
// TODO: skip all closed nodes and don't process them here
371
382
// TODO: don't compare each node key and replace it with parentNode.Seek(currentKeyPointer)
372
383
auto cmp = NodeEventKeyGreater.Compare (event, currentKeyPointer);
373
- LOG_BUILD_STATS (" adding event " << (i32 )cmp << " " << event.ToString ());
384
+ LOG_BUILD_STATS (" adding event " << (i32 )cmp << " " << event.ToString (KeyDefaults ));
374
385
if (cmp <= 0 ) { // event happened
375
386
processEvent (event);
376
387
if (cmp == 0 ) {
@@ -381,8 +392,8 @@ class TTableHistogramBuilderBtreeIndex {
381
392
}
382
393
};
383
394
const auto addNode = [&](TNodeState& node) {
384
- addEvent (TEvent{node. BeginKey , true , &node });
385
- addEvent (TEvent{node. EndKey , false , &node });
395
+ addEvent (TEvent{& node, true });
396
+ addEvent (TEvent{& node, false });
386
397
};
387
398
388
399
// may safely skip current key pointer and go further only if at the next iteration
@@ -395,7 +406,7 @@ class TTableHistogramBuilderBtreeIndex {
395
406
openedSortedByRowCount.pop ();
396
407
397
408
LOG_BUILD_STATS (" loading node by row count trigger"
398
- << node->ToString ()
409
+ << node->ToString (KeyDefaults )
399
410
<< " closedRowCount: " << closedRowCount
400
411
<< " openedRowCount: " << openedRowCount
401
412
<< " nextHistogramRowCount: " << nextHistogramRowCount);
@@ -413,7 +424,7 @@ class TTableHistogramBuilderBtreeIndex {
413
424
openedSortedByDataSize.pop ();
414
425
415
426
LOG_BUILD_STATS (" loading node by data size trigger"
416
- << node->ToString ()
427
+ << node->ToString (KeyDefaults )
417
428
<< " closedDataSize: " << closedDataSize
418
429
<< " openedDataSize: " << openedDataSize
419
430
<< " nextHistogramDataSize: " << nextHistogramDataSize);
@@ -439,7 +450,7 @@ class TTableHistogramBuilderBtreeIndex {
439
450
<< " openedSortedByRowCount: " << openedSortedByRowCount.size ()
440
451
<< " openedSortedByDataSize: " << openedSortedByDataSize.size ()
441
452
<< " FutureEvents: " << FutureEvents.size ()
442
- << " currentKeyPointer: " << currentKeyPointer.ToString ());
453
+ << " currentKeyPointer: " << currentKeyPointer.ToString (KeyDefaults ));
443
454
444
455
// add current key pointer to a histogram if we either:
445
456
// - failed to split opened nodes and may exceed a next histogram bucket value (plus its gaps)
@@ -449,7 +460,7 @@ class TTableHistogramBuilderBtreeIndex {
449
460
// - minus size of all nodes that start at current key pointer
450
461
// - plus half of the size of all other opened nodes (as their exact position is unknown)
451
462
// also check that the current key pointer value is greater than the last value present in a histogram
452
- if (currentKeyPointer.Key ) {
463
+ if (currentKeyPointer.GetKey () ) {
453
464
if (nextHistogramRowCount != Max<ui64>()) {
454
465
if (closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap || closedRowCount > nextHistogramRowCount - RowCountResolutionGap) {
455
466
ui64 currentKeyRowCountOpens = 0 ;
@@ -461,7 +472,7 @@ class TTableHistogramBuilderBtreeIndex {
461
472
Y_ABORT_UNLESS (currentKeyRowCountOpens <= openedRowCount);
462
473
ui64 currentKeyPointerRowCount = closedRowCount + (openedRowCount - currentKeyRowCountOpens) / 2 ;
463
474
if ((stats.RowCountHistogram .empty () ? 0 : stats.RowCountHistogram .back ().Value ) < currentKeyPointerRowCount && currentKeyPointerRowCount < stats.RowCount ) {
464
- AddKey (stats.RowCountHistogram , currentKeyPointer.Key , currentKeyPointerRowCount);
475
+ AddKey (stats.RowCountHistogram , currentKeyPointer.GetKey () , currentKeyPointerRowCount);
465
476
nextHistogramRowCount = Max (currentKeyPointerRowCount + 1 , nextHistogramRowCount + RowCountResolution);
466
477
if (nextHistogramRowCount + RowCountResolutionGap > stats.RowCount ) {
467
478
nextHistogramRowCount = Max<ui64>();
@@ -480,7 +491,7 @@ class TTableHistogramBuilderBtreeIndex {
480
491
Y_ABORT_UNLESS (currentKeyDataSizeOpens <= openedDataSize);
481
492
ui64 currentKeyPointerDataSize = closedDataSize + (openedDataSize - currentKeyDataSizeOpens) / 2 ;
482
493
if ((stats.DataSizeHistogram .empty () ? 0 : stats.DataSizeHistogram .back ().Value ) < currentKeyPointerDataSize && currentKeyPointerDataSize < stats.DataSize .Size ) {
483
- AddKey (stats.DataSizeHistogram , currentKeyPointer.Key , currentKeyPointerDataSize);
494
+ AddKey (stats.DataSizeHistogram , currentKeyPointer.GetKey () , currentKeyPointerDataSize);
484
495
nextHistogramDataSize = Max (currentKeyPointerDataSize + 1 , nextHistogramDataSize + DataSizeResolution);
485
496
if (nextHistogramDataSize + DataSizeResolutionGap > stats.DataSize .Size ) {
486
497
nextHistogramDataSize = Max<ui64>();
@@ -507,7 +518,7 @@ class TTableHistogramBuilderBtreeIndex {
507
518
return true ;
508
519
}
509
520
510
- void AddKey (THistogram& histogram, TCellsIterable& key, ui64 value) {
521
+ void AddKey (THistogram& histogram, const TCellsIterable& key, ui64 value) {
511
522
TVector<TCell> keyCells;
512
523
513
524
// add columns that are present in the part:
@@ -555,8 +566,14 @@ class TTableHistogramBuilderBtreeIndex {
555
566
}
556
567
557
568
void AddFutureEvents (TNodeState& node) {
// Queues the begin and the end event of `node` in FutureEvents.
// The added (+) lines first compare the two events with NodeEventKeyGreater, log the result,
// and, for nodes with more than one row, debug-check that the begin event orders before the end event.
558
- FutureEvents.push (TEvent{node.BeginKey , true , &node});
559
- FutureEvents.push (TEvent{node.EndKey , false , &node});
569
+ auto cmp = NodeEventKeyGreater.Compare (TEvent{&node, true }, TEvent{&node, false });
570
+ LOG_BUILD_STATS (" adding node future events " << (i32 )cmp << " " << node.ToString (KeyDefaults));
571
+ if (node.GetRowCount () > 1 ) {
572
+ Y_DEBUG_ABORT_UNLESS (cmp < 0 );
573
+ }
574
+
575
+ FutureEvents.push (TEvent{&node, true });
576
+ FutureEvents.push (TEvent{&node, false });
560
577
}
561
578
562
579
private:
0 commit comments