Skip to content

Commit 464a31d

Browse files
authored
25-1 Fix B-Tree histograms comparator result overflow (#15819)
1 parent dda41e5 commit 464a31d

9 files changed

+112
-43
lines changed

ydb/core/tablet_flat/flat_page_btree_index.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <ydb/core/base/defs.h>
4+
#include <ydb/core/scheme/scheme_tablecell.h>
45
#include <util/generic/bitmap.h>
56
#include "flat_page_base.h"
67
#include "flat_page_label.h"
@@ -282,6 +283,23 @@ namespace NKikimr::NTable::NPage {
282283
return Count() > 0;
283284
}
284285

286+
void Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const
287+
{
288+
out << '{';
289+
290+
auto iter = Iter();
291+
for (TPos pos : xrange(iter.Count())) {
292+
if (pos != 0) {
293+
out << ", ";
294+
}
295+
TString value;
296+
DbgPrintValue(value, iter.Next(), keyDefaults.BasicTypes()[pos]);
297+
out << value;
298+
}
299+
300+
out << '}';
301+
}
302+
285303
private:
286304
const TIsNullBitmap* IsNullBitmap;
287305
TColumns Columns;

ydb/core/tablet_flat/flat_part_dump.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ namespace {
281281

282282
void TDump::Key(TCellsRef key, const TPartScheme &scheme) noexcept
283283
{
284-
Out << "(";
284+
Out << "{";
285285

286286
for (auto off : xrange(key.size())) {
287287
TString str;
@@ -291,7 +291,7 @@ namespace {
291291
Out << (off ? ", " : "") << str;
292292
}
293293

294-
Out << ")";
294+
Out << "}";
295295
}
296296

297297
void TDump::BTreeIndexNode(const TPart &part, NPage::TBtreeIndexNode::TChild meta, ui32 level) noexcept

ydb/core/tablet_flat/flat_part_slice.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ void TSlice::Describe(IOutputStream& out) const noexcept
183183
{
184184
out << (FirstInclusive ? '[' : '(');
185185
out << FirstRowId;
186-
out << ',';
186+
out << ", ";
187187
if (LastRowId != Max<TRowId>()) {
188188
out << LastRowId;
189189
} else {
@@ -192,6 +192,15 @@ void TSlice::Describe(IOutputStream& out) const noexcept
192192
out << (LastInclusive ? ']' : ')');
193193
}
194194

195+
void TSlice::Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const
196+
{
197+
out << "{rows: ";
198+
Describe(out);
199+
out << " keys: ";
200+
TBounds::Describe(out, keyDefaults);
201+
out << "}";
202+
}
203+
195204
void TSlices::Describe(IOutputStream& out) const noexcept
196205
{
197206
bool first = true;
@@ -206,6 +215,20 @@ void TSlices::Describe(IOutputStream& out) const noexcept
206215
out << (first ? "}" : " }");
207216
}
208217

218+
void TSlices::Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const
219+
{
220+
bool first = true;
221+
out << "{ ";
222+
for (const auto& bounds : *this) {
223+
if (first)
224+
first = false;
225+
else
226+
out << ", ";
227+
bounds.Describe(out, keyDefaults);
228+
}
229+
out << (first ? "}" : " }");
230+
}
231+
209232
void TSlices::Validate() const noexcept
210233
{
211234
TRowId lastEnd = 0;

ydb/core/tablet_flat/flat_part_slice.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ namespace NTable {
133133
}
134134

135135
void Describe(IOutputStream& out) const noexcept;
136+
void Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const;
136137

137138
/**
138139
* Returns true if first row of a is less than first row of b
@@ -328,6 +329,8 @@ namespace NTable {
328329

329330
void Describe(IOutputStream& out) const noexcept;
330331

332+
void Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const;
333+
331334
/**
332335
* Validate slices are correct, crash otherwise
333336
*/

ydb/core/tablet_flat/flat_part_writer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,7 @@ namespace NTable {
969969

970970
TPos it;
971971
for (it = 0; it < Key.size(); it++) {
972-
if (int cmp = CompareTypedCells(PrevPageLastKey[it], Key[it], layout.KeyTypes[it])) {
972+
if (CompareTypedCells(PrevPageLastKey[it], Key[it], layout.KeyTypes[it]) != 0) {
973973
break;
974974
}
975975
}

ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class TTableHistogramBuilderBtreeIndex {
4848
{
4949
}
5050

51-
TString ToString() const noexcept {
51+
TString ToString(const TKeyCellDefaults &keyDefaults) const {
5252
return TStringBuilder()
5353
<< "Part: " << Part->Label.ToString()
5454
<< " PageId: " << PageId
@@ -57,8 +57,8 @@ class TTableHistogramBuilderBtreeIndex {
5757
<< " EndRowId: " << EndRowId
5858
<< " BeginDataSize: " << BeginDataSize
5959
<< " EndDataSize: " << EndDataSize
60-
<< " BeginKey: " << BeginKey.Count()
61-
<< " EndKey: " << EndKey.Count()
60+
<< " BeginKey: " << NFmt::Do(BeginKey, keyDefaults)
61+
<< " EndKey: " << NFmt::Do(EndKey, keyDefaults)
6262
<< " State: " << (ui32)State;
6363
}
6464

@@ -130,15 +130,17 @@ class TTableHistogramBuilderBtreeIndex {
130130
};
131131

132132
struct TEvent {
133-
TCellsIterable Key;
134-
bool IsBegin;
135133
TNodeState* Node;
134+
bool IsBegin;
136135

137-
TString ToString() const noexcept {
136+
TString ToString(const TKeyCellDefaults &keyDefaults) const {
138137
return TStringBuilder()
139-
<< Node->ToString()
140-
<< " IsBegin: " << IsBegin
141-
<< " Key: " << Key.Count();
138+
<< "IsBegin: " << IsBegin
139+
<< " " << Node->ToString(keyDefaults);
140+
}
141+
142+
const TCellsIterable& GetKey() const {
143+
return IsBegin ? Node->BeginKey : Node->EndKey;
142144
}
143145
};
144146

@@ -149,7 +151,7 @@ class TTableHistogramBuilderBtreeIndex {
149151
return Compare(a, b) > 0;
150152
}
151153

152-
i8 Compare(const TEvent& a, const TEvent& b) const noexcept {
154+
int Compare(const TEvent& a, const TEvent& b) const {
153155
// events go in order:
154156
// - Key = {}, IsBegin = true
155157
// - ...
@@ -161,13 +163,16 @@ class TTableHistogramBuilderBtreeIndex {
161163
// - ...
162164
// - Key = {}, IsBegin = false
163165

164-
if (a.Key && b.Key) { // compare by keys
165-
auto cmp = CompareKeys(a.Key, b.Key, KeyDefaults);
166+
// end goes before begin in order to
167+
// close previous node before open the next one
168+
169+
if (a.GetKey() && b.GetKey()) { // compare by keys
170+
auto cmp = CompareKeys(a.GetKey(), b.GetKey(), KeyDefaults);
166171
if (cmp != 0) {
167172
return cmp;
168173
}
169174
// keys are the same, compare by begin flag, end events first:
170-
return Compare(a.IsBegin ? 1 : -1, b.IsBegin ? 1 : -1);
175+
return Compare(a.IsBegin ? +1 : -1, b.IsBegin ? +1 : -1);
171176
}
172177

173178
// category = -1 for Key = { }, IsBegin = true
@@ -177,14 +182,14 @@ class TTableHistogramBuilderBtreeIndex {
177182
}
178183

179184
private:
180-
static i8 GetCategory(const TEvent& a) noexcept {
181-
if (a.Key) {
185+
static int GetCategory(const TEvent& a) {
186+
if (a.GetKey()) {
182187
return 0;
183188
}
184189
return a.IsBegin ? -1 : +1;
185190
}
186191

187-
static i8 Compare(i8 a, i8 b) noexcept {
192+
static int Compare(int a, int b) {
188193
if (a < b) return -1;
189194
if (a > b) return +1;
190195
return 0;
@@ -226,6 +231,9 @@ class TTableHistogramBuilderBtreeIndex {
226231

227232
for (auto index : xrange(Subset.Flatten.size())) {
228233
auto& part = Subset.Flatten[index];
234+
if (part.Slices) {
235+
LOG_BUILD_STATS("slicing part " << part->Label << ": " << NFmt::Do(*part.Slices, KeyDefaults));
236+
}
229237
auto& meta = part->IndexPages.GetBTree({});
230238
TCellsIterable beginKey = EmptyKey;
231239
if (part.Slices && part.Slices->front().FirstKey.GetCells()) {
@@ -235,7 +243,7 @@ class TTableHistogramBuilderBtreeIndex {
235243
if (part.Slices && part.Slices->back().LastKey.GetCells()) {
236244
endKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->back().LastKey);
237245
}
238-
LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetDataSize(), beginKey, endKey);
246+
LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetTotalDataSize(), beginKey, endKey);
239247
ready &= SlicePart(*part.Slices, LoadedStateNodes.back());
240248
}
241249

@@ -261,13 +269,13 @@ class TTableHistogramBuilderBtreeIndex {
261269

262270
if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) {
263271
// skip the node
264-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => skip");
272+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => skip");
265273
return true;
266274
}
267275

268276
if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) {
269277
// take the node
270-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => take");
278+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => take");
271279
AddFutureEvents(node);
272280
return true;
273281
}
@@ -278,17 +286,20 @@ class TTableHistogramBuilderBtreeIndex {
278286
// can't split, decide by node.EndRowId - 1
279287
// TODO: decide by non-empty slice and node intersection, but this requires size calculation changes too
280288
if (it->Has(node.EndRowId - 1)) {
281-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => take root");
289+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => take leaf");
290+
// the slice may start after node begin, shift the node begin to make it more sensible
291+
node.BeginRowId = it->BeginRowId();
292+
node.BeginKey = MakeCellsIterableKey(node.Part, it->FirstKey);
282293
AddFutureEvents(node);
283294
} else {
284-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => skip root");
295+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => skip leaf");
285296
}
286297
return true;
287298
}
288299

289300
bool ready = true;
290301

291-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => split");
302+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => split");
292303
const auto addNode = [&](TNodeState& child) {
293304
ready &= SlicePart(slices, child);
294305
};
@@ -341,10 +352,10 @@ class TTableHistogramBuilderBtreeIndex {
341352
<< " openedSortedByRowCount: " << openedSortedByRowCount.size()
342353
<< " openedSortedByDataSize: " << openedSortedByDataSize.size()
343354
<< " FutureEvents: " << FutureEvents.size()
344-
<< " currentKeyPointer: " << currentKeyPointer.ToString());
355+
<< " currentKeyPointer: " << currentKeyPointer.ToString(KeyDefaults));
345356

346357
auto processEvent = [&](const TEvent& event) {
347-
LOG_BUILD_STATS("processing event " << event.ToString());
358+
LOG_BUILD_STATS("processing event " << event.ToString(KeyDefaults));
348359
Y_DEBUG_ABORT_UNLESS(NodeEventKeyGreater.Compare(event, currentKeyPointer) <= 0, "Can't process future events");
349360
if (event.IsBegin) {
350361
if (event.Node->Open(openedRowCount, openedDataSize)) {
@@ -370,7 +381,7 @@ class TTableHistogramBuilderBtreeIndex {
370381
// TODO: skip all closed nodes and don't process them here
371382
// TODO: don't compare each node key and replace it with parentNode.Seek(currentKeyPointer)
372383
auto cmp = NodeEventKeyGreater.Compare(event, currentKeyPointer);
373-
LOG_BUILD_STATS("adding event " << (i32)cmp << " " << event.ToString());
384+
LOG_BUILD_STATS("adding event " << (i32)cmp << " " << event.ToString(KeyDefaults));
374385
if (cmp <= 0) { // event happened
375386
processEvent(event);
376387
if (cmp == 0) {
@@ -381,8 +392,8 @@ class TTableHistogramBuilderBtreeIndex {
381392
}
382393
};
383394
const auto addNode = [&](TNodeState& node) {
384-
addEvent(TEvent{node.BeginKey, true, &node});
385-
addEvent(TEvent{node.EndKey, false, &node});
395+
addEvent(TEvent{&node, true});
396+
addEvent(TEvent{&node, false});
386397
};
387398

388399
// may safely skip current key pointer and go further only if at the next iteration
@@ -395,7 +406,7 @@ class TTableHistogramBuilderBtreeIndex {
395406
openedSortedByRowCount.pop();
396407

397408
LOG_BUILD_STATS("loading node by row count trigger"
398-
<< node->ToString()
409+
<< node->ToString(KeyDefaults)
399410
<< " closedRowCount: " << closedRowCount
400411
<< " openedRowCount: " << openedRowCount
401412
<< " nextHistogramRowCount: " << nextHistogramRowCount);
@@ -413,7 +424,7 @@ class TTableHistogramBuilderBtreeIndex {
413424
openedSortedByDataSize.pop();
414425

415426
LOG_BUILD_STATS("loading node by data size trigger"
416-
<< node->ToString()
427+
<< node->ToString(KeyDefaults)
417428
<< " closedDataSize: " << closedDataSize
418429
<< " openedDataSize: " << openedDataSize
419430
<< " nextHistogramDataSize: " << nextHistogramDataSize);
@@ -439,7 +450,7 @@ class TTableHistogramBuilderBtreeIndex {
439450
<< " openedSortedByRowCount: " << openedSortedByRowCount.size()
440451
<< " openedSortedByDataSize: " << openedSortedByDataSize.size()
441452
<< " FutureEvents: " << FutureEvents.size()
442-
<< " currentKeyPointer: " << currentKeyPointer.ToString());
453+
<< " currentKeyPointer: " << currentKeyPointer.ToString(KeyDefaults));
443454

444455
// add current key pointer to a histogram if we either:
445456
// - failed to split opened nodes and may exceed a next histogram bucket value (plus its gaps)
@@ -449,7 +460,7 @@ class TTableHistogramBuilderBtreeIndex {
449460
// - minus size of all nodes that start at current key pointer
450461
// - plus half of size of all ohter opened nodes (as they exact position is unknown)
451462
// also check that current key pointer value is > then last presented value in a histogram
452-
if (currentKeyPointer.Key) {
463+
if (currentKeyPointer.GetKey()) {
453464
if (nextHistogramRowCount != Max<ui64>()) {
454465
if (closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap || closedRowCount > nextHistogramRowCount - RowCountResolutionGap) {
455466
ui64 currentKeyRowCountOpens = 0;
@@ -461,7 +472,7 @@ class TTableHistogramBuilderBtreeIndex {
461472
Y_ABORT_UNLESS(currentKeyRowCountOpens <= openedRowCount);
462473
ui64 currentKeyPointerRowCount = closedRowCount + (openedRowCount - currentKeyRowCountOpens) / 2;
463474
if ((stats.RowCountHistogram.empty() ? 0 : stats.RowCountHistogram.back().Value) < currentKeyPointerRowCount && currentKeyPointerRowCount < stats.RowCount) {
464-
AddKey(stats.RowCountHistogram, currentKeyPointer.Key, currentKeyPointerRowCount);
475+
AddKey(stats.RowCountHistogram, currentKeyPointer.GetKey(), currentKeyPointerRowCount);
465476
nextHistogramRowCount = Max(currentKeyPointerRowCount + 1, nextHistogramRowCount + RowCountResolution);
466477
if (nextHistogramRowCount + RowCountResolutionGap > stats.RowCount) {
467478
nextHistogramRowCount = Max<ui64>();
@@ -480,7 +491,7 @@ class TTableHistogramBuilderBtreeIndex {
480491
Y_ABORT_UNLESS(currentKeyDataSizeOpens <= openedDataSize);
481492
ui64 currentKeyPointerDataSize = closedDataSize + (openedDataSize - currentKeyDataSizeOpens) / 2;
482493
if ((stats.DataSizeHistogram.empty() ? 0 : stats.DataSizeHistogram.back().Value) < currentKeyPointerDataSize && currentKeyPointerDataSize < stats.DataSize.Size) {
483-
AddKey(stats.DataSizeHistogram, currentKeyPointer.Key, currentKeyPointerDataSize);
494+
AddKey(stats.DataSizeHistogram, currentKeyPointer.GetKey(), currentKeyPointerDataSize);
484495
nextHistogramDataSize = Max(currentKeyPointerDataSize + 1, nextHistogramDataSize + DataSizeResolution);
485496
if (nextHistogramDataSize + DataSizeResolutionGap > stats.DataSize.Size) {
486497
nextHistogramDataSize = Max<ui64>();
@@ -507,7 +518,7 @@ class TTableHistogramBuilderBtreeIndex {
507518
return true;
508519
}
509520

510-
void AddKey(THistogram& histogram, TCellsIterable& key, ui64 value) {
521+
void AddKey(THistogram& histogram, const TCellsIterable& key, ui64 value) {
511522
TVector<TCell> keyCells;
512523

513524
// add columns that are present in the part:
@@ -555,8 +566,14 @@ class TTableHistogramBuilderBtreeIndex {
555566
}
556567

557568
void AddFutureEvents(TNodeState& node) {
558-
FutureEvents.push(TEvent{node.BeginKey, true, &node});
559-
FutureEvents.push(TEvent{node.EndKey, false, &node});
569+
auto cmp = NodeEventKeyGreater.Compare(TEvent{&node, true}, TEvent{&node, false});
570+
LOG_BUILD_STATS("adding node future events " << (i32)cmp << " " << node.ToString(KeyDefaults));
571+
if (node.GetRowCount() > 1) {
572+
Y_DEBUG_ABORT_UNLESS(cmp < 0);
573+
}
574+
575+
FutureEvents.push(TEvent{&node, true});
576+
FutureEvents.push(TEvent{&node, false});
560577
}
561578

562579
private:

0 commit comments

Comments
 (0)