Skip to content

Commit d6d96f7

Browse files
lll-phill-lll and Kamil Khamitov
authored and committed
Added bloom filter preallocation in GJ
As mentioned in ydb-platform#13349, GraceJoin fails with a memory-limit error when trying to allocate memory for Bloom filters. This PR adds these filters to the pre-allocation step so that spilling can be enabled when memory is low. commit_hash:107ea78fdfeee8f3422818ada96af8d3763e6849
1 parent 47c84a5 commit d6d96f7

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

yql/essentials/minikql/comp_nodes/mkql_grace_join_imp.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,22 +353,28 @@ bool TTable::TryToPreallocateMemoryForJoin(TTable & t1, TTable & t2, EJoinKind /
353353
if (!tableForPreallocation.TableBucketsStats[bucket].TuplesNum || tableForPreallocation.TableBuckets[bucket].NSlots) continue;
354354

355355
TTableBucket& bucketForPreallocation = tableForPreallocation.TableBuckets[bucket];
356-
const TTableBucketStats& bucketForPreallocationStats = tableForPreallocation.TableBucketsStats[bucket];
356+
TTableBucketStats& bucketForPreallocationStats = tableForPreallocation.TableBucketsStats[bucket];
357357

358358
const auto nSlots = ComputeJoinSlotsSizeForBucket(bucketForPreallocation, bucketForPreallocationStats, tableForPreallocation.HeaderSize,
359359
tableForPreallocation.NumberOfKeyStringColumns != 0, tableForPreallocation.NumberOfKeyIColumns != 0);
360360
const auto slotSize = ComputeNumberOfSlots(tableForPreallocation.TableBucketsStats[bucket].TuplesNum);
361361

362362
try {
363363
bucketForPreallocation.JoinSlots.reserve(nSlots*slotSize);
364+
bucketForPreallocationStats.BloomFilter.Reserve(bucketForPreallocationStats.TuplesNum);
364365
} catch (TMemoryLimitExceededException) {
365366
for (ui64 i = 0; i < bucket; ++i) {
366367
auto& b1 = t1.TableBuckets[i];
367368
b1.JoinSlots.resize(0);
368369
b1.JoinSlots.shrink_to_fit();
370+
auto& s1 = t1.TableBucketsStats[i];
371+
s1.BloomFilter.Shrink();
372+
369373
auto& b2 = t2.TableBuckets[i];
370374
b2.JoinSlots.resize(0);
371375
b2.JoinSlots.shrink_to_fit();
376+
auto& s2 = t2.TableBucketsStats[i];
377+
s2.BloomFilter.Shrink();
372378
}
373379
return false;
374380
}

yql/essentials/minikql/comp_nodes/mkql_grace_join_imp.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,23 @@ class TBloomfilter {
4343
Resize(size);
4444
}
4545

46-
void Resize(ui64 size) {
46+
void Reserve(ui64 size) {
4747
size = std::max(size, CachelineSize);
4848
Bits_ = 6;
4949

50-
for (; (ui64(1)<<Bits_) < size; ++Bits_)
50+
for (; (ui64(1) << Bits_) < size; ++Bits_)
5151
;
5252

5353
Bits_ += 3; // -> multiply by 8
54-
size = 1u<<(Bits_ - 6);
5554

55+
Storage_.reserve(ComputeStorageSize());
56+
}
57+
58+
void Resize(ui64 size) {
5659
Storage_.clear();
57-
Storage_.resize(size + CachelineSize/sizeof(ui64) - 1);
60+
61+
Reserve(size);
62+
Storage_.resize(ComputeStorageSize());
5863

5964
// align Ptr_ up to BlockSize
6065
Ptr_ = (ui64 *)((uintptr_t(Storage_.data()) + BlockSize - 1) & ~(BlockSize - 1));
@@ -104,6 +109,12 @@ class TBloomfilter {
104109
Storage_.shrink_to_fit();
105110
Ptr_ = Storage_.data();
106111
}
112+
113+
private:
114+
// Returns the number of elements Storage_ is sized to for the current Bits_:
// (1 << (Bits_ - 6)) plus CachelineSize / sizeof(ui64) - 1 extra elements.
// NOTE(review): the extra elements presumably give Resize() room to align
// Ptr_ upwards inside Storage_ — confirm against the alignment code there.
// Fails the MKQL_ENSURE check when Bits_ < 6.
ui64 ComputeStorageSize() const {
    MKQL_ENSURE(Bits_ >= 6, "Internal logic error");
    // Shift a 64-bit one, matching the `ui64(1) << Bits_` loop in Reserve():
    // with a 32-bit `1u`, a shift count of 32 or more is undefined behaviour
    // and the result would be truncated before being widened to ui64.
    return (ui64(1) << (Bits_ - 6)) + CachelineSize / sizeof(ui64) - 1;
}
107118
};
108119

109120
/*

0 commit comments

Comments
 (0)