Skip to content

Commit c0d116a

Browse files
authored
Improve buckets spilling in wide combine (#8939)
1 parent 4f855a7 commit c0d116a

File tree

1 file changed

+38
-16
lines changed

1 file changed

+38
-16
lines changed

ydb/library/yql/minikql/comp_nodes/mkql_wide_combine.cpp

Lines changed: 38 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,7 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
355355
};
356356

357357
EBucketState BucketState = EBucketState::InMemory;
358+
ui64 LineCount = 0;
358359
};
359360

360361
enum class EOperatingMode {
@@ -417,7 +418,9 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
417418
case EOperatingMode::Spilling: {
418419
UpdateSpillingBuckets();
419420

420-
if (!HasMemoryForProcessing() && InputStatus != EFetchResult::Finish && TryToReduceMemoryAndWait()) return EUpdateResult::Yield;
421+
if (!HasMemoryForProcessing() && InputStatus != EFetchResult::Finish && TryToReduceMemoryAndWait()) {
422+
return EUpdateResult::Yield;
423+
}
421424

422425
if (BufferForUsedInputItems.size()) {
423426
auto& bucket = SpilledBuckets[BufferForUsedInputItemsBucketId];
@@ -456,13 +459,16 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
456459

457460
if (bucket.BucketState == TSpilledBucket::EBucketState::InMemory) {
458461
std::copy_n(ViewForKeyAndState.data(), KeyWidth, static_cast<NUdf::TUnboxedValue*>(bucket.InMemoryProcessingState->Tongue));
459-
462+
460463
bool isNew = bucket.InMemoryProcessingState->TasteIt();
461464
Throat = bucket.InMemoryProcessingState->Throat;
465+
bucket.LineCount += isNew;
466+
462467
return isNew ? ETasteResult::Init : ETasteResult::Update;
463468
}
464-
465-
// Prepare space for raw data
469+
bucket.LineCount++;
470+
471+
// Prepare space for raw data
466472
MKQL_ENSURE(BufferForUsedInputItems.size() == 0, "Internal logic error");
467473
BufferForUsedInputItems.resize(ItemNodesSize);
468474
BufferForUsedInputItemsBucketId = bucketId;
@@ -485,6 +491,7 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
485491

486492
value = static_cast<NUdf::TUnboxedValue*>(SpilledBuckets.front().InMemoryProcessingState->Extract());
487493
if (!value) {
494+
SpilledBuckets.front().InMemoryProcessingState->ReadMore<false>();
488495
SpilledBuckets.pop_front();
489496
if (SpilledBuckets.empty()) IsEverythingExtracted = true;
490497
}
@@ -521,6 +528,7 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
521528
auto bucketId = hash % SpilledBucketCount;
522529
auto& bucket = SpilledBuckets[bucketId];
523530

531+
bucket.LineCount++;
524532
auto& processingState = *bucket.InMemoryProcessingState;
525533

526534
for (size_t i = 0; i < KeyWidth; ++i) {
@@ -566,6 +574,8 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
566574

567575
if (bucket.BucketState == TSpilledBucket::EBucketState::InMemory) {
568576
bucket.BucketState = TSpilledBucket::EBucketState::SpillingState;
577+
SpillingBucketsCount++;
578+
InMemoryBucketsCount--;
569579
}
570580

571581
while (const auto keyAndState = static_cast<NUdf::TUnboxedValue*>(bucket.InMemoryProcessingState->Extract())) {
@@ -583,10 +593,11 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
583593
bucket.InMemoryProcessingState->ReadMore<false>();
584594

585595
bucket.BucketState = TSpilledBucket::EBucketState::SpillingData;
596+
SpillingBucketsCount--;
586597
}
587598

588599
void UpdateSpillingBuckets() {
589-
for (ui64 i = 0; i < NextBucketToSpill; ++i) {
600+
for (ui64 i = 0; i < SpilledBucketCount; ++i) {
590601
auto& bucket = SpilledBuckets[i];
591602
if (bucket.AsyncWriteOperation.has_value() && bucket.AsyncWriteOperation->HasValue()) {
592603
if (bucket.BucketState == TSpilledBucket::EBucketState::SpillingState) {
@@ -604,16 +615,27 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
604615
}
605616

606617
// Drives bucket spilling for the Spilling-mode caller, which invokes this only when
// HasMemoryForProcessing() is false and input is not finished, and returns
// EUpdateResult::Yield when this function returns true.
//
// Reading this diff view: a line that follows an "NNN-" marker is pre-commit code removed by
// this change, a line that follows an "NNN+" marker is added by it, and a row of two fused
// numbers precedes unchanged context.
//
// Post-commit behavior (the "+" lines):
//   1. If SpillingBucketsCount > 0, return true without starting any new spill.
//   2. Otherwise, while InMemoryBucketsCount > 0, select the InMemory bucket with the largest
//      LineCount (the lowest index wins ties because the comparison is strict), hand it to
//      SpillMoreStateFromBucket, and return true if it is left in SpillingState.
//   3. Return false once no InMemory bucket remains.
// Pre-commit behavior (the "-" lines): buckets were spilled in index order through the
// NextBucketToSpill cursor, regardless of how many rows each one held.
bool TryToReduceMemoryAndWait() {
607-
for (ui64 i = 0; i < NextBucketToSpill; ++i) {
608-
if (SpilledBuckets[i].BucketState == TSpilledBucket::EBucketState::SpillingState) return true;
618+
if (SpillingBucketsCount > 0) {
619+
return true;
609620
}
621+
while (InMemoryBucketsCount > 0) {
622+
ui64 maxLineCount = 0;
// (ui32)-1 serves as the "no candidate found yet" sentinel for the scan below.
623+
ui32 maxLineBucketInd = (ui32)-1;
624+
for (ui64 i = 0; i < SpilledBucketCount; ++i) {
625+
const auto& bucket = SpilledBuckets[i];
// The sentinel test accepts the first InMemory bucket even when its LineCount is 0.
626+
if (bucket.BucketState == TSpilledBucket::EBucketState::InMemory && (maxLineBucketInd == (ui32)-1 || bucket.LineCount > maxLineCount)) {
627+
maxLineCount = bucket.LineCount;
// NOTE(review): implicit ui64 -> ui32 narrowing; it fits because SpilledBucketCount is 128.
628+
maxLineBucketInd = i;
629+
}
630+
}
// NOTE(review): this fires if InMemoryBucketsCount disagrees with the actual bucket states,
// so it relies on every BucketState transition keeping the counter in sync - confirm for
// transitions back to InMemory, which are not visible in this hunk.
631+
MKQL_ENSURE(maxLineBucketInd != (ui32)-1, "Internal logic error");
610632

611-
while (NextBucketToSpill < SpilledBucketCount) {
612-
auto& bucket = SpilledBuckets[NextBucketToSpill++];
613-
SpillMoreStateFromBucket(bucket);
614-
if (bucket.BucketState == TSpilledBucket::EBucketState::SpillingState) return true;
633+
auto& bucketToSpill = SpilledBuckets[maxLineBucketInd];
634+
SpillMoreStateFromBucket(bucketToSpill);
// If the bucket is no longer in SpillingState, the loop picks the next-largest InMemory bucket.
635+
if (bucketToSpill.BucketState == TSpilledBucket::EBucketState::SpillingState) {
636+
return true;
637+
}
615638
}
616-
617639
return false;
618640
}
619641

@@ -661,7 +683,7 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
661683

662684
Throat = BufferForUsedInputItems.data();
663685
Tongue = bucket.InMemoryProcessingState->Tongue;
664-
686+
665687
return EUpdateResult::ExtractRawData;
666688
}
667689
bucket.BucketState = TSpilledBucket::EBucketState::InMemory;
@@ -719,8 +741,6 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
719741
NUdf::TUnboxedValuePod* Throat = nullptr;
720742

721743
private:
722-
ui64 NextBucketToSpill = 0;
723-
724744
bool IsEverythingExtracted = false;
725745

726746
TState InMemoryProcessingState;
@@ -735,6 +755,8 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
735755
TAsyncReadOperation AsyncReadOperation = std::nullopt;
736756
static constexpr size_t SpilledBucketCount = 128;
737757
std::deque<TSpilledBucket> SpilledBuckets;
758+
ui32 SpillingBucketsCount = 0;
759+
ui32 InMemoryBucketsCount = SpilledBucketCount;
738760
ui64 BufferForUsedInputItemsBucketId;
739761
TUnboxedValueVector BufferForUsedInputItems;
740762
std::vector<NUdf::TUnboxedValuePod, TMKQLAllocator<NUdf::TUnboxedValuePod>> ViewForKeyAndState;
@@ -1237,7 +1259,7 @@ using TBaseComputation = TStatefulWideFlowCodegeneratorNode<TWideLastCombinerWra
12371259
EFetchResult DoCalculate(NUdf::TUnboxedValue& state, TComputationContext& ctx, NUdf::TUnboxedValue*const* output) const {
12381260
if (state.IsInvalid()) {
12391261
MakeState(ctx, state);
1240-
}
1262+
}
12411263

12421264
if (const auto ptr = static_cast<TSpillingSupportState*>(state.AsBoxed().Get())) {
12431265
auto **fields = ctx.WideFields.data() + WideFieldsIndex;

0 commit comments

Comments
 (0)