Skip to content

Commit 6244efe

Browse files
authored
[SYCL] Support for pooled small allocations. (#5438)
1 parent 3196d66 commit 6244efe

File tree

1 file changed

+147
-79
lines changed

1 file changed

+147
-79
lines changed

sycl/plugins/level_zero/usm_allocator.cpp

Lines changed: 147 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -414,28 +414,50 @@ class Bucket {
414414
// routines, slab map and etc.
415415
USMAllocContext::USMAllocImpl &OwnAllocCtx;
416416

417+
// For buckets used in chunked mode, a counter of slabs in the pool.
418+
// For allocations that use an entire slab each, the entries in the Available
419+
// list are entries in the pool.Each slab is available for a new
420+
// allocation.The size of the Available list is the size of the pool.
421+
// For allocations that use slabs in chunked mode, slabs will be in the
422+
// Available list if any one or more of their chunks is free.The entire slab
423+
// is not necessarily free, just some chunks in the slab are free. To
424+
// implement pooling we will allow one slab in the Available list to be
425+
// entirely empty. Normally such a slab would have been freed from USM. But
426+
// now we don't, and treat this slab as "in the pool".
427+
// When a slab becomes entirely free we have to decide whether to return it to
428+
// USM or keep it allocated. A simple check for size of the Available list is
429+
// not sufficient to check whether any slab has been pooled yet.We would have
430+
// to traverse the entire Available listand check if any of them is entirely
431+
// free. Instead we keep a counter of entirely empty slabs within the
432+
// Available list to speed up the process of checking if a slab in this bucket
433+
// is already pooled.
434+
size_t chunkedSlabsInPool;
435+
417436
// Statistics
418-
size_t allocCount;
419437
size_t allocPoolCount;
420438
size_t freeCount;
421439
size_t currSlabsInUse;
422440
size_t currSlabsInPool;
423-
size_t maxSlabsInUse;
424441
size_t maxSlabsInPool;
425442

426443
public:
444+
// Statistics
445+
size_t allocCount;
446+
size_t maxSlabsInUse;
447+
427448
Bucket(size_t Sz, USMAllocContext::USMAllocImpl &AllocCtx)
428-
: Size{Sz}, OwnAllocCtx{AllocCtx}, allocCount(0), allocPoolCount(0),
429-
freeCount(0), currSlabsInUse(0), currSlabsInPool(0), maxSlabsInUse(0),
430-
maxSlabsInPool(0) {}
449+
: Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0),
450+
allocPoolCount(0), freeCount(0), currSlabsInUse(0), currSlabsInPool(0),
451+
maxSlabsInPool(0), allocCount(0), maxSlabsInUse(0) {}
431452

432453
// Get pointer to allocation that is one piece of an available slab in this
433454
// bucket.
434-
void *getChunk(bool &FromAllocatedSlab);
455+
void *getChunk(bool &FromPool);
435456

436457
// Get pointer to allocation that is a full slab in this bucket.
437458
void *getSlab(bool &FromPool);
438459

460+
// Return the allocation size of this bucket.
439461
size_t getSize() const { return Size; }
440462

441463
// Free an allocation that is one piece of a slab in this bucket.
@@ -451,11 +473,14 @@ class Bucket {
451473
USMAllocContext::USMAllocImpl &getUsmAllocCtx() { return OwnAllocCtx; }
452474

453475
// Check whether an allocation to be freed can be placed in the pool.
454-
bool CanPool();
476+
bool CanPool(bool &ToPool);
455477

456-
// The minimum allocation size for a slab in this bucket.
478+
// The minimum allocation size for any slab.
457479
size_t SlabMinSize();
458480

481+
// The allocation size for a slab in this bucket.
482+
size_t SlabAllocSize();
483+
459484
// The minimum size of a chunk from this bucket's slabs.
460485
size_t ChunkCutOff();
461486

@@ -475,17 +500,19 @@ class Bucket {
475500
void updateStats(int InUse, int InPool);
476501

477502
// Print bucket statistics
478-
void printStats();
503+
void printStats(bool &TitlePrinted, SystemMemory::MemType MT);
479504

480505
private:
481506
void onFreeChunk(Slab &, bool &ToPool);
482507

508+
// Update statistics of pool usage, and indicate that an allocation was made
509+
// from the pool.
510+
void decrementPool(bool &FromPool);
511+
483512
// Get a slab to be used for chunked allocations.
484-
// These slabs are used for allocations <= ChunkCutOff and not pooled.
485-
decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromAllocatedSlab);
513+
decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool);
486514

487515
// Get a slab that will be used as a whole for a single allocation.
488-
// These slabs are > ChunkCutOff in size and pooled.
489516
decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool);
490517
};
491518

@@ -531,7 +558,8 @@ class USMAllocContext::USMAllocImpl {
531558
return USMSettings.SlabMinSize[(*MemHandle).getMemType()];
532559
};
533560

534-
void printStats();
561+
void printStats(bool &TitlePrinted, size_t &HighBucketSize,
562+
size_t &HighPeakSlabsInUse, SystemMemory::MemType MT);
535563

536564
private:
537565
Bucket &findBucket(size_t Size);
@@ -552,10 +580,8 @@ Slab::Slab(Bucket &Bkt)
552580
// some padding at the end of the slab.
553581
Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0},
554582
bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} {
555-
size_t SlabAllocSize = Bkt.getSize();
556-
if (SlabAllocSize < Bkt.SlabMinSize())
557-
SlabAllocSize = Bkt.SlabMinSize();
558-
MemPtr = Bkt.getMemHandle().allocate(SlabAllocSize);
583+
auto SlabSize = Bkt.SlabAllocSize();
584+
MemPtr = Bkt.getMemHandle().allocate(SlabSize);
559585
regSlab(*this);
560586
}
561587

@@ -673,6 +699,14 @@ void *Slab::getEnd() const {
673699

674700
bool Slab::hasAvail() { return NumAllocated != getNumChunks(); }
675701

702+
// If a slab was available in the pool then note that the current pooled
703+
// size has reduced by the size of a slab in this bucket.
704+
void Bucket::decrementPool(bool &FromPool) {
705+
FromPool = true;
706+
updateStats(1, -1);
707+
USMSettings.CurPoolSize -= SlabAllocSize();
708+
}
709+
676710
auto Bucket::getAvailFullSlab(bool &FromPool)
677711
-> decltype(AvailableSlabs.begin()) {
678712
// Return a slab that will be used for a single allocation.
@@ -681,17 +715,9 @@ auto Bucket::getAvailFullSlab(bool &FromPool)
681715
std::make_unique<Slab>(*this));
682716
(*It)->setIterator(It);
683717
FromPool = false;
684-
if (USMSettings.PoolTrace > 1)
685-
updateStats(1, 0);
718+
updateStats(1, 0);
686719
} else {
687-
// If a slab was available in the pool then note that the current pooled
688-
// size has reduced by the size of this slab.
689-
FromPool = true;
690-
if (USMSettings.PoolTrace > 1) {
691-
updateStats(1, -1);
692-
USMSettings.CurPoolSizes[getMemType()] -= Size;
693-
}
694-
USMSettings.CurPoolSize -= Size;
720+
decrementPool(FromPool);
695721
}
696722

697723
return AvailableSlabs.begin();
@@ -713,47 +739,44 @@ void Bucket::freeSlab(Slab &Slab, bool &ToPool) {
713739
std::lock_guard<std::mutex> Lg(BucketLock);
714740
auto SlabIter = Slab.getIterator();
715741
assert(SlabIter != UnavailableSlabs.end());
716-
if (CanPool()) {
742+
if (CanPool(ToPool)) {
717743
auto It =
718744
AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter));
719745
UnavailableSlabs.erase(SlabIter);
720746
(*It)->setIterator(It);
721-
722-
if (USMSettings.PoolTrace > 1) {
723-
updateStats(-1, 1);
724-
ToPool = true;
725-
}
726747
} else {
727748
UnavailableSlabs.erase(SlabIter);
728-
729-
if (USMSettings.PoolTrace > 1) {
730-
updateStats(-1, 0);
731-
ToPool = false;
732-
}
733749
}
734750
}
735751

736-
auto Bucket::getAvailSlab(bool &FromAllocatedSlab)
737-
-> decltype(AvailableSlabs.begin()) {
752+
auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) {
738753

739-
FromAllocatedSlab = true;
740754
if (AvailableSlabs.size() == 0) {
741755
auto It = AvailableSlabs.insert(AvailableSlabs.begin(),
742756
std::make_unique<Slab>(*this));
743757
(*It)->setIterator(It);
744758

745-
if (USMSettings.PoolTrace > 1)
746-
updateStats(1, 0);
747-
FromAllocatedSlab = false;
759+
updateStats(1, 0);
760+
FromPool = false;
761+
} else {
762+
if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) {
763+
// If this was an empty slab, it was in the pool.
764+
// Now it is no longer in the pool, so update count.
765+
--chunkedSlabsInPool;
766+
decrementPool(FromPool);
767+
} else {
768+
// Allocation from existing slab is treated as from pool for statistics.
769+
FromPool = true;
770+
}
748771
}
749772

750773
return AvailableSlabs.begin();
751774
}
752775

753-
void *Bucket::getChunk(bool &FromAllocatedSlab) {
776+
void *Bucket::getChunk(bool &FromPool) {
754777
std::lock_guard<std::mutex> Lg(BucketLock);
755778

756-
auto SlabIt = getAvailSlab(FromAllocatedSlab);
779+
auto SlabIt = getAvailSlab(FromPool);
757780
auto *FreeChunk = (*SlabIt)->getChunk();
758781

759782
// If the slab is full, move it to unavailable slabs and update its iterator
@@ -792,33 +815,45 @@ void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) {
792815
(*It)->setIterator(It);
793816
}
794817

795-
// Remove the slab when all the chunks from it are deallocated
796-
// Note: since the slab is stored as unique_ptr, just remove it from
797-
// the list to remove the list to destroy the object
818+
// Check if slab is empty, and pool it if we can.
798819
if (Slab.getNumAllocated() == 0) {
799-
auto It = Slab.getIterator();
800-
assert(It != AvailableSlabs.end());
801-
802-
AvailableSlabs.erase(It);
803-
804-
if (USMSettings.PoolTrace > 1)
805-
updateStats(-1, 0);
806-
807-
ToPool = false;
820+
// The slab is now empty.
821+
// If pool has capacity then put the slab in the pool.
822+
// The ToPool parameter indicates whether the Slab will be put in the pool
823+
// or freed from USM.
824+
if (!CanPool(ToPool)) {
825+
// Note: since the slab is stored as unique_ptr, just remove it from
826+
// the list to destroy the object.
827+
auto It = Slab.getIterator();
828+
assert(It != AvailableSlabs.end());
829+
AvailableSlabs.erase(It);
830+
}
808831
}
809832
}
810833

811-
bool Bucket::CanPool() {
834+
bool Bucket::CanPool(bool &ToPool) {
812835
std::lock_guard<sycl::detail::SpinLock> Lock{PoolLock};
813-
size_t NewFreeSlabsInBucket = AvailableSlabs.size() + 1;
836+
size_t NewFreeSlabsInBucket;
837+
// Check if this bucket is used in chunked form or as full slabs.
838+
bool chunkedBucket = getSize() <= ChunkCutOff();
839+
if (chunkedBucket)
840+
NewFreeSlabsInBucket = chunkedSlabsInPool + 1;
841+
else
842+
NewFreeSlabsInBucket = AvailableSlabs.size() + 1;
814843
if (Capacity() >= NewFreeSlabsInBucket) {
815-
size_t NewPoolSize = USMSettings.CurPoolSize + Size;
844+
size_t NewPoolSize = USMSettings.CurPoolSize + SlabAllocSize();
816845
if (USMSettings.MaxPoolSize >= NewPoolSize) {
817846
USMSettings.CurPoolSize = NewPoolSize;
818-
USMSettings.CurPoolSizes[getMemType()] += Size;
847+
if (chunkedBucket)
848+
++chunkedSlabsInPool;
849+
850+
updateStats(-1, 1);
851+
ToPool = true;
819852
return true;
820853
}
821854
}
855+
updateStats(-1, 0);
856+
ToPool = false;
822857
return false;
823858
}
824859

@@ -830,7 +865,16 @@ SystemMemory::MemType Bucket::getMemType() {
830865

831866
size_t Bucket::SlabMinSize() { return USMSettings.SlabMinSize[getMemType()]; }
832867

833-
size_t Bucket::Capacity() { return USMSettings.Capacity[getMemType()]; }
868+
size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); }
869+
870+
size_t Bucket::Capacity() {
871+
// For buckets used in chunked mode, just one slab in pool is sufficient.
872+
// For larger buckets, the capacity could be more and is adjustable.
873+
if (getSize() <= ChunkCutOff())
874+
return 1;
875+
else
876+
return USMSettings.Capacity[getMemType()];
877+
}
834878

835879
size_t Bucket::MaxPoolableSize() {
836880
return USMSettings.MaxPoolableSize[getMemType()];
@@ -847,14 +891,28 @@ void Bucket::countAlloc(bool FromPool) {
847891
void Bucket::countFree() { ++freeCount; }
848892

849893
void Bucket::updateStats(int InUse, int InPool) {
894+
if (USMSettings.PoolTrace == 0)
895+
return;
850896
currSlabsInUse += InUse;
851897
maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse);
852898
currSlabsInPool += InPool;
853899
maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool);
900+
// Increment or decrement current pool sizes based on whether
901+
// slab was added to or removed from pool.
902+
USMSettings.CurPoolSizes[getMemType()] += InPool * SlabAllocSize();
854903
}
855904

856-
void Bucket::printStats() {
905+
void Bucket::printStats(bool &TitlePrinted, SystemMemory::MemType MT) {
857906
if (allocCount) {
907+
if (!TitlePrinted) {
908+
auto Label = MemTypeNames[MT];
909+
std::cout << Label << " memory statistics\n";
910+
std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs"
911+
<< std::setw(12) << "Frees" << std::setw(18)
912+
<< "Allocs from Pool" << std::setw(20) << "Peak Slabs in Use"
913+
<< std::setw(21) << "Peak Slabs in Pool" << std::endl;
914+
TitlePrinted = true;
915+
}
858916
std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount
859917
<< std::setw(12) << freeCount << std::setw(18) << allocPoolCount
860918
<< std::setw(20) << maxSlabsInUse << std::setw(21)
@@ -982,6 +1040,7 @@ USMAllocContext::USMAllocContext(std::unique_ptr<SystemMemory> MemHandle)
9821040
: pImpl(std::make_unique<USMAllocImpl>(std::move(MemHandle))) {}
9831041

9841042
void *USMAllocContext::allocate(size_t size) {
1043+
// For full-slab allocations indicates whether slab is from Pool.
9851044
bool FromPool;
9861045
auto Ptr = pImpl->allocate(size, FromPool);
9871046

@@ -1023,27 +1082,36 @@ void USMAllocContext::deallocate(void *ptr, bool OwnZeMemHandle) {
10231082
return;
10241083
}
10251084

1026-
// Define destructor for its usage with unique_ptr
1085+
// Define destructor for use with unique_ptr
10271086
USMAllocContext::~USMAllocContext() {
1087+
bool TitlePrinted = false;
1088+
size_t HighBucketSize;
1089+
size_t HighPeakSlabsInUse;
10281090
if (USMSettings.PoolTrace > 1) {
1029-
auto Label = "Shared";
1030-
if (pImpl->getMemHandle().getMemType() == SystemMemory::Host)
1031-
Label = "Host";
1032-
if (pImpl->getMemHandle().getMemType() == SystemMemory::Device)
1033-
Label = "Device";
1034-
std::cout << Label << " memory statistics\n";
1035-
pImpl->printStats();
1036-
std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl;
1091+
SystemMemory::MemType MT = pImpl->getMemHandle().getMemType();
1092+
pImpl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, MT);
1093+
if (TitlePrinted) {
1094+
std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl;
1095+
const char *Label = MemTypeNames[MT];
1096+
std::cout << "Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;"
1097+
<< std::string(1, tolower(*Label)) << std::string(Label + 1)
1098+
<< ":" << HighBucketSize << "," << HighPeakSlabsInUse << ",64K"
1099+
<< std::endl;
1100+
}
10371101
}
10381102
}
10391103

1040-
void USMAllocContext::USMAllocImpl::printStats() {
1041-
std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs"
1042-
<< std::setw(12) << "Frees" << std::setw(18) << "Allocs From Pool"
1043-
<< std::setw(20) << "Peak Slabs In Use" << std::setw(21)
1044-
<< "Peak Slabs in Pool" << std::endl;
1104+
void USMAllocContext::USMAllocImpl::printStats(bool &TitlePrinted,
1105+
size_t &HighBucketSize,
1106+
size_t &HighPeakSlabsInUse,
1107+
SystemMemory::MemType MT) {
1108+
HighBucketSize = 0;
1109+
HighPeakSlabsInUse = 0;
10451110
for (auto &B : Buckets) {
1046-
(*B).printStats();
1111+
(*B).printStats(TitlePrinted, MT);
1112+
HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse);
1113+
if ((*B).allocCount)
1114+
HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize);
10471115
}
10481116
}
10491117

0 commit comments

Comments
 (0)