@@ -414,28 +414,50 @@ class Bucket {
414
414
// routines, slab map and etc.
415
415
USMAllocContext::USMAllocImpl &OwnAllocCtx;
416
416
417
+ // For buckets used in chunked mode, a counter of slabs in the pool.
418
+ // For allocations that use an entire slab each, the entries in the Available
419
+ // list are entries in the pool.Each slab is available for a new
420
+ // allocation.The size of the Available list is the size of the pool.
421
+ // For allocations that use slabs in chunked mode, slabs will be in the
422
+ // Available list if any one or more of their chunks is free.The entire slab
423
+ // is not necessarily free, just some chunks in the slab are free. To
424
+ // implement pooling we will allow one slab in the Available list to be
425
+ // entirely empty. Normally such a slab would have been freed from USM. But
426
+ // now we don't, and treat this slab as "in the pool".
427
+ // When a slab becomes entirely free we have to decide whether to return it to
428
+ // USM or keep it allocated. A simple check for size of the Available list is
429
+ // not sufficient to check whether any slab has been pooled yet.We would have
430
+ // to traverse the entire Available listand check if any of them is entirely
431
+ // free. Instead we keep a counter of entirely empty slabs within the
432
+ // Available list to speed up the process of checking if a slab in this bucket
433
+ // is already pooled.
434
+ size_t chunkedSlabsInPool;
435
+
417
436
// Statistics
418
- size_t allocCount;
419
437
size_t allocPoolCount;
420
438
size_t freeCount;
421
439
size_t currSlabsInUse;
422
440
size_t currSlabsInPool;
423
- size_t maxSlabsInUse;
424
441
size_t maxSlabsInPool;
425
442
426
443
public:
444
+ // Statistics
445
+ size_t allocCount;
446
+ size_t maxSlabsInUse;
447
+
427
448
Bucket (size_t Sz, USMAllocContext::USMAllocImpl &AllocCtx)
428
- : Size{Sz}, OwnAllocCtx{AllocCtx}, allocCount( 0 ), allocPoolCount (0 ),
429
- freeCount (0 ), currSlabsInUse (0 ), currSlabsInPool (0 ), maxSlabsInUse (0 ),
430
- maxSlabsInPool (0 ) {}
449
+ : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool (0 ),
450
+ allocPoolCount (0 ), freeCount (0 ), currSlabsInUse (0 ), currSlabsInPool (0 ),
451
+ maxSlabsInPool (0 ), allocCount( 0 ), maxSlabsInUse( 0 ) {}
431
452
432
453
// Get pointer to allocation that is one piece of an available slab in this
433
454
// bucket.
434
- void *getChunk (bool &FromAllocatedSlab );
455
+ void *getChunk (bool &FromPool );
435
456
436
457
// Get pointer to allocation that is a full slab in this bucket.
437
458
void *getSlab (bool &FromPool);
438
459
460
+ // Return the allocation size of this bucket.
439
461
size_t getSize () const { return Size; }
440
462
441
463
// Free an allocation that is one piece of a slab in this bucket.
@@ -451,11 +473,14 @@ class Bucket {
451
473
USMAllocContext::USMAllocImpl &getUsmAllocCtx () { return OwnAllocCtx; }
452
474
453
475
// Check whether an allocation to be freed can be placed in the pool.
454
- bool CanPool ();
476
+ bool CanPool (bool &ToPool );
455
477
456
- // The minimum allocation size for a slab in this bucket .
478
+ // The minimum allocation size for any slab.
457
479
size_t SlabMinSize ();
458
480
481
+ // The allocation size for a slab in this bucket.
482
+ size_t SlabAllocSize ();
483
+
459
484
// The minimum size of a chunk from this bucket's slabs.
460
485
size_t ChunkCutOff ();
461
486
@@ -475,17 +500,19 @@ class Bucket {
475
500
void updateStats (int InUse, int InPool);
476
501
477
502
// Print bucket statistics
478
- void printStats ();
503
+ void printStats (bool &TitlePrinted, SystemMemory::MemType MT );
479
504
480
505
private:
481
506
void onFreeChunk (Slab &, bool &ToPool);
482
507
508
+ // Update statistics of pool usage, and indicate that an allocation was made
509
+ // from the pool.
510
+ void decrementPool (bool &FromPool);
511
+
483
512
// Get a slab to be used for chunked allocations.
484
- // These slabs are used for allocations <= ChunkCutOff and not pooled.
485
- decltype (AvailableSlabs.begin()) getAvailSlab(bool &FromAllocatedSlab);
513
+ decltype (AvailableSlabs.begin()) getAvailSlab(bool &FromPool);
486
514
487
515
// Get a slab that will be used as a whole for a single allocation.
488
- // These slabs are > ChunkCutOff in size and pooled.
489
516
decltype (AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool);
490
517
};
491
518
@@ -531,7 +558,8 @@ class USMAllocContext::USMAllocImpl {
531
558
return USMSettings.SlabMinSize [(*MemHandle).getMemType ()];
532
559
};
533
560
534
- void printStats ();
561
+ void printStats (bool &TitlePrinted, size_t &HighBucketSize,
562
+ size_t &HighPeakSlabsInUse, SystemMemory::MemType MT);
535
563
536
564
private:
537
565
Bucket &findBucket (size_t Size);
@@ -552,10 +580,8 @@ Slab::Slab(Bucket &Bkt)
552
580
// some padding at the end of the slab.
553
581
Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0 },
554
582
bucket (Bkt), SlabListIter{}, FirstFreeChunkIdx{0 } {
555
- size_t SlabAllocSize = Bkt.getSize ();
556
- if (SlabAllocSize < Bkt.SlabMinSize ())
557
- SlabAllocSize = Bkt.SlabMinSize ();
558
- MemPtr = Bkt.getMemHandle ().allocate (SlabAllocSize);
583
+ auto SlabSize = Bkt.SlabAllocSize ();
584
+ MemPtr = Bkt.getMemHandle ().allocate (SlabSize);
559
585
regSlab (*this );
560
586
}
561
587
@@ -673,6 +699,14 @@ void *Slab::getEnd() const {
673
699
674
700
bool Slab::hasAvail () { return NumAllocated != getNumChunks (); }
675
701
702
+ // If a slab was available in the pool then note that the current pooled
703
+ // size has reduced by the size of a slab in this bucket.
704
+ void Bucket::decrementPool (bool &FromPool) {
705
+ FromPool = true ;
706
+ updateStats (1 , -1 );
707
+ USMSettings.CurPoolSize -= SlabAllocSize ();
708
+ }
709
+
676
710
auto Bucket::getAvailFullSlab (bool &FromPool)
677
711
-> decltype(AvailableSlabs.begin()) {
678
712
// Return a slab that will be used for a single allocation.
@@ -681,17 +715,9 @@ auto Bucket::getAvailFullSlab(bool &FromPool)
681
715
std::make_unique<Slab>(*this ));
682
716
(*It)->setIterator (It);
683
717
FromPool = false ;
684
- if (USMSettings.PoolTrace > 1 )
685
- updateStats (1 , 0 );
718
+ updateStats (1 , 0 );
686
719
} else {
687
- // If a slab was available in the pool then note that the current pooled
688
- // size has reduced by the size of this slab.
689
- FromPool = true ;
690
- if (USMSettings.PoolTrace > 1 ) {
691
- updateStats (1 , -1 );
692
- USMSettings.CurPoolSizes [getMemType ()] -= Size;
693
- }
694
- USMSettings.CurPoolSize -= Size;
720
+ decrementPool (FromPool);
695
721
}
696
722
697
723
return AvailableSlabs.begin ();
@@ -713,47 +739,44 @@ void Bucket::freeSlab(Slab &Slab, bool &ToPool) {
713
739
std::lock_guard<std::mutex> Lg (BucketLock);
714
740
auto SlabIter = Slab.getIterator ();
715
741
assert (SlabIter != UnavailableSlabs.end ());
716
- if (CanPool ()) {
742
+ if (CanPool (ToPool )) {
717
743
auto It =
718
744
AvailableSlabs.insert (AvailableSlabs.begin (), std::move (*SlabIter));
719
745
UnavailableSlabs.erase (SlabIter);
720
746
(*It)->setIterator (It);
721
-
722
- if (USMSettings.PoolTrace > 1 ) {
723
- updateStats (-1 , 1 );
724
- ToPool = true ;
725
- }
726
747
} else {
727
748
UnavailableSlabs.erase (SlabIter);
728
-
729
- if (USMSettings.PoolTrace > 1 ) {
730
- updateStats (-1 , 0 );
731
- ToPool = false ;
732
- }
733
749
}
734
750
}
735
751
736
- auto Bucket::getAvailSlab (bool &FromAllocatedSlab)
737
- -> decltype(AvailableSlabs.begin()) {
752
+ auto Bucket::getAvailSlab (bool &FromPool) -> decltype(AvailableSlabs.begin()) {
738
753
739
- FromAllocatedSlab = true ;
740
754
if (AvailableSlabs.size () == 0 ) {
741
755
auto It = AvailableSlabs.insert (AvailableSlabs.begin (),
742
756
std::make_unique<Slab>(*this ));
743
757
(*It)->setIterator (It);
744
758
745
- if (USMSettings.PoolTrace > 1 )
746
- updateStats (1 , 0 );
747
- FromAllocatedSlab = false ;
759
+ updateStats (1 , 0 );
760
+ FromPool = false ;
761
+ } else {
762
+ if ((*(AvailableSlabs.begin ()))->getNumAllocated () == 0 ) {
763
+ // If this was an empty slab, it was in the pool.
764
+ // Now it is no longer in the pool, so update count.
765
+ --chunkedSlabsInPool;
766
+ decrementPool (FromPool);
767
+ } else {
768
+ // Allocation from existing slab is treated as from pool for statistics.
769
+ FromPool = true ;
770
+ }
748
771
}
749
772
750
773
return AvailableSlabs.begin ();
751
774
}
752
775
753
- void *Bucket::getChunk (bool &FromAllocatedSlab ) {
776
+ void *Bucket::getChunk (bool &FromPool ) {
754
777
std::lock_guard<std::mutex> Lg (BucketLock);
755
778
756
- auto SlabIt = getAvailSlab (FromAllocatedSlab );
779
+ auto SlabIt = getAvailSlab (FromPool );
757
780
auto *FreeChunk = (*SlabIt)->getChunk ();
758
781
759
782
// If the slab is full, move it to unavailable slabs and update its iterator
@@ -792,33 +815,45 @@ void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) {
792
815
(*It)->setIterator (It);
793
816
}
794
817
795
- // Remove the slab when all the chunks from it are deallocated
796
- // Note: since the slab is stored as unique_ptr, just remove it from
797
- // the list to remove the list to destroy the object
818
+ // Check if slab is empty, and pool it if we can.
798
819
if (Slab.getNumAllocated () == 0 ) {
799
- auto It = Slab.getIterator ();
800
- assert (It != AvailableSlabs.end ());
801
-
802
- AvailableSlabs.erase (It);
803
-
804
- if (USMSettings.PoolTrace > 1 )
805
- updateStats (-1 , 0 );
806
-
807
- ToPool = false ;
820
+ // The slab is now empty.
821
+ // If pool has capacity then put the slab in the pool.
822
+ // The ToPool parameter indicates whether the Slab will be put in the pool
823
+ // or freed from USM.
824
+ if (!CanPool (ToPool)) {
825
+ // Note: since the slab is stored as unique_ptr, just remove it from
826
+ // the list to destroy the object.
827
+ auto It = Slab.getIterator ();
828
+ assert (It != AvailableSlabs.end ());
829
+ AvailableSlabs.erase (It);
830
+ }
808
831
}
809
832
}
810
833
811
- bool Bucket::CanPool () {
834
+ bool Bucket::CanPool (bool &ToPool ) {
812
835
std::lock_guard<sycl::detail::SpinLock> Lock{PoolLock};
813
- size_t NewFreeSlabsInBucket = AvailableSlabs.size () + 1 ;
836
+ size_t NewFreeSlabsInBucket;
837
+ // Check if this bucket is used in chunked form or as full slabs.
838
+ bool chunkedBucket = getSize () <= ChunkCutOff ();
839
+ if (chunkedBucket)
840
+ NewFreeSlabsInBucket = chunkedSlabsInPool + 1 ;
841
+ else
842
+ NewFreeSlabsInBucket = AvailableSlabs.size () + 1 ;
814
843
if (Capacity () >= NewFreeSlabsInBucket) {
815
- size_t NewPoolSize = USMSettings.CurPoolSize + Size ;
844
+ size_t NewPoolSize = USMSettings.CurPoolSize + SlabAllocSize () ;
816
845
if (USMSettings.MaxPoolSize >= NewPoolSize) {
817
846
USMSettings.CurPoolSize = NewPoolSize;
818
- USMSettings.CurPoolSizes [getMemType ()] += Size;
847
+ if (chunkedBucket)
848
+ ++chunkedSlabsInPool;
849
+
850
+ updateStats (-1 , 1 );
851
+ ToPool = true ;
819
852
return true ;
820
853
}
821
854
}
855
+ updateStats (-1 , 0 );
856
+ ToPool = false ;
822
857
return false ;
823
858
}
824
859
@@ -830,7 +865,16 @@ SystemMemory::MemType Bucket::getMemType() {
830
865
831
866
size_t Bucket::SlabMinSize () { return USMSettings.SlabMinSize [getMemType ()]; }
832
867
833
- size_t Bucket::Capacity () { return USMSettings.Capacity [getMemType ()]; }
868
+ size_t Bucket::SlabAllocSize () { return std::max (getSize (), SlabMinSize ()); }
869
+
870
+ size_t Bucket::Capacity () {
871
+ // For buckets used in chunked mode, just one slab in pool is sufficient.
872
+ // For larger buckets, the capacity could be more and is adjustable.
873
+ if (getSize () <= ChunkCutOff ())
874
+ return 1 ;
875
+ else
876
+ return USMSettings.Capacity [getMemType ()];
877
+ }
834
878
835
879
size_t Bucket::MaxPoolableSize () {
836
880
return USMSettings.MaxPoolableSize [getMemType ()];
@@ -847,14 +891,28 @@ void Bucket::countAlloc(bool FromPool) {
847
891
void Bucket::countFree () { ++freeCount; }
848
892
849
893
void Bucket::updateStats (int InUse, int InPool) {
894
+ if (USMSettings.PoolTrace == 0 )
895
+ return ;
850
896
currSlabsInUse += InUse;
851
897
maxSlabsInUse = std::max (currSlabsInUse, maxSlabsInUse);
852
898
currSlabsInPool += InPool;
853
899
maxSlabsInPool = std::max (currSlabsInPool, maxSlabsInPool);
900
+ // Increment or decrement current pool sizes based on whether
901
+ // slab was added to or removed from pool.
902
+ USMSettings.CurPoolSizes [getMemType ()] += InPool * SlabAllocSize ();
854
903
}
855
904
856
- void Bucket::printStats () {
905
+ void Bucket::printStats (bool &TitlePrinted, SystemMemory::MemType MT ) {
857
906
if (allocCount) {
907
+ if (!TitlePrinted) {
908
+ auto Label = MemTypeNames[MT];
909
+ std::cout << Label << " memory statistics\n " ;
910
+ std::cout << std::setw (14 ) << " Bucket Size" << std::setw (12 ) << " Allocs"
911
+ << std::setw (12 ) << " Frees" << std::setw (18 )
912
+ << " Allocs from Pool" << std::setw (20 ) << " Peak Slabs in Use"
913
+ << std::setw (21 ) << " Peak Slabs in Pool" << std::endl;
914
+ TitlePrinted = true ;
915
+ }
858
916
std::cout << std::setw (14 ) << getSize () << std::setw (12 ) << allocCount
859
917
<< std::setw (12 ) << freeCount << std::setw (18 ) << allocPoolCount
860
918
<< std::setw (20 ) << maxSlabsInUse << std::setw (21 )
@@ -982,6 +1040,7 @@ USMAllocContext::USMAllocContext(std::unique_ptr<SystemMemory> MemHandle)
982
1040
: pImpl(std::make_unique<USMAllocImpl>(std::move(MemHandle))) {}
983
1041
984
1042
void *USMAllocContext::allocate (size_t size) {
1043
+ // For full-slab allocations indicates whether slab is from Pool.
985
1044
bool FromPool;
986
1045
auto Ptr = pImpl->allocate (size, FromPool);
987
1046
@@ -1023,27 +1082,36 @@ void USMAllocContext::deallocate(void *ptr, bool OwnZeMemHandle) {
1023
1082
return ;
1024
1083
}
1025
1084
1026
- // Define destructor for its usage with unique_ptr
1085
+ // Define destructor for use with unique_ptr
1027
1086
USMAllocContext::~USMAllocContext () {
1087
+ bool TitlePrinted = false ;
1088
+ size_t HighBucketSize;
1089
+ size_t HighPeakSlabsInUse;
1028
1090
if (USMSettings.PoolTrace > 1 ) {
1029
- auto Label = " Shared" ;
1030
- if (pImpl->getMemHandle ().getMemType () == SystemMemory::Host)
1031
- Label = " Host" ;
1032
- if (pImpl->getMemHandle ().getMemType () == SystemMemory::Device)
1033
- Label = " Device" ;
1034
- std::cout << Label << " memory statistics\n " ;
1035
- pImpl->printStats ();
1036
- std::cout << " Current Pool Size " << USMSettings.CurPoolSize << std::endl;
1091
+ SystemMemory::MemType MT = pImpl->getMemHandle ().getMemType ();
1092
+ pImpl->printStats (TitlePrinted, HighBucketSize, HighPeakSlabsInUse, MT);
1093
+ if (TitlePrinted) {
1094
+ std::cout << " Current Pool Size " << USMSettings.CurPoolSize << std::endl;
1095
+ const char *Label = MemTypeNames[MT];
1096
+ std::cout << " Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;"
1097
+ << std::string (1 , tolower (*Label)) << std::string (Label + 1 )
1098
+ << " :" << HighBucketSize << " ," << HighPeakSlabsInUse << " ,64K"
1099
+ << std::endl;
1100
+ }
1037
1101
}
1038
1102
}
1039
1103
1040
- void USMAllocContext::USMAllocImpl::printStats () {
1041
- std::cout << std::setw (14 ) << " Bucket Size" << std::setw (12 ) << " Allocs"
1042
- << std::setw (12 ) << " Frees" << std::setw (18 ) << " Allocs From Pool"
1043
- << std::setw (20 ) << " Peak Slabs In Use" << std::setw (21 )
1044
- << " Peak Slabs in Pool" << std::endl;
1104
+ void USMAllocContext::USMAllocImpl::printStats (bool &TitlePrinted,
1105
+ size_t &HighBucketSize,
1106
+ size_t &HighPeakSlabsInUse,
1107
+ SystemMemory::MemType MT) {
1108
+ HighBucketSize = 0 ;
1109
+ HighPeakSlabsInUse = 0 ;
1045
1110
for (auto &B : Buckets) {
1046
- (*B).printStats ();
1111
+ (*B).printStats (TitlePrinted, MT);
1112
+ HighPeakSlabsInUse = std::max ((*B).maxSlabsInUse , HighPeakSlabsInUse);
1113
+ if ((*B).allocCount )
1114
+ HighBucketSize = std::max ((*B).SlabAllocSize (), HighBucketSize);
1047
1115
}
1048
1116
}
1049
1117
0 commit comments