-
Notifications
You must be signed in to change notification settings - Fork 6.1k
8361099: Shenandoah: Improve heap lock contention by using CAS for memory allocation #26171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 45 commits
a063a1c
66f3919
90f21c7
cd19779
5a6bc1c
2da4821
5d0d37f
c7ef2ec
8237eb6
854ba37
11da608
60e75f2
d3cebfc
4caa801
64015b3
b9c9926
94e538c
37cee1f
977bebf
4faf618
d509856
970f3dd
bc5e72a
103e42f
2f5d818
6aa2dba
ce5616c
d1d71bc
96db619
d4dcb28
2ea822c
e4ddfdc
dceff3c
138acb7
fccbd0d
3452995
c93dc01
3e80fdc
b3d3592
9340e6e
1557472
926462f
c640e68
ca04034
f4c8e55
5338346
3ef8b86
c9b7c55
c88bccc
fa3e02a
ef512c5
e5224a3
4d8cc7e
554d937
5927bac
00b976e
c26ee74
01460bd
44684ea
ccb4c50
433b51d
6060534
f6e9997
30eb3dc
6f8fbc6
19b1154
07df1b4
9c3984e
35937a7
60f9663
eac1ed0
150d443
5c3a738
673adac
fddf25b
a7a4a87
54d74d6
224a6cd
c288e16
68226bb
b2e498b
acbc84d
416d13a
89f59b2
7cbc3d2
a007b8a
aa268de
e9e84f4
7266bab
25940a3
d25aa55
594334c
9aafa33
f3a9d48
490e773
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
#include "gc/shenandoah/shenandoahHeap.hpp" | ||
#include "gc/shenandoah/shenandoahHeapRegionSet.hpp" | ||
#include "gc/shenandoah/shenandoahSimpleBitMap.hpp" | ||
#include "memory/padded.inline.hpp" | ||
|
||
// Each ShenandoahHeapRegion is associated with a ShenandoahFreeSetPartitionId. | ||
enum class ShenandoahFreeSetPartitionId : uint8_t { | ||
|
@@ -78,10 +79,9 @@ class ShenandoahRegionPartitions { | |
// are denoted in bytes. Note that some regions that had been assigned to a particular partition at rebuild time | ||
// may have been retired following the rebuild. The tallies for these regions are still reflected in _capacity[p] | ||
// and _used[p], even though the region may have been removed from the free set. | ||
size_t _capacity[UIntNumPartitions]; | ||
size_t _used[UIntNumPartitions]; | ||
size_t _available[UIntNumPartitions]; | ||
size_t _region_counts[UIntNumPartitions]; | ||
size_t volatile _capacity[UIntNumPartitions]; | ||
size_t volatile _used[UIntNumPartitions]; | ||
size_t volatile _region_counts[UIntNumPartitions]; | ||
|
||
// For each partition p, _left_to_right_bias is true iff allocations are normally made from lower indexed regions | ||
// before higher indexed regions. | ||
|
@@ -213,56 +213,40 @@ class ShenandoahRegionPartitions { | |
|
||
inline size_t capacity_of(ShenandoahFreeSetPartitionId which_partition) const { | ||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
return _capacity[int(which_partition)]; | ||
return Atomic::load(_capacity + int(which_partition)); | ||
} | ||
|
||
inline size_t used_by(ShenandoahFreeSetPartitionId which_partition) const { | ||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
return _used[int(which_partition)]; | ||
return Atomic::load(_used + int(which_partition)); | ||
} | ||
|
||
inline size_t available_in(ShenandoahFreeSetPartitionId which_partition) const { | ||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
shenandoah_assert_heaplocked(); | ||
assert(_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)], | ||
"Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s", | ||
_available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)], | ||
partition_membership_name(ssize_t(which_partition))); | ||
return _available[int(which_partition)]; | ||
return capacity_of(which_partition) - used_by(which_partition); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can't do this outside of a lock. The value fetched for capacity may be incoherent with the value fetched for used (because each is modified independently). That is why the previous version of the code used the available[] array and computed its value while holding the lock. |
||
} | ||
|
||
// Return available_in assuming caller does not hold the heap lock. In production builds, available is | ||
// returned without acquiring the lock. In debug builds, the global heap lock is acquired in order to | ||
// enforce a consistency assert. | ||
inline size_t available_in_not_locked(ShenandoahFreeSetPartitionId which_partition) const { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These changes are beyond the scope of the planned topic. I think we need to consider them more carefully. Would prefer not to mix the two. (And I personally believe the original implementation has better performance, but feel free to prove me wrong.) |
||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
shenandoah_assert_not_heaplocked(); | ||
#ifdef ASSERT | ||
ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock()); | ||
assert((_available[int(which_partition)] == FreeSetUnderConstruction) || | ||
(_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)]), | ||
"Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s", | ||
_available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)], | ||
partition_membership_name(ssize_t(which_partition))); | ||
#endif | ||
return _available[int(which_partition)]; | ||
return available_in(which_partition); | ||
} | ||
|
||
inline void set_capacity_of(ShenandoahFreeSetPartitionId which_partition, size_t value) { | ||
shenandoah_assert_heaplocked(); | ||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
_capacity[int(which_partition)] = value; | ||
_available[int(which_partition)] = value - _used[int(which_partition)]; | ||
Atomic::store(_capacity + int(which_partition), value); | ||
} | ||
|
||
inline void set_used_by(ShenandoahFreeSetPartitionId which_partition, size_t value) { | ||
shenandoah_assert_heaplocked(); | ||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
_used[int(which_partition)] = value; | ||
_available[int(which_partition)] = _capacity[int(which_partition)] - value; | ||
Atomic::store(_used + int(which_partition), value); | ||
} | ||
|
||
inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { return _region_counts[int(which_partition)]; } | ||
inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { | ||
assert (which_partition < NumPartitions, "selected free set must be valid"); | ||
return Atomic::load(_region_counts + int(which_partition)); | ||
} | ||
|
||
// Assure leftmost, rightmost, leftmost_empty, and rightmost_empty bounds are valid for all free sets. | ||
// Valid bounds honor all of the following (where max is the number of heap regions): | ||
|
@@ -287,6 +271,28 @@ class ShenandoahRegionPartitions { | |
void assert_bounds() NOT_DEBUG_RETURN; | ||
}; | ||
|
||
#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_AFFINITY ((Thread*)-1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Out of time to dive deep into this right now. Wonder if it makes sense to randomly generate a hash for each thread and store this into a thread-local field. Might provide "randomness" and locality. |
||
#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF ((Thread*)-2) | ||
// When mutator threads allocate from directly allocatable regions, ideally the allocation should be evenly | ||
// distributed to all the directly allocatable regions; random selection is the best portable option for this, but with random | ||
// distribution it may worsen memory locality, e.g. two consecutive allocations from the same thread are randomly | ||
// distributed to different allocatable regions. ShenandoahDirectlyAllocatableRegionAffinity solves/mitigates | ||
// the memory locality issue. | ||
// The idea and code are borrowed from ZGC's CPU affinity, but with a random number instead of the CPU id. | ||
class ShenandoahDirectlyAllocatableRegionAffinity : public AllStatic { | ||
struct Affinity { | ||
Thread* _thread; | ||
}; | ||
|
||
static PaddedEnd<Affinity>* _affinity; | ||
static THREAD_LOCAL Thread* _self; | ||
static THREAD_LOCAL uint _index; | ||
static uint index_slow(); | ||
public: | ||
static void initialize(); | ||
static uint index(); | ||
}; | ||
|
||
// Publicly, ShenandoahFreeSet represents memory that is available to mutator threads. The public capacity(), used(), | ||
// and available() methods represent this public notion of memory that is under control of the mutator. Separately, | ||
// ShenandoahFreeSet also represents memory available to garbage collection activities for compaction purposes. | ||
|
@@ -315,6 +321,7 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { | |
private: | ||
ShenandoahHeap* const _heap; | ||
ShenandoahRegionPartitions _partitions; | ||
ShenandoahHeapRegion** _directly_allocatable_regions; | ||
|
||
HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r); | ||
|
||
|
@@ -410,6 +417,21 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { | |
// log status, assuming lock has already been acquired by the caller. | ||
void log_status(); | ||
|
||
template<bool IS_TLAB> | ||
HeapWord* par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region); | ||
|
||
bool try_allocate_directly_allocatable_regions(ShenandoahHeapRegion** shared_region_address[], | ||
ShenandoahHeapRegion* original_shared_regions[], | ||
uint region_count, | ||
ShenandoahAllocRequest &req, | ||
HeapWord* &obj, | ||
bool &in_new_region); | ||
template<bool IS_MUTATOR, bool IS_OLD> | ||
uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty); | ||
|
||
template<typename Iter> | ||
uint iterate_regions_for_alloc(Iter& iterator, ShenandoahHeapRegionBreakableIterClosure* cl); | ||
|
||
public: | ||
static const size_t FreeSetUnderConstruction = ShenandoahRegionPartitions::FreeSetUnderConstruction; | ||
|
||
|
@@ -484,6 +506,14 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { | |
|
||
HeapWord* allocate(ShenandoahAllocRequest& req, bool& in_new_region); | ||
|
||
HeapWord* allocate_humongous(ShenandoahAllocRequest &req); | ||
|
||
void release_all_directly_allocatable_regions(); | ||
|
||
void release_directly_allocatable_region(ShenandoahHeapRegion *region); | ||
|
||
template<bool IS_TLAB> | ||
HeapWord* par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); | ||
/* | ||
* Internal fragmentation metric: describes how fragmented the heap regions are. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Prefer not to make these volatile, as that imposes a compiler overhead.