
8361099: Shenandoah: Improve heap lock contention by using CAS for memory allocation #26171

Draft · wants to merge 45 commits into base: master

Changes from all commits · 45 commits
a063a1c
Add allocate_atomic using CAS to ShenandoahHeapRegion
pengxiaolong May 21, 2025
66f3919
Duplicate Z's CPUAffinity in gc shared
pengxiaolong May 21, 2025
90f21c7
Touch up
pengxiaolong May 21, 2025
cd19779
cas_alloc
pengxiaolong Jun 24, 2025
5a6bc1c
CAS allocation for mutators
pengxiaolong Jul 1, 2025
2da4821
Update allocation bias
pengxiaolong Jul 1, 2025
5d0d37f
Humongous allocation and GC shall not use regions reserved for direct…
pengxiaolong Jul 1, 2025
c7ef2ec
Bug fix
pengxiaolong Jul 1, 2025
8237eb6
Bug fix
pengxiaolong Jul 1, 2025
854ba37
Merge branch 'openjdk:master' into cas-alloc
pengxiaolong Jul 1, 2025
11da608
increase_used needs to be called with heap lock
pengxiaolong Jul 1, 2025
60e75f2
Fix errors under race conditions
pengxiaolong Jul 1, 2025
d3cebfc
Fixes
pengxiaolong Jul 2, 2025
4caa801
Fix humongous allocation failure
pengxiaolong Jul 3, 2025
64015b3
Fix more asserts
pengxiaolong Jul 3, 2025
b9c9926
Merge branch 'openjdk:master' into cas-alloc
pengxiaolong Jul 3, 2025
94e538c
Fix build error
pengxiaolong Jul 3, 2025
37cee1f
Remove use of heap lock when update used
pengxiaolong Jul 3, 2025
977bebf
Adjust alloc logic
pengxiaolong Jul 3, 2025
4faf618
Fix build error
pengxiaolong Jul 3, 2025
d509856
More refactors
pengxiaolong Jul 3, 2025
970f3dd
Add todo comments
pengxiaolong Jul 3, 2025
bc5e72a
Revert "Duplicate Z's CPUAffinity in gc shared"
pengxiaolong Jul 3, 2025
103e42f
Steal alloc from other shared regions
pengxiaolong Jul 3, 2025
2f5d818
Use current thread id for hash
pengxiaolong Jul 4, 2025
6aa2dba
Fix build error for Windows
pengxiaolong Jul 4, 2025
ce5616c
Not reserve a region if it is ready for promotion
pengxiaolong Jul 4, 2025
d1d71bc
Only reserve empty region for direct allocation, also take the chance…
pengxiaolong Jul 4, 2025
96db619
reserve region when non-empty region has enough capacity
pengxiaolong Jul 4, 2025
d4dcb28
touch up
pengxiaolong Jul 4, 2025
2ea822c
Allocate new obj before storing the new reserved shared region
pengxiaolong Jul 7, 2025
e4ddfdc
Fix typo
pengxiaolong Jul 7, 2025
dceff3c
Not repeat allocation on the regions already tried before stealing al…
pengxiaolong Jul 7, 2025
138acb7
Fix typo
pengxiaolong Jul 7, 2025
fccbd0d
Fix improper order when release a region from direct allocation
pengxiaolong Jul 7, 2025
3452995
Fix improper order when release a region from direct allocation
pengxiaolong Jul 7, 2025
c93dc01
Fix improper order when release a region from direct allocation
pengxiaolong Jul 7, 2025
3e80fdc
Fix a bug
pengxiaolong Jul 7, 2025
b3d3592
Not update allocation bias
pengxiaolong Jul 7, 2025
9340e6e
Add CPU afinity support and use CPU process id instead thread id for …
pengxiaolong Jul 8, 2025
1557472
Fix wrong include
pengxiaolong Jul 8, 2025
926462f
Fix wrong include
pengxiaolong Jul 8, 2025
c640e68
Use random to decide the start index where mutator starts allocating …
pengxiaolong Jul 8, 2025
ca04034
Delete ShenandoahCPU
pengxiaolong Jul 8, 2025
f4c8e55
Comments to explain ShenandoahDirectlyAllocatableRegionAffinity
pengxiaolong Jul 8, 2025
@@ -27,6 +27,7 @@
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahEvacInfo.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahGeneration.hpp"
#include "gc/shenandoah/shenandoahGenerationalHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
@@ -94,6 +95,9 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio
immediate_regions++;
immediate_garbage += garbage;
region->make_trash_immediate();
if (region->reserved_for_direct_allocation()) {
heap->free_set()->release_directly_allocatable_region(region);
}
} else {
bool is_candidate;
// This is our candidate for later consideration.
@@ -27,6 +27,7 @@
#include "gc/shared/gcCause.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
#include "logging/log.hpp"
@@ -111,6 +112,9 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
immediate_regions++;
immediate_garbage += garbage;
region->make_trash_immediate();
if (region->reserved_for_direct_allocation()) {
heap->free_set()->release_directly_allocatable_region(region);
}
} else {
// This is our candidate for later consideration.
candidates[cand_idx].set_region_and_garbage(region, garbage);
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
@@ -27,6 +27,7 @@

#include "gc/shenandoah/shenandoahAgeCensus.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegionSet.hpp"
@@ -101,6 +102,9 @@ void ShenandoahCollectionSet::add_region(ShenandoahHeapRegion* r) {
if (ShenandoahHeap::heap()->mode()->is_generational() && r->age() >= ShenandoahGenerationalHeap::heap()->age_census()->tenuring_threshold()) {
_young_bytes_to_promote += live;
}
if (r->reserved_for_direct_allocation()) {
_heap->free_set()->release_directly_allocatable_region(r);
}
} else if (r->is_old()) {
_old_bytes_to_evacuate += live;
_old_garbage += garbage;
388 changes: 353 additions & 35 deletions src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp

Large diffs are not rendered by default.
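
This collapsed file holds the heart of the change: mutator allocations bump a region's top with a CAS instead of taking the heap lock (commit a063a1c adds allocate_atomic to ShenandoahHeapRegion). A minimal sketch of such a CAS bump-pointer fast path, assuming _top is raced only by these CASes and is otherwise updated under the heap lock (details may differ from the actual patch):

    // Sketch only: lock-free bump-pointer allocation within a single region.
    HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size) {
      while (true) {
        HeapWord* obj = Atomic::load(&_top);
        HeapWord* new_top = obj + size;
        if (new_top > end()) {
          return nullptr;  // Not enough space left; caller tries another region.
        }
        // Publish the new top; if another thread won the race, retry.
        if (Atomic::cmpxchg(&_top, obj, new_top) == obj) {
          return obj;      // [obj, new_top) now belongs to this thread.
        }
      }
    }

Each successful CAS hands the winner a disjoint interval, so the heap lock is needed only on the slow path that retires and replaces regions.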

90 changes: 60 additions & 30 deletions src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
@@ -29,6 +29,7 @@
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegionSet.hpp"
#include "gc/shenandoah/shenandoahSimpleBitMap.hpp"
#include "memory/padded.inline.hpp"

// Each ShenandoahHeapRegion is associated with a ShenandoahFreeSetPartitionId.
enum class ShenandoahFreeSetPartitionId : uint8_t {
@@ -78,10 +79,9 @@ class ShenandoahRegionPartitions {
// are denoted in bytes. Note that some regions that had been assigned to a particular partition at rebuild time
// may have been retired following the rebuild. The tallies for these regions are still reflected in _capacity[p]
// and _used[p], even though the region may have been removed from the free set.
size_t _capacity[UIntNumPartitions];
size_t _used[UIntNumPartitions];
size_t _available[UIntNumPartitions];
size_t _region_counts[UIntNumPartitions];
size_t volatile _capacity[UIntNumPartitions];
size_t volatile _used[UIntNumPartitions];
size_t volatile _region_counts[UIntNumPartitions];

// For each partition p, _left_to_right_bias is true iff allocations are normally made from lower indexed regions
// before higher indexed regions.
@@ -213,56 +213,40 @@ class ShenandoahRegionPartitions {

inline size_t capacity_of(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "selected free set must be valid");
return _capacity[int(which_partition)];
return Atomic::load(_capacity + int(which_partition));
}

inline size_t used_by(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "selected free set must be valid");
return _used[int(which_partition)];
return Atomic::load(_used + int(which_partition));
}

inline size_t available_in(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "selected free set must be valid");
shenandoah_assert_heaplocked();
assert(_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)],
"Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s",
_available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)],
partition_membership_name(ssize_t(which_partition)));
return _available[int(which_partition)];
return capacity_of(which_partition) - used_by(which_partition);
}

// Return the result of available_in for callers that do not hold the heap lock.
inline size_t available_in_not_locked(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "selected free set must be valid");
shenandoah_assert_not_heaplocked();
#ifdef ASSERT
ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock());
assert((_available[int(which_partition)] == FreeSetUnderConstruction) ||
(_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)]),
"Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s",
_available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)],
partition_membership_name(ssize_t(which_partition)));
#endif
return _available[int(which_partition)];
return available_in(which_partition);
}

inline void set_capacity_of(ShenandoahFreeSetPartitionId which_partition, size_t value) {
shenandoah_assert_heaplocked();
assert (which_partition < NumPartitions, "selected free set must be valid");
_capacity[int(which_partition)] = value;
_available[int(which_partition)] = value - _used[int(which_partition)];
Atomic::store(_capacity + int(which_partition), value);
}

inline void set_used_by(ShenandoahFreeSetPartitionId which_partition, size_t value) {
shenandoah_assert_heaplocked();
assert (which_partition < NumPartitions, "selected free set must be valid");
_used[int(which_partition)] = value;
_available[int(which_partition)] = _capacity[int(which_partition)] - value;
Atomic::store(_used + int(which_partition), value);
}

inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { return _region_counts[int(which_partition)]; }
inline size_t count(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "selected free set must be valid");
return Atomic::load(_region_counts + int(which_partition));
}

// Assure leftmost, rightmost, leftmost_empty, and rightmost_empty bounds are valid for all free sets.
// Valid bounds honor all of the following (where max is the number of heap regions):
@@ -287,6 +271,28 @@ class ShenandoahRegionPartitions {
void assert_bounds() NOT_DEBUG_RETURN;
};

#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_AFFINITY ((Thread*)-1)
#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF ((Thread*)-2)
// When mutator threads allocate from directly allocatable regions, the allocations should ideally be evenly
// distributed across all such regions; random selection is the most portable way to achieve that. A purely
// random distribution, however, can hurt memory locality, e.g. two consecutive allocations from the same
// thread may land in different regions. ShenandoahDirectlyAllocatableRegionAffinity mitigates this
// locality issue.
// The idea and code are borrowed from ZGC's CPU affinity, but use a random number instead of the CPU id.
class ShenandoahDirectlyAllocatableRegionAffinity : public AllStatic {
struct Affinity {
Thread* _thread;
};

static PaddedEnd<Affinity>* _affinity;
static THREAD_LOCAL Thread* _self;
static THREAD_LOCAL uint _index;
static uint index_slow();
public:
static void initialize();
static uint index();
};
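
A plausible shape for the lookup, following ZGC's cached-affinity pattern (a sketch under assumed details; the random source and slot count are illustrative, not the patch's exact code):

    // Sketch: each thread caches a randomly assigned slot; index() falls back
    // to index_slow() only on a thread's first call.
    inline uint ShenandoahDirectlyAllocatableRegionAffinity::index() {
      if (_self == Thread::current()) {
        return _index;       // Fast path: thread-local cache is valid.
      }
      return index_slow();   // First call on this thread: pick a slot.
    }

    uint ShenandoahDirectlyAllocatableRegionAffinity::index_slow() {
      _self = Thread::current();
      // num_slots is hypothetical: how many directly allocatable regions exist.
      _index = static_cast<uint>(os::random()) % num_slots;
      return _index;
    }

The random pick keeps the scheme portable across platforms, while the thread-local cache keeps consecutive allocations from one thread on the same region.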

// Publicly, ShenandoahFreeSet represents memory that is available to mutator threads. The public capacity(), used(),
// and available() methods represent this public notion of memory that is under control of the mutator. Separately,
// ShenandoahFreeSet also represents memory available to garbage collection activities for compaction purposes.
@@ -315,6 +321,7 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
private:
ShenandoahHeap* const _heap;
ShenandoahRegionPartitions _partitions;
ShenandoahHeapRegion** _directly_allocatable_regions;

HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r);

@@ -410,6 +417,21 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
// log status, assuming lock has already been acquired by the caller.
void log_status();

template<bool IS_TLAB>
HeapWord* par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region);

bool try_allocate_directly_allocatable_regions(ShenandoahHeapRegion** shared_region_address[],
ShenandoahHeapRegion* original_shared_regions[],
uint region_count,
ShenandoahAllocRequest &req,
HeapWord* &obj,
bool &in_new_region);
template<bool IS_MUTATOR, bool IS_OLD>
uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty);

template<typename Iter>
uint iterate_regions_for_alloc(Iter& iterator, ShenandoahHeapRegionBreakableIterClosure* cl);

public:
static const size_t FreeSetUnderConstruction = ShenandoahRegionPartitions::FreeSetUnderConstruction;

@@ -484,6 +506,14 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {

HeapWord* allocate(ShenandoahAllocRequest& req, bool& in_new_region);

HeapWord* allocate_humongous(ShenandoahAllocRequest &req);

void release_all_directly_allocatable_regions();

void release_directly_allocatable_region(ShenandoahHeapRegion *region);

template<bool IS_TLAB>
HeapWord* par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region);
/*
* Internal fragmentation metric: describes how fragmented the heap regions are.
*
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp
@@ -219,6 +219,8 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) {
heap->tlabs_retire(ResizeTLAB);
}

heap->free_set()->release_all_directly_allocatable_regions();

OrderAccess::fence();

phase1_mark_heap();
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp
@@ -565,7 +565,7 @@ size_t ShenandoahGeneration::select_aged_regions(size_t old_available) {
// old generation.
HeapWord* tams = ctx->top_at_mark_start(r);
HeapWord* original_top = r->top();
if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top) {
if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top && !r->reserved_for_direct_allocation()) {
// No allocations from this region have been made during concurrent mark. It meets all the criteria
// for in-place-promotion. Though we only need the value of top when we fill the end of the region,
// we use this field to indicate that this region should be promoted in place during the evacuation
20 changes: 18 additions & 2 deletions src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -976,7 +976,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
}

if (!ShenandoahAllocFailureALot || !should_inject_alloc_failure()) {
result = allocate_memory_under_lock(req, in_new_region);
result = allocate_memory_for_mutator(req, in_new_region);
}

// Check that gc overhead is not exceeded.
@@ -1008,7 +1008,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
const size_t original_count = shenandoah_policy()->full_gc_count();
while (result == nullptr && should_retry_allocation(original_count)) {
control_thread()->handle_alloc_failure(req, true);
result = allocate_memory_under_lock(req, in_new_region);
result = allocate_memory_for_mutator(req, in_new_region);
}
if (result != nullptr) {
// If our allocation request has been satisfied after it initially failed, we count this as good gc progress
@@ -1062,6 +1062,22 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
return result;
}

HeapWord* ShenandoahHeap::allocate_memory_for_mutator(ShenandoahAllocRequest& req, bool& in_new_region) {
assert(req.is_mutator_alloc(), "Sanity");
assert(!req.is_old(), "Sanity");
shenandoah_assert_not_heaplocked();
ShenandoahFreeSet* free_set = ShenandoahHeap::free_set();
if (ShenandoahHeapRegion::requires_humongous(req.size())) {
in_new_region = true;
return free_set->allocate_humongous(req);
}
if (req.is_lab_alloc()) {
return free_set->par_allocate_single_for_mutator<true>(req, in_new_region);
} else {
return free_set->par_allocate_single_for_mutator<false>(req, in_new_region);
}
}

inline bool ShenandoahHeap::should_retry_allocation(size_t original_full_gc_count) const {
return shenandoah_policy()->full_gc_count() == original_full_gc_count
&& !shenandoah_policy()->is_at_shutdown();
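allocate_memory_for_mutator above routes every non-humongous mutator request to the lock-free free-set path. The body of par_allocate_single_for_mutator lives in the collapsed shenandoahFreeSet.cpp diff; judging from the declarations and the commit history (affinity index, then stealing from other shared regions), its control flow is roughly the following sketch, where _directly_allocatable_region_count is a hypothetical field for the number of reserved regions:

    template<bool IS_TLAB>
    HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest& req, bool& in_new_region) {
      const uint count = _directly_allocatable_region_count;  // hypothetical
      const uint start = ShenandoahDirectlyAllocatableRegionAffinity::index() % count;
      // Probe the affine region first, then its neighbors ("stealing"),
      // all without taking the heap lock.
      for (uint i = 0; i < count; i++) {
        ShenandoahHeapRegion* r = Atomic::load(&_directly_allocatable_regions[(start + i) % count]);
        if (r != nullptr) {
          HeapWord* obj = par_allocate_in_for_mutator<IS_TLAB>(r, req, in_new_region);
          if (obj != nullptr) {
            return obj;
          }
        }
      }
      // All reserved regions are full: fall back to the heap-locked path,
      // which can also install fresh directly allocatable regions.
      return allocate(req, in_new_region);
    }
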
7 changes: 7 additions & 0 deletions src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -118,6 +118,12 @@ class ShenandoahHeapRegionClosure : public StackObj {
virtual bool is_thread_safe() { return false; }
};

class ShenandoahHeapRegionBreakableIterClosure : public StackObj {
public:
// Return true to break the iteration loop.
virtual bool heap_region_do(ShenandoahHeapRegion* r) { return false; };
};
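
The breakable closure lets region walks stop as soon as the caller's condition is met instead of visiting every region (the free set's iterate_regions_for_alloc takes one). A hypothetical usage, not taken from the patch:

    // Sketch: find the first region with at least min_free bytes available.
    class ShenandoahFindFreeRegionClosure : public ShenandoahHeapRegionBreakableIterClosure {
      const size_t _min_free;
      ShenandoahHeapRegion* _result;
    public:
      explicit ShenandoahFindFreeRegionClosure(size_t min_free)
        : _min_free(min_free), _result(nullptr) {}
      bool heap_region_do(ShenandoahHeapRegion* r) override {
        if (r->free() >= _min_free) {
          _result = r;
          return true;   // Break the iteration.
        }
        return false;    // Keep scanning.
      }
      ShenandoahHeapRegion* result() const { return _result; }
    };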

typedef ShenandoahLock ShenandoahHeapLock;
typedef ShenandoahLocker ShenandoahHeapLocker;
typedef Stack<oop, mtGC> ShenandoahScanObjectStack;
@@ -691,6 +697,7 @@ class ShenandoahHeap : public CollectedHeap {

private:
HeapWord* allocate_memory_under_lock(ShenandoahAllocRequest& request, bool& in_new_region);
HeapWord* allocate_memory_for_mutator(ShenandoahAllocRequest& request, bool& in_new_region);
HeapWord* allocate_from_gclab_slow(Thread* thread, size_t size);
HeapWord* allocate_new_gclab(size_t min_size, size_t word_size, size_t* actual_size);

17 changes: 10 additions & 7 deletions src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
@@ -89,6 +89,7 @@ ShenandoahHeapRegion::ShenandoahHeapRegion(HeapWord* start, size_t index, bool c
SpaceMangler::mangle_region(MemRegion(_bottom, _end));
}
_recycling.unset();
_direct_alloc_reserved.unset();
}

void ShenandoahHeapRegion::report_illegal_transition(const char *method) {
@@ -370,25 +371,25 @@ void ShenandoahHeapRegion::make_committed_bypass() {
}

void ShenandoahHeapRegion::reset_alloc_metadata() {
_tlab_allocs = 0;
_gclab_allocs = 0;
_plab_allocs = 0;
Atomic::store(&_tlab_allocs, size_t(0));
Atomic::store(&_gclab_allocs, size_t(0));
Atomic::store(&_plab_allocs, size_t(0));
}

size_t ShenandoahHeapRegion::get_shared_allocs() const {
return used() - (_tlab_allocs + _gclab_allocs + _plab_allocs) * HeapWordSize;
return used() - (Atomic::load(&_tlab_allocs) + Atomic::load(&_gclab_allocs) + Atomic::load(&_plab_allocs)) * HeapWordSize;
}

size_t ShenandoahHeapRegion::get_tlab_allocs() const {
return _tlab_allocs * HeapWordSize;
return Atomic::load(&_tlab_allocs) * HeapWordSize;
}

size_t ShenandoahHeapRegion::get_gclab_allocs() const {
return _gclab_allocs * HeapWordSize;
return Atomic::load(&_gclab_allocs) * HeapWordSize;
}

size_t ShenandoahHeapRegion::get_plab_allocs() const {
return _plab_allocs * HeapWordSize;
return Atomic::load(&_plab_allocs) * HeapWordSize;
}

void ShenandoahHeapRegion::set_live_data(size_t s) {
@@ -854,6 +855,8 @@ size_t ShenandoahHeapRegion::pin_count() const {
}

void ShenandoahHeapRegion::set_affiliation(ShenandoahAffiliation new_affiliation) {
assert(new_affiliation != OLD_GENERATION || !reserved_for_direct_allocation(), "Reserved region can't move to old");

ShenandoahHeap* heap = ShenandoahHeap::heap();

ShenandoahAffiliation region_affiliation = heap->region_affiliation(this);