From a063a1c5b9bd281190d8af87ae6c8fa9226acf0a Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 21 May 2025 10:44:02 -0700 Subject: [PATCH 01/44] Add allocate_atomic using CAS to ShenandoahHeapRegion --- .../gc/shenandoah/shenandoahHeapRegion.cpp | 14 +++++----- .../gc/shenandoah/shenandoahHeapRegion.hpp | 19 ++++++++----- .../shenandoahHeapRegion.inline.hpp | 27 ++++++++++++++++--- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp index d00a99ee7289c..47ce48e03f480 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp @@ -370,25 +370,25 @@ void ShenandoahHeapRegion::make_committed_bypass() { } void ShenandoahHeapRegion::reset_alloc_metadata() { - _tlab_allocs = 0; - _gclab_allocs = 0; - _plab_allocs = 0; + Atomic::store(&_tlab_allocs, 0); + Atomic::store(&_gclab_allocs, 0); + Atomic::store(&_plab_allocs, 0); } size_t ShenandoahHeapRegion::get_shared_allocs() const { - return used() - (_tlab_allocs + _gclab_allocs + _plab_allocs) * HeapWordSize; + return used() - (Atomic::load(&_tlab_allocs) + Atomic::load(&_gclab_allocs) + Atomic::load(&_plab_allocs)) * HeapWordSize; } size_t ShenandoahHeapRegion::get_tlab_allocs() const { - return _tlab_allocs * HeapWordSize; + return Atomic::load(&_tlab_allocs) * HeapWordSize; } size_t ShenandoahHeapRegion::get_gclab_allocs() const { - return _gclab_allocs * HeapWordSize; + return Atomic::load(&_gclab_allocs) * HeapWordSize; } size_t ShenandoahHeapRegion::get_plab_allocs() const { - return _plab_allocs * HeapWordSize; + return Atomic::load(&_plab_allocs) * HeapWordSize; } void ShenandoahHeapRegion::set_live_data(size_t s) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp index 4c99364bc6ed4..dfb54f6cf35fa 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp @@ -250,11 +250,11 @@ class ShenandoahHeapRegion { HeapWord* _coalesce_and_fill_boundary; // for old regions not selected as collection set candidates. 
 // Frequently updated fields
- HeapWord* _top;
+ HeapWord* volatile _top;
- size_t _tlab_allocs;
- size_t _gclab_allocs;
- size_t _plab_allocs;
+ size_t volatile _tlab_allocs;
+ size_t volatile _gclab_allocs;
+ size_t volatile _plab_allocs;
 volatile size_t _live_data;
 volatile size_t _critical_pins;
@@ -366,6 +366,9 @@ class ShenandoahHeapRegion {
 // Allocation (return nullptr if full)
 inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req);
+ // Atomic allocation using CAS, return nullptr if full or not enough space for the req
+ inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req);
+
 inline void clear_live_data();
 void set_live_data(size_t s);
@@ -425,8 +428,12 @@ class ShenandoahHeapRegion {
 // Find humongous start region that this region belongs to
 ShenandoahHeapRegion* humongous_start_region() const;
- HeapWord* top() const { return _top; }
- void set_top(HeapWord* v) { _top = v; }
+ HeapWord* top() const {
+   return Atomic::load(&_top);
+ }
+ void set_top(HeapWord* v) {
+   Atomic::store(&_top, v);
+ }
 HeapWord* new_top() const { return _new_top; }
 void set_new_top(HeapWord* v) { _new_top = v; }
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
index 0df482c1e2dab..80724dd616785 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
@@ -109,6 +109,27 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
 }
 }
+HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAllocRequest& req) {
+ assert(is_object_aligned(size), "alloc size breaks alignment: %zu", size);
+ assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established");
+ assert(this->is_regular(), "must be a regular region");
+
+ for (;;) {
+   HeapWord* obj = top();
+   if (pointer_delta(end(), obj) >= size) {
+     HeapWord* new_top = obj + size;
+     if (Atomic::cmpxchg(&_top, obj, new_top) == obj) {
+       adjust_alloc_metadata(req.type(), size);
+       assert(is_object_aligned(new_top), "new top breaks alignment: " PTR_FORMAT, p2i(new_top));
+       assert(is_object_aligned(obj), "obj is not aligned: " PTR_FORMAT, p2i(obj));
+       return obj;
+     }
+   } else {
+     return nullptr;
+   }
+ }
+}
+
 inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t size) {
 switch (type) {
 case ShenandoahAllocRequest::_alloc_shared:
@@ -116,13 +137,13 @@ inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::
 // Counted implicitly by tlab/gclab allocs
 break;
 case ShenandoahAllocRequest::_alloc_tlab:
- _tlab_allocs += size;
+ Atomic::add(&_tlab_allocs, size);
 break;
 case ShenandoahAllocRequest::_alloc_gclab:
- _gclab_allocs += size;
+ Atomic::add(&_gclab_allocs, size);
 break;
 case ShenandoahAllocRequest::_alloc_plab:
- _plab_allocs += size;
+ Atomic::add(&_plab_allocs, size);
 break;
 default:
 ShouldNotReachHere();

From 66f3919d1b4c31945718494c0ec9d8b960158a2c Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Wed, 21 May 2025 10:45:14 -0700
Subject: [PATCH 02/44] Duplicate Z's CPUAffinity in gc shared

---
 src/hotspot/share/gc/shared/CPUAffinity.cpp | 67 +++++++++++++++++++
 src/hotspot/share/gc/shared/CPUAffinity.hpp | 49 ++++++++++++++
 .../share/gc/shared/CPUAffinity.inline.hpp | 49 ++++++++++++++
 3 files changed, 165 insertions(+)
 create mode 100644 src/hotspot/share/gc/shared/CPUAffinity.cpp
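Patch 01's allocate_atomic is the classic lock-free bump-pointer idiom: read the current top, bounds-check, then publish the new top with a compare-and-swap, retrying only when another thread won the race. Below is a minimal self-contained sketch of the same idiom for reference; it uses std::atomic in place of HotSpot's Atomic:: wrappers, and every name in it is illustrative rather than HotSpot API.

#include <atomic>
#include <cstddef>

struct Region {
  std::atomic<char*> top;   // current allocation pointer
  char* end;                // one past the last usable byte

  // Bump-pointer allocation: returns the old top on success,
  // nullptr when the region cannot fit size bytes.
  char* allocate_atomic(std::size_t size) {
    char* obj = top.load(std::memory_order_relaxed);
    for (;;) {
      if (static_cast<std::size_t>(end - obj) < size) {
        return nullptr;                 // full: caller must pick another region
      }
      // On failure, compare_exchange_weak reloads obj with the current top,
      // so the loop re-checks the remaining space before retrying.
      if (top.compare_exchange_weak(obj, obj + size)) {
        return obj;                     // [obj, obj + size) now belongs to this thread
      }
    }
  }
};

The CAS is what makes the window between old and new top exclusively owned by the winner; losers retry with the refreshed top, which is why the bounds check sits inside the loop.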
 create mode 100644 src/hotspot/share/gc/shared/CPUAffinity.hpp
 create mode 100644 src/hotspot/share/gc/shared/CPUAffinity.inline.hpp

diff --git a/src/hotspot/share/gc/shared/CPUAffinity.cpp b/src/hotspot/share/gc/shared/CPUAffinity.cpp
new file mode 100644
index 0000000000000..c085fe65f1ba2
--- /dev/null
+++ b/src/hotspot/share/gc/shared/CPUAffinity.cpp
@@ -0,0 +1,67 @@
+/*
+* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "gc/shared/gcLogPrecious.hpp"
+#include "gc/shared/CPUAffinity.inline.hpp"
+#include "memory/padded.inline.hpp"
+#include "runtime/javaThread.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+
+
+#define UNKNOWN_AFFINITY ((Thread*)-1)
+#define UNKNOWN_SELF ((Thread*)-2)
+
+PaddedEnd<CPUAffinity::Affinity>* CPUAffinity::_affinity = nullptr;
+THREAD_LOCAL Thread* CPUAffinity::_self = UNKNOWN_SELF;
+THREAD_LOCAL uint32_t CPUAffinity::_cpu = 0;
+
+void CPUAffinity::initialize() {
+  assert(_affinity == nullptr, "Already initialized");
+  const uint32_t ncpus = count();
+
+  _affinity = PaddedArray<Affinity, mtGC>::create_unfreeable(ncpus);
+
+  for (uint32_t i = 0; i < ncpus; i++) {
+    _affinity[i]._thread = UNKNOWN_AFFINITY;
+  }
+
+  log_info_p(gc, init)("CPUs: %u total, %u available",
+                       os::processor_count(),
+                       os::initial_active_processor_count());
+}
+
+uint32_t CPUAffinity::id_slow() {
+  // Set current thread
+  if (_self == UNKNOWN_SELF) {
+    _self = Thread::current();
+  }
+
+  // Set current CPU
+  _cpu = os::processor_id();
+
+  // Update affinity table
+  _affinity[_cpu]._thread = _self;
+
+  return _cpu;
+}
diff --git a/src/hotspot/share/gc/shared/CPUAffinity.hpp b/src/hotspot/share/gc/shared/CPUAffinity.hpp
new file mode 100644
index 0000000000000..cdc2af23778dd
--- /dev/null
+++ b/src/hotspot/share/gc/shared/CPUAffinity.hpp
@@ -0,0 +1,49 @@
+/*
+* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef SHARE_GC_SHARED_CPUAFFINITY_HPP
+#define SHARE_GC_SHARED_CPUAFFINITY_HPP
+
+#include "memory/allStatic.hpp"
+#include "memory/padded.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class Thread;
+
+class CPUAffinity : public AllStatic {
+  struct Affinity {
+    Thread* _thread;
+  };
+  static PaddedEnd<Affinity>* _affinity;
+  static THREAD_LOCAL Thread* _self;
+  static THREAD_LOCAL uint32_t _cpu;
+
+  static uint32_t id_slow();
+public:
+  static void initialize();
+
+  static uint32_t count();
+  static uint32_t id();
+};
+
+#endif
diff --git a/src/hotspot/share/gc/shared/CPUAffinity.inline.hpp b/src/hotspot/share/gc/shared/CPUAffinity.inline.hpp
new file mode 100644
index 0000000000000..6f1a59e65418b
--- /dev/null
+++ b/src/hotspot/share/gc/shared/CPUAffinity.inline.hpp
@@ -0,0 +1,49 @@
+/*
+* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */ + +#ifndef SHARE_GC_SHARED_CPUAFFINITY_INLINE_HPP +#define SHARE_GC_SHARED_CPUAFFINITY_INLINE_HPP + +#include "gc/shared/CPUAffinity.hpp" + +#include "runtime/os.hpp" +#include "utilities/debug.hpp" + +inline uint32_t CPUAffinity::count() { + return (uint32_t)os::processor_count(); +} + +inline uint32_t CPUAffinity::id() { + assert(_affinity != nullptr, "Not initialized"); + + // Fast path + if (_affinity[_cpu]._thread == _self) { + return _cpu; + } + + // Slow path + return id_slow(); +} + + +#endif \ No newline at end of file From 90f21c7c706d6154946332d3718ec1764ffa5da5 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 21 May 2025 10:52:41 -0700 Subject: [PATCH 03/44] Touch up --- src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp | 6 +++--- src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp index 47ce48e03f480..6fe2ef415fdc9 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp @@ -370,9 +370,9 @@ void ShenandoahHeapRegion::make_committed_bypass() { } void ShenandoahHeapRegion::reset_alloc_metadata() { - Atomic::store(&_tlab_allocs, 0); - Atomic::store(&_gclab_allocs, 0); - Atomic::store(&_plab_allocs, 0); + Atomic::store(&_tlab_allocs, 0ul); + Atomic::store(&_gclab_allocs, 0ul); + Atomic::store(&_plab_allocs, 0ul); } size_t ShenandoahHeapRegion::get_shared_allocs() const { diff --git a/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp b/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp index a245f91fa71e9..069daa4990ee6 100644 --- a/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp +++ b/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp @@ -42,7 +42,7 @@ volatile_nonstatic_field(ShenandoahHeapRegion, _state, ShenandoahHeapRegion::RegionState) \ nonstatic_field(ShenandoahHeapRegion, _index, size_t const) \ nonstatic_field(ShenandoahHeapRegion, _bottom, HeapWord* const) \ - nonstatic_field(ShenandoahHeapRegion, _top, HeapWord*) \ + volatile_nonstatic_field(ShenandoahHeapRegion, _top, HeapWord*) \ nonstatic_field(ShenandoahHeapRegion, _end, HeapWord* const) \ #define VM_INT_CONSTANTS_SHENANDOAH(declare_constant, declare_constant_with_value) \ From cd19779f24c4a1075cb884380a51a1821726a20a Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Tue, 24 Jun 2025 14:38:19 -0700 Subject: [PATCH 04/44] cas_alloc --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 68 +++++++++++++++++++ .../share/gc/shenandoah/shenandoahFreeSet.hpp | 9 +++ .../share/gc/shenandoah/shenandoahHeap.cpp | 4 ++ .../share/gc/shenandoah/shenandoahHeap.hpp | 1 + .../gc/shenandoah/shenandoahHeapRegion.hpp | 6 +- .../shenandoahHeapRegion.inline.hpp | 39 +++++++++-- 6 files changed, 122 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 1acb6a23e7a4c..e3c2a4d386ae4 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -751,6 +751,7 @@ ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _alloc_bias_weight(0) { clear_internal(); + _directly_allocatable_regions = NEW_C_HEAP_ARRAY(ShenandoahHeapRegion*, 13, mtGC); } void ShenandoahFreeSet::add_promoted_in_place_region_to_old_collector(ShenandoahHeapRegion* region) 
{
@@ -2080,6 +2081,73 @@ HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_
 }
 }
+template <bool IS_TLAB>
+HeapWord* ShenandoahFreeSet::cas_allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) {
+ shenandoah_assert_not_heaplocked();
+ assert(req.is_mutator_alloc(), "Must be mutator allocation");
+ assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be");
+
+ int seed = os::current_process_id();
+ idx_t idx = seed % 13;
+ HeapWord* obj = nullptr;
+ size_t actual_size = req.size();
+ int attempts = 0;
+ for (;;) {
+   attempts ++;
+   ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx);
+   if (r != nullptr) {
+     if (IS_TLAB) {
+       obj = r->allocate_lab_atomic(req, actual_size);
+     } else {
+       obj = r->allocate_atomic(req.size(), req);
+     }
+     if (obj != nullptr) {
+       assert(actual_size > 0, "Must be");
+       req.set_actual_size(actual_size);
+       if (pointer_delta(r->bottom(), obj) == actual_size) {
+         // Set to true if it is the first object/tlab allocated in the region.
+         in_new_region = true;
+       }
+       return obj;
+     }
+   }
+   idx = (idx + seed) % 13;
+ }
+}
+
+ShenandoahHeapRegion* ShenandoahFreeSet::allocate_new_shared_region(
+    ShenandoahHeapRegion** shared_region,
+    ShenandoahHeapRegion* original_shared_region) {
+ assert(Thread::current()->is_Java_thread(), "Must be mutator");
+ {
+   ShenandoahHeapLocker locker(_heap->lock());
+   if (_partitions.is_empty(ShenandoahFreeSetPartitionId::Mutator)) {
+     return Atomic::load_acquire(shared_region);
+   } else {
+     if (_partitions.alloc_from_left_bias(ShenandoahFreeSetPartitionId::Mutator)) {
+       ShenandoahLeftRightIterator iterator(&_partitions, ShenandoahFreeSetPartitionId::Mutator, true);
+       try_allocate_new_shared_region(shared_region, original_shared_region, iterator);
+     } else {
+       ShenandoahRightLeftIterator iterator(&_partitions, ShenandoahFreeSetPartitionId::Mutator, true);
+       try_allocate_new_shared_region(shared_region, original_shared_region, iterator);
+     }
+   }
+ }
+}
+
+template <typename Iter>
+ShenandoahHeapRegion* ShenandoahFreeSet::try_allocate_new_shared_region(ShenandoahHeapRegion** shared_region, ShenandoahHeapRegion* original_shared_region, Iter iterator) {
+ for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) {
+   ShenandoahHeapRegion* r = _heap->get_region(idx);
+   if (r->is_trash()) {
+     if (_heap->is_concurrent_weak_root_in_progress()) continue;
+     r->try_recycle_under_lock();
+   }
+   if (r->is_empty()) {
+   }
+ }
+}
+
 void ShenandoahFreeSet::print_on(outputStream* out) const {
 out->print_cr("Mutator Free Set: %zu", _partitions.count(ShenandoahFreeSetPartitionId::Mutator));
 ShenandoahLeftRightIterator mutator(const_cast<ShenandoahRegionPartitions*>(&_partitions), ShenandoahFreeSetPartitionId::Mutator);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
index 55f23480618b9..de93bad3cdd37 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
@@ -315,6 +315,7 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
 private:
 ShenandoahHeap* const _heap;
 ShenandoahRegionPartitions _partitions;
+ ShenandoahHeapRegion** _directly_allocatable_regions = nullptr;
 HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r);
@@ -410,6 +411,11 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
 // log status, assuming lock has already been acquired by the caller.
 void log_status();
+ ShenandoahHeapRegion* allocate_new_shared_region(ShenandoahHeapRegion** shared_region, ShenandoahHeapRegion* original_shared_region);
+
+ template <typename Iter>
+ ShenandoahHeapRegion* try_allocate_new_shared_region(ShenandoahHeapRegion** shared_region, ShenandoahHeapRegion* original_shared_region, Iter iterator);
+
 public:
 static const size_t FreeSetUnderConstruction = ShenandoahRegionPartitions::FreeSetUnderConstruction;
@@ -484,6 +490,9 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
 HeapWord* allocate(ShenandoahAllocRequest& req, bool& in_new_region);
+ template <bool IS_TLAB>
+ HeapWord* cas_allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region);
+
 /*
 * Internal fragmentation metric: describes how fragmented the heap regions are.
 *
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
index 55dfb2e8de400..95ab690da65ec 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -1062,6 +1062,10 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
 return result;
 }
+HeapWord* ShenandoahHeap::allocate_memory_for_mutator(ShenandoahAllocRequest& req) {
+ assert(req.is_mutator_alloc(), "Sanity");
+}
+
 inline bool ShenandoahHeap::should_retry_allocation(size_t original_full_gc_count) const {
 return shenandoah_policy()->full_gc_count() == original_full_gc_count &&
 !shenandoah_policy()->is_at_shutdown();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
index 4124bf8be7f5a..f02eefb21431e 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -699,6 +699,7 @@ class ShenandoahHeap : public CollectedHeap {
 public:
 HeapWord* allocate_memory(ShenandoahAllocRequest& request);
+ HeapWord* allocate_memory_for_mutator(ShenandoahAllocRequest& request);
 HeapWord* mem_allocate(size_t size, bool* what) override;
 MetaWord* satisfy_failed_metadata_allocation(ClassLoaderData* loader_data,
 size_t size,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
index dfb54f6cf35fa..c918cc390d123 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
@@ -367,7 +367,11 @@ class ShenandoahHeapRegion {
 inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req);
 // Atomic allocation using CAS, return nullptr if full or not enough space for the req
- inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req);
+ inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req, size_t &actual_size);
+
+ inline HeapWord* allocate_lab_atomic(const ShenandoahAllocRequest &req, size_t &actual_size);
+
+ inline bool try_allocate(HeapWord* const obj, size_t const size);
 inline void clear_live_data();
 void set_live_data(size_t s);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
index 80724dd616785..468bc4d9a6c30 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
@@ -117,11 +117,8 @@ HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAll
 for (;;) {
 HeapWord* obj = top();
 if (pointer_delta(end(), obj) >= size) {
-      HeapWord* new_top = obj + size;
-      if (Atomic::cmpxchg(&_top, obj, new_top) == obj) {
+      if (try_allocate(obj, size)) {
 adjust_alloc_metadata(req.type(), size);
-        assert(is_object_aligned(new_top), "new top breaks alignment: " PTR_FORMAT, p2i(new_top));
-        assert(is_object_aligned(obj), "obj is not aligned: " PTR_FORMAT, p2i(obj));
 return obj;
 }
 } else {
@@ -130,6 +127,40 @@ HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAll
 }
 }
+HeapWord* ShenandoahHeapRegion::allocate_lab_atomic(const ShenandoahAllocRequest& req, size_t &actual_size) {
+ assert(req.type() == _tlab_allocs || req.type() == _gclab_allocs, "Only allow tlab or gclab");
+ assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established");
+ assert(this->is_regular(), "must be a regular region");
+ size_t adjusted_size = req.size();
+ for (;;) {
+   HeapWord* obj = top();
+   size_t free_words = align_down(byte_size(obj, end()) >> LogHeapWordSize, MinObjAlignment);
+   if (adjusted_size > free_words) {
+     adjusted_size = free_words;
+   }
+   if (adjusted_size >= req.min_size()) {
+     if (try_allocate(obj, adjusted_size)) {
+       actual_size = adjusted_size;
+       adjust_alloc_metadata(req.type(), adjusted_size);
+       return obj;
+     }
+   } else {
+     log_trace(gc, free)("Failed to shrink TLAB or GCLAB request (%zu) in region %zu to %zu"
+                         " because min_size() is %zu", req.size(), index(), adjusted_size, req.min_size());
+   }
+ }
+}
+
+bool ShenandoahHeapRegion::try_allocate(HeapWord* const obj, size_t const size) {
+ HeapWord* new_top = obj + size;
+ if (Atomic::cmpxchg(&_top, obj, new_top) == obj) {
+   assert(is_object_aligned(new_top), "new top breaks alignment: " PTR_FORMAT, p2i(new_top));
+   assert(is_object_aligned(obj), "obj is not aligned: " PTR_FORMAT, p2i(obj));
+   return true;
+ }
+ return false;
+}
+
 inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t size) {
 switch (type) {
 case ShenandoahAllocRequest::_alloc_shared:

From 5a6bc1c50f9851ab1c9f00dc71093564e29c733e Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Tue, 1 Jul 2025 02:22:35 -0700
Subject: [PATCH 05/44] CAS allocation for mutators

---
 .../share/gc/shenandoah/shenandoahFreeSet.cpp | 161 +++++++++++++-----
 .../share/gc/shenandoah/shenandoahFreeSet.hpp | 12 +-
 .../share/gc/shenandoah/shenandoahHeap.cpp | 18 +-
 .../share/gc/shenandoah/shenandoahHeap.hpp | 8 +-
 .../gc/shenandoah/shenandoahHeapRegion.cpp | 1 +
 .../gc/shenandoah/shenandoahHeapRegion.hpp | 18 +-
 .../gc/shenandoah/shenandoah_globals.hpp | 4 +
 7 files changed, 171 insertions(+), 51 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index e3c2a4d386ae4..a5ec29a574d1c 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2081,71 +2081,148 @@ HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_
 }
 }
+HeapWord* ShenandoahFreeSet::allocate_humongous(ShenandoahAllocRequest& req) {
+ assert(ShenandoahHeapRegion::requires_humongous(req.size()), "Must be humongous alloc");
+ ShenandoahHeapLocker locker(_heap->lock());
+ return allocate_contiguous(req);
+}
+
 template <bool IS_TLAB>
-HeapWord* ShenandoahFreeSet::cas_allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) {
+HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) {
 shenandoah_assert_not_heaplocked();
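One detail of allocate_lab_atomic above is worth calling out: unlike the shared-object path, a TLAB/GCLAB request carries a minimum acceptable size, so rather than failing outright when the region cannot fit the full request, the request is shrunk to the remaining aligned space as long as it stays at or above that minimum. A small sketch of just that sizing rule, under the same illustrative-naming caveat as the earlier sketch:

#include <cstddef>

// Shrink a LAB request to what the region can still hold.
// Returns the word count to CAS for, or 0 when even min_words does not fit.
std::size_t shrink_lab_request(std::size_t requested_words,
                               std::size_t min_words,
                               std::size_t free_words,
                               std::size_t alignment) {
  std::size_t usable = free_words - (free_words % alignment);  // align down
  std::size_t adjusted = requested_words < usable ? requested_words : usable;
  return adjusted >= min_words ? adjusted : 0;
}

In the patch the CAS can still lose the race after this computation, so the sizing and the CAS both sit inside the retry loop and the free space is recomputed on every iteration.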
 assert(req.is_mutator_alloc(), "Must be mutator allocation");
+ assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not");
 assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be");
- int seed = os::current_process_id();
- idx_t idx = seed % 13;
- HeapWord* obj = nullptr;
- size_t actual_size = req.size();
- int attempts = 0;
+ uint process_id = static_cast<uint>(os::current_process_id());
+ constexpr uint max_probes = 3u;
 for (;;) {
-   attempts ++;
-   ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx);
-   if (r != nullptr) {
-     if (IS_TLAB) {
-       obj = r->allocate_lab_atomic(req, actual_size);
-     } else {
-       obj = r->allocate_atomic(req.size(), req);
-     }
-     if (obj != nullptr) {
-       assert(actual_size > 0, "Must be");
-       req.set_actual_size(actual_size);
-       if (pointer_delta(r->bottom(), obj) == actual_size) {
-         // Set to true if it is the first object/tlab allocated in the region.
-         in_new_region = true;
+   uint idx = process_id % ShenandoahDirectlyAllocatableRegionCount;
+   ShenandoahHeapRegion* probed_regions[max_probes];
+   uint probed_indexes[max_probes];
+   HeapWord* obj = nullptr;
+   size_t actual_size = req.size();
+   for (uint i = 0u; i < max_probes; i++) {
+     ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx);
+     if (r != nullptr) {
+       if (IS_TLAB) {
+         obj = r->allocate_lab_atomic(req, actual_size);
+       } else {
+         obj = r->allocate_atomic(req.size(), req);
+       }
+       if (obj != nullptr) {
+         assert(actual_size > 0, "Must be");
+         req.set_actual_size(actual_size);
+         if (pointer_delta(r->bottom(), obj) == actual_size) {
+           // Set to true if it is the first object/tlab allocated in the region.
+           in_new_region = true;
+         }
+         return obj;
 }
-       return obj;
 }
+     probed_indexes[i] = idx;
+     probed_regions[i] = r;
+     idx = (++idx) % ShenandoahDirectlyAllocatableRegionCount;
+   }
+   // Failed to allocate in 3 consecutive directly allocatable regions.
+   // Try to retire any probed region whose free space is below the minimal TLAB size and replace it with a new region.
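The structure of this loop — hash onto a slot, probe a few consecutive slots, then fall back to a locked refill — can be summarized in a standalone sketch. Here std::atomic slots stand in for the Atomic::load_acquire on _directly_allocatable_regions, the helper names are invented, and the refill stub only marks where the heap-locked path would go:

#include <atomic>
#include <cstddef>

struct Region {
  std::atomic<char*> top{nullptr};
  char* end = nullptr;
};

constexpr unsigned kSlots  = 7;  // mirrors the default ShenandoahDirectlyAllocatableRegionCount
constexpr unsigned kProbes = 3;  // mirrors max_probes

std::atomic<Region*> slots[kSlots];

// CAS bump allocation within one region; nullptr when it cannot fit.
char* region_alloc(Region* r, std::size_t size) {
  char* obj = r->top.load(std::memory_order_relaxed);
  for (;;) {
    if (static_cast<std::size_t>(r->end - obj) < size) return nullptr;
    if (r->top.compare_exchange_weak(obj, obj + size)) return obj;
  }
}

// Stand-in for the locked path: the real version takes the heap lock,
// retires nearly-full probed regions, and installs fresh ones.
bool refill_slots_under_lock(const unsigned probed[], Region* const seen[], unsigned n) {
  (void)probed; (void)seen; (void)n;
  return false;  // sketch only: report "nothing refilled"
}

char* par_allocate(unsigned thread_hash, std::size_t size) {
  for (;;) {
    unsigned idx = thread_hash % kSlots;
    unsigned probed[kProbes];
    Region*  seen[kProbes];
    for (unsigned i = 0; i < kProbes; i++) {
      Region* r = slots[idx].load(std::memory_order_acquire);
      if (r != nullptr) {
        if (char* obj = region_alloc(r, size)) return obj;
      }
      probed[i] = idx;
      seen[i]   = r;
      idx = (idx + 1) % kSlots;
    }
    // Every probed slot failed: retire/replace under the lock, and give up
    // only when neither this thread nor a racing thread refilled anything.
    if (!refill_slots_under_lock(probed, seen, kProbes)) return nullptr;
  }
}

Recording which region was seen in each probed slot lets the refill path distinguish "the slot still holds the full region I failed on" from "another thread already swapped in a fresh one", which is the check the following try_refill_directly_allocatable_regions performs.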
+   if (!try_refill_directly_allocatable_regions(max_probes, probed_indexes, probed_regions)) {
+     return nullptr;
+   }
+ }
+}
+
+// Explicit specializations
+template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator<true>(ShenandoahAllocRequest &req, bool &in_new_region);
+template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator<false>(ShenandoahAllocRequest &req, bool &in_new_region);
+
+
+class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreakableIterClosure {
+public:
+ ShenandoahHeapRegion** *_regions_to_refill;
+ uint _refill_count;
+ uint _refilled_count = 0u;
+
+ RefillDirectlyAllocatableRegionClosure(ShenandoahHeapRegion** *regions_to_refill, uint refill_count) :
+   _regions_to_refill(regions_to_refill), _refill_count(refill_count), _refilled_count(0) {};
+
+ bool heap_region_do(ShenandoahHeapRegion *r) override {
+   if (r->is_empty() && !r->reserved_for_direct_allocation()) {
+     if (ShenandoahHeap::heap()->is_concurrent_weak_root_in_progress() && r->is_trash()) return false;
+     if (r->is_trash()) {
+       r->try_recycle_under_lock();
+     }
+     r->reserve_for_direct_allocation();
+     Atomic::store(_regions_to_refill[_refilled_count], r);
+     _refilled_count++;
+     return _refilled_count == _refill_count;
+   }
+   return false;
+ }
+};
+
+bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_region_count,
+                                                                uint probed_indexes[],
+                                                                ShenandoahHeapRegion* probed_regions[]
+                                                                ) {
+ assert(Thread::current()->is_Java_thread(), "Must be mutator");
+ assert(probed_region_count > 0u && probed_region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be");
+ shenandoah_assert_not_heaplocked();
+
+ ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true);
+ ShenandoahHeapRegion** regions_to_refill[probed_region_count];
+ uint refill_count = 0u;
+ uint regions_refilled_by_others = 0u;
+ for (uint i = 0u; i < probed_region_count; i++) {
+   const ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + probed_indexes[i]);
+   if (r == nullptr || r == probed_regions[i]) {
+     if (r == nullptr) {
+       regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i];
+     } else if (r->free() < PLAB::min_size()) {
+       _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used());
+       regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i];
+       Atomic::store(_directly_allocatable_regions + probed_indexes[i] , static_cast<ShenandoahHeapRegion*>(nullptr));
+     }
+   } else {
+     regions_refilled_by_others++;
+   }
+ }
+
+ RefillDirectlyAllocatableRegionClosure cl(regions_to_refill, refill_count);
+ if (refill_count > 0u) {
+   iterate_regions_for_alloc<true, false>(&cl, true);
+ }
+ return cl._refilled_count > 0u ||
+        regions_refilled_by_others > 0u;
+}
+
+template <bool IS_MUTATOR, bool IS_OLD>
+uint ShenandoahFreeSet::iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty) {
+ assert((IS_MUTATOR && !IS_OLD) || !IS_MUTATOR, "Sanity check");
+ ShenandoahFreeSetPartitionId partition = IS_MUTATOR ? ShenandoahFreeSetPartitionId::Mutator :
+   (IS_OLD ? ShenandoahFreeSetPartitionId::OldCollector : ShenandoahFreeSetPartitionId::Mutator);
+ if (_partitions.is_empty(partition)) {
+   return 0u;
+ }
+ if (_partitions.alloc_from_left_bias(partition)) {
+   ShenandoahLeftRightIterator iterator(&_partitions, partition, use_empty);
+   return iterate_regions_for_alloc(iterator, cl);
+ } else {
+   ShenandoahRightLeftIterator iterator(&_partitions, partition, use_empty);
+   return iterate_regions_for_alloc(iterator, cl);
+ }
+}
+
+template <typename Iter>
+uint ShenandoahFreeSet::iterate_regions_for_alloc(Iter& iterator, ShenandoahHeapRegionBreakableIterClosure* cl) {
+ uint regions_iterated = 0u;
+ for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) {
+   regions_iterated++;
+   ShenandoahHeapRegion* r = _heap->get_region(idx);
+   if (cl->heap_region_do(r)) {
+     break;
+   }
+ }
+ return regions_iterated;
+}
+
 void ShenandoahFreeSet::print_on(outputStream* out) const {
 out->print_cr("Mutator Free Set: %zu", _partitions.count(ShenandoahFreeSetPartitionId::Mutator));
 ShenandoahLeftRightIterator mutator(const_cast<ShenandoahRegionPartitions*>(&_partitions), ShenandoahFreeSetPartitionId::Mutator);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
index de93bad3cdd37..9119c48bb530c 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
@@ -411,10 +411,13 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
 // log status, assuming lock has already been acquired by the caller.
 void log_status();
- ShenandoahHeapRegion* allocate_new_shared_region(ShenandoahHeapRegion** shared_region, ShenandoahHeapRegion* original_shared_region);
+ bool try_refill_directly_allocatable_regions(uint probed_region_count, uint probed_indexes[], ShenandoahHeapRegion* probed_regions[]);
+
+ template <bool IS_MUTATOR, bool IS_OLD>
+ uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty);
 template <typename Iter>
- ShenandoahHeapRegion* try_allocate_new_shared_region(ShenandoahHeapRegion** shared_region, ShenandoahHeapRegion* original_shared_region, Iter iterator);
+ uint iterate_regions_for_alloc(Iter& iterator, ShenandoahHeapRegionBreakableIterClosure* cl);
 public:
 static const size_t FreeSetUnderConstruction = ShenandoahRegionPartitions::FreeSetUnderConstruction;
@@ -490,6 +493,10 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
 HeapWord* allocate(ShenandoahAllocRequest& req, bool& in_new_region);
- template <bool IS_TLAB>
- HeapWord* cas_allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region);
+ HeapWord* allocate_humongous(ShenandoahAllocRequest &req);
+ template <bool IS_TLAB>
+ HeapWord* par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region);
 /*
 * Internal fragmentation metric: describes how fragmented the heap regions are.
 *
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
index 95ab690da65ec..b8051eea11af4 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -976,7 +976,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
 }
 if (!ShenandoahAllocFailureALot || !should_inject_alloc_failure()) {
- result = allocate_memory_under_lock(req, in_new_region);
+ result = allocate_memory_for_mutator(req, in_new_region);
 }
 // Check that gc overhead is not exceeded.
@@ -1008,7 +1008,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
 const size_t original_count = shenandoah_policy()->full_gc_count();
 while (result == nullptr && should_retry_allocation(original_count)) {
 control_thread()->handle_alloc_failure(req, true);
- result = allocate_memory_under_lock(req, in_new_region);
+ result = allocate_memory_for_mutator(req, in_new_region);
 }
 if (result != nullptr) {
 // If our allocation request has been satisfied after it initially failed, we count this as good gc progress
@@ -1062,8 +1062,20 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
 return result;
 }
-HeapWord* ShenandoahHeap::allocate_memory_for_mutator(ShenandoahAllocRequest& req) {
+HeapWord* ShenandoahHeap::allocate_memory_for_mutator(ShenandoahAllocRequest& req, bool& in_new_region) {
 assert(req.is_mutator_alloc(), "Sanity");
+ assert(!req.is_old(), "Sanity");
+ shenandoah_assert_not_heaplocked();
+ ShenandoahFreeSet* free_set = ShenandoahHeap::free_set();
+ if (ShenandoahHeapRegion::requires_humongous(req.size())) {
+   in_new_region = true;
+   return free_set->allocate_humongous(req);
+ }
+ if (req.is_lab_alloc()) {
+   return free_set->par_allocate_single_for_mutator<true>(req, in_new_region);
+ } else {
+   return free_set->par_allocate_single_for_mutator<false>(req, in_new_region);
+ }
 }
 inline bool ShenandoahHeap::should_retry_allocation(size_t original_full_gc_count) const {
 return shenandoah_policy()->full_gc_count() == original_full_gc_count &&
 !shenandoah_policy()->is_at_shutdown();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
index f02eefb21431e..d50c76cab12e5 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -118,6 +118,12 @@ class ShenandoahHeapRegionClosure : public StackObj {
 virtual bool is_thread_safe() { return false; }
 };
+class ShenandoahHeapRegionBreakableIterClosure : public StackObj {
+public:
+ // Return true to break the iteration loop.
+ virtual bool heap_region_do(ShenandoahHeapRegion* r) { return false; };
+};
+
 typedef ShenandoahLock ShenandoahHeapLock;
 typedef ShenandoahLocker ShenandoahHeapLocker;
 typedef Stack<oop, mtGC> ShenandoahScanObjectStack;
@@ -691,6 +697,7 @@ class ShenandoahHeap : public CollectedHeap {
 private:
 HeapWord* allocate_memory_under_lock(ShenandoahAllocRequest& request, bool& in_new_region);
+ HeapWord* allocate_memory_for_mutator(ShenandoahAllocRequest& request, bool& in_new_region);
 HeapWord* allocate_from_gclab_slow(Thread* thread, size_t size);
 HeapWord* allocate_new_gclab(size_t min_size, size_t word_size, size_t* actual_size);
@@ -699,7 +706,6 @@ class ShenandoahHeap : public CollectedHeap {
 public:
 HeapWord* allocate_memory(ShenandoahAllocRequest& request);
- HeapWord* allocate_memory_for_mutator(ShenandoahAllocRequest& request);
 HeapWord* mem_allocate(size_t size, bool* what) override;
 MetaWord* satisfy_failed_metadata_allocation(ClassLoaderData* loader_data,
 size_t size,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
index 6fe2ef415fdc9..80f36ad9330a9 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
@@ -89,6 +89,7 @@ ShenandoahHeapRegion::ShenandoahHeapRegion(HeapWord* start, size_t index, bool c
 SpaceMangler::mangle_region(MemRegion(_bottom, _end));
 }
 _recycling.unset();
+ _direct_alloc_reserved.unset();
 }
 void ShenandoahHeapRegion::report_illegal_transition(const char *method) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
index c918cc390d123..0908de3f9165f 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
@@ -268,6 +268,8 @@ class ShenandoahHeapRegion {
 bool _needs_bitmap_reset;
+ ShenandoahSharedFlag _direct_alloc_reserved; // Flag to indicate whether the region is reserved for lock-free direct allocation
+
 public:
 ShenandoahHeapRegion(HeapWord* start, size_t index, bool committed);
@@ -367,7 +369,7 @@ class ShenandoahHeapRegion {
 inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req);
 // Atomic allocation using CAS, return nullptr if full or not enough space for the req
- inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req, size_t &actual_size);
+ inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req);
 inline HeapWord* allocate_lab_atomic(const ShenandoahAllocRequest &req, size_t &actual_size);
@@ -502,6 +504,20 @@ class ShenandoahHeapRegion {
 _needs_bitmap_reset = false;
 }
+ inline void reserve_for_direct_allocation() {
+   assert(_direct_alloc_reserved.is_unset(), "Must be");
+   _direct_alloc_reserved.set();
+ }
+
+ inline void release_from_direct_allocation() {
+   assert(_direct_alloc_reserved.is_set(), "Must be");
+   _direct_alloc_reserved.unset();
+ }
+
+ inline bool reserved_for_direct_allocation() const {
+   return _direct_alloc_reserved.is_set();
+ }
+
 private:
 void decrement_humongous_waste() const;
 void do_commit();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
index ad0beeafed79e..1a84f595cdd0b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
@@ -564,6 +564,10 @@
 "to prevent starvation of the old collector.
Setting this to "          \
 "0 will allow back to back young collections to run during old "           \
 "collections.")                                                            \
+ product(uintx, ShenandoahDirectlyAllocatableRegionCount, 7, EXPERIMENTAL, \
+         "Number of regions Shenandoah will pre-allocate for "             \
+         "direct allocation with CAS; the value should be less than the "  \
+         "number of CPU cores. Ideally it should be a prime number.")      \
 // end of GC_SHENANDOAH_FLAGS

#endif // SHARE_GC_SHENANDOAH_SHENANDOAH_GLOBALS_HPP

From 2da482106e6542a60445a2a59418f5801369ee48 Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Tue, 1 Jul 2025 02:35:24 -0700
Subject: [PATCH 06/44] Update allocation bias

---
 src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index a5ec29a574d1c..b7e8b5633dea1 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2174,12 +2174,13 @@ bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_regi
 uint refill_count = 0u;
 uint regions_refilled_by_others = 0u;
 for (uint i = 0u; i < probed_region_count; i++) {
- const ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + probed_indexes[i]);
+ ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + probed_indexes[i]);
 if (r == nullptr || r == probed_regions[i]) {
 if (r == nullptr) {
 regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i];
 } else if (r->free() < PLAB::min_size()) {
 _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used());
+ r->release_from_direct_allocation();
 regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i];
 Atomic::store(_directly_allocatable_regions + probed_indexes[i] , static_cast<ShenandoahHeapRegion*>(nullptr));
 }
@@ -2203,6 +2204,9 @@ uint ShenandoahFreeSet::iterate_regions_for_alloc(ShenandoahHeapRegionBreakableI
 if (_partitions.is_empty(partition)) {
 return 0u;
 }
+ if (IS_MUTATOR) {
+   update_allocation_bias();
+ }
 if (_partitions.alloc_from_left_bias(partition)) {
 ShenandoahLeftRightIterator iterator(&_partitions, partition, use_empty);

From 5d0d37f0083a96b7c10fd1260a80987fce83776e Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Tue, 1 Jul 2025 03:02:30 -0700
Subject: [PATCH 07/44] Humongous allocation and GC shall not use regions
 reserved for direct allocation

---
 src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index b7e8b5633dea1..e3d95a89593e4 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -949,7 +949,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_from_mutator(ShenandoahAllocRequest& r
 ShenandoahRightLeftIterator iterator(&_partitions, ShenandoahFreeSetPartitionId::Mutator, true);
 for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) {
 ShenandoahHeapRegion* r = _heap->get_region(idx);
- if (can_allocate_from(r)) {
+ if (can_allocate_from(r) && !r->reserved_for_direct_allocation()) {
 if (req.is_old()) {
 if (!flip_to_old_gc(r)) {
 continue;
 }
@@ -1203,7 +1203,8 @@ HeapWord*
ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { // We've confirmed num contiguous regions belonging to Mutator partition, so no need to confirm membership. // If region is not completely free, the current [beg; end] is useless, and we may fast-forward. If we can extend // the existing range, we can exploit that certain regions are already known to be in the Mutator free set. - while (!can_allocate_from(_heap->get_region(end))) { + ShenandoahHeapRegion* region = _heap->get_region(end); + while (!can_allocate_from(region) || region->reserved_for_direct_allocation()) { // region[end] is not empty, so we restart our search after region[end] idx_t slide_delta = end + 1 - beg; if (beg + slide_delta > last_possible_start) { @@ -1234,6 +1235,7 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { } end++; + region = _heap->get_region(end); } size_t remainder = words_size & ShenandoahHeapRegion::region_size_words_mask(); From c7ef2ec1c266c8f3a68def0cbba2bde2aecefcdc Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Tue, 1 Jul 2025 04:39:31 -0700 Subject: [PATCH 08/44] Bug fix --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 8 +++++++- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp | 2 +- .../share/gc/shenandoah/shenandoahHeapRegion.inline.hpp | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index e3d95a89593e4..dc56cb1a328d8 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -751,7 +751,10 @@ ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _alloc_bias_weight(0) { clear_internal(); - _directly_allocatable_regions = NEW_C_HEAP_ARRAY(ShenandoahHeapRegion*, 13, mtGC); + _directly_allocatable_regions = NEW_C_HEAP_ARRAY(ShenandoahHeapRegion*, ShenandoahDirectlyAllocatableRegionCount, mtGC); + for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + _directly_allocatable_regions[i] = nullptr; + } } void ShenandoahFreeSet::add_promoted_in_place_region_to_old_collector(ShenandoahHeapRegion* region) { @@ -2106,6 +2109,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ size_t actual_size = req.size(); for (uint i = 0u; i < max_probes; i++) { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); + log_info(gc)("Cas alloc from region " PTR_FORMAT "", p2i(r)); if (r != nullptr) { if (IS_TLAB) { obj = r->allocate_lab_atomic(req, actual_size); @@ -2155,6 +2159,8 @@ class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreak r->try_recycle_under_lock(); } r->reserve_for_direct_allocation(); + r->set_affiliation(YOUNG_GENERATION); + r->make_regular_allocation(YOUNG_GENERATION); Atomic::store(_regions_to_refill[_refilled_count], r); _refilled_count++; return _refilled_count == _refill_count; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 9119c48bb530c..562c39f9d0273 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -315,7 +315,7 @@ class ShenandoahFreeSet : public CHeapObj { private: ShenandoahHeap* const _heap; ShenandoahRegionPartitions _partitions; - ShenandoahHeapRegion** _directly_allocatable_regions = nullptr; + ShenandoahHeapRegion** 
_directly_allocatable_regions; HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp index 468bc4d9a6c30..0e7ec61087304 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp @@ -128,7 +128,7 @@ HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAll } HeapWord* ShenandoahHeapRegion::allocate_lab_atomic(const ShenandoahAllocRequest& req, size_t &actual_size) { - assert(req.type() == _tlab_allocs || req.type() == _gclab_allocs, "Only allow tlab or gclab"); + assert(req.is_lab_alloc(), "Only lab alloc"); assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established"); assert(this->is_regular(), "must be a regular region"); size_t adjusted_size = req.size(); From 8237eb60f0d1a19a2eead60375c55ae2262303fd Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Tue, 1 Jul 2025 04:53:06 -0700 Subject: [PATCH 09/44] Bug fix --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 2 ++ src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index dc56cb1a328d8..4825dc625ed15 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2123,6 +2123,8 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ // Set to true if it is the first object/tlab allocated in the region. 
in_new_region = true; } + assert(req.is_young(), "Mutator allocations always come from young generation."); + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); return obj; } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp index 0e7ec61087304..49f28335c4d45 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp @@ -147,6 +147,7 @@ HeapWord* ShenandoahHeapRegion::allocate_lab_atomic(const ShenandoahAllocRequest } else { log_trace(gc, free)("Failed to shrink TLAB or GCLAB request (%zu) in region %zu to %zu" " because min_size() is %zu", req.size(), index(), adjusted_size, req.min_size()); + return nullptr; } } } From 11da608c25d41c5698380a8451ade5ad3d059bde Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Tue, 1 Jul 2025 05:40:05 -0700 Subject: [PATCH 10/44] increase_used needs to be called with heap lock --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 4825dc625ed15..35311e825063b 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2109,7 +2109,6 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ size_t actual_size = req.size(); for (uint i = 0u; i < max_probes; i++) { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); - log_info(gc)("Cas alloc from region " PTR_FORMAT "", p2i(r)); if (r != nullptr) { if (IS_TLAB) { obj = r->allocate_lab_atomic(req, actual_size); @@ -2119,12 +2118,15 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ if (obj != nullptr) { assert(actual_size > 0, "Must be"); req.set_actual_size(actual_size); - if (pointer_delta(r->bottom(), obj) == actual_size) { + if (pointer_delta(obj, r->bottom()) == actual_size) { // Set to true if it is the first object/tlab allocated in the region. 
in_new_region = true; } assert(req.is_young(), "Mutator allocations always come from young generation."); - _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); + { + ShenandoahHeapLocker locker(_heap->lock()); + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); + } return obj; } } From 60e75f23e3003d5938a643530bf21370894b348c Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Tue, 1 Jul 2025 14:57:53 -0700 Subject: [PATCH 11/44] Fix errors under race conditions --- .../shenandoahGenerationalHeuristics.cpp | 3 + .../heuristics/shenandoahHeuristics.cpp | 3 + .../gc/shenandoah/shenandoahCollectionSet.cpp | 3 + .../share/gc/shenandoah/shenandoahFreeSet.cpp | 55 ++++++++++++++----- .../shenandoahHeapRegion.inline.hpp | 6 ++ 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp index 08fd45993462b..700dc8ac4e272 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp @@ -94,6 +94,9 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio immediate_regions++; immediate_garbage += garbage; region->make_trash_immediate(); + if (region->reserved_for_direct_allocation()) { + region->release_from_direct_allocation(); + } } else { bool is_candidate; // This is our candidate for later consideration. diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp index b151a75e6e7e5..399675d5287c1 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp @@ -111,6 +111,9 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec immediate_regions++; immediate_garbage += garbage; region->make_trash_immediate(); + if (region->reserved_for_direct_allocation()) { + region->release_from_direct_allocation(); + } } else { // This is our candidate for later consideration. 
candidates[cand_idx].set_region_and_garbage(region, garbage);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
index 25b900f8d7772..bf3de15fecb79 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
@@ -101,6 +101,9 @@ void ShenandoahCollectionSet::add_region(ShenandoahHeapRegion* r) {
 if (ShenandoahHeap::heap()->mode()->is_generational() && r->age() >= ShenandoahGenerationalHeap::heap()->age_census()->tenuring_threshold()) {
 _young_bytes_to_promote += live;
 }
+ if (r->reserved_for_direct_allocation()) {
+   r->release_from_direct_allocation();
+ }
 } else if (r->is_old()) {
 _old_bytes_to_evacuate += live;
 _old_garbage += garbage;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index 35311e825063b..e75545f8ca9e5 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -448,6 +448,10 @@ void ShenandoahRegionPartitions::move_from_partition_to_partition(idx_t idx, She
 "Orig partition used: %zu must exceed moved used: %zu within region %zd",
 _used[int(orig_partition)], used, idx);
+ if (orig_partition == ShenandoahFreeSetPartitionId::Mutator && r->reserved_for_direct_allocation()) {
+   r->release_from_direct_allocation();
+ }
+
 _membership[int(orig_partition)].clear_bit(idx);
 _membership[int(new_partition)].set_bit(idx);
@@ -601,6 +605,7 @@ void ShenandoahRegionPartitions::assert_bounds() {
 idx_t rightmosts[UIntNumPartitions];
 idx_t empty_leftmosts[UIntNumPartitions];
 idx_t empty_rightmosts[UIntNumPartitions];
+ ShenandoahHeap* heap = ShenandoahHeap::heap();
 for (uint i = 0; i < UIntNumPartitions; i++) {
 leftmosts[i] = _max;
@@ -621,18 +626,31 @@ void ShenandoahRegionPartitions::assert_bounds() {
 {
 size_t capacity = _free_set->alloc_capacity(i);
 bool is_empty = (capacity == _region_size_bytes);
- assert(capacity > 0, "free regions must have allocation capacity");
+ // TODO: remove this assert; it cannot pass once mutators are allowed to allocate without the heap lock.
+ //assert(capacity > 0, "free regions must have allocation capacity"); if (i < leftmosts[int(partition)]) { leftmosts[int(partition)] = i; } if (is_empty && (i < empty_leftmosts[int(partition)])) { - empty_leftmosts[int(partition)] = i; + if (partition == ShenandoahFreeSetPartitionId::Mutator) { + if (!heap->get_region(i)->reserved_for_direct_allocation()){ + empty_leftmosts[int(partition)] = i; + } + } else { + empty_leftmosts[int(partition)] = i; + } } if (i > rightmosts[int(partition)]) { rightmosts[int(partition)] = i; } if (is_empty && (i > empty_rightmosts[int(partition)])) { - empty_rightmosts[int(partition)] = i; + if (partition == ShenandoahFreeSetPartitionId::Mutator) { + if (!heap->get_region(i)->reserved_for_direct_allocation()) { + empty_rightmosts[int(partition)] = i; + } + } else { + empty_rightmosts[int(partition)] = i; + } } break; } @@ -788,7 +806,7 @@ template HeapWord* ShenandoahFreeSet::allocate_with_affiliation(Iter& iterator, ShenandoahAffiliation affiliation, ShenandoahAllocRequest& req, bool& in_new_region) { for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) { ShenandoahHeapRegion* r = _heap->get_region(idx); - if (r->affiliation() == affiliation) { + if (r->affiliation() == affiliation && !r->reserved_for_direct_allocation()) { HeapWord* result = try_allocate_in(r, req, in_new_region); if (result != nullptr) { return result; @@ -1985,12 +2003,12 @@ void ShenandoahFreeSet::log_status() { } size_t max_humongous = max_contig * ShenandoahHeapRegion::region_size_bytes(); - size_t free = capacity() - used(); // Since certain regions that belonged to the Mutator free partition at the time of most recent rebuild may have been // retired, the sum of used and capacities within regions that are still in the Mutator free partition may not match // my internally tracked values of used() and free(). 
- assert(free == total_free, "Free memory should match"); + // TODO: remove this assert; free cannot match total_free, since mutators may allocate in a region without acquiring the lock + //assert(free == total_free, "Free memory should match"); ls.print("Free: %zu%s, Max: %zu%s regular, %zu%s humongous, ", byte_size_in_proper_unit(total_free), proper_unit_for_byte_size(total_free), byte_size_in_proper_unit(max), proper_unit_for_byte_size(max), @@ -2109,7 +2127,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ size_t actual_size = req.size(); for (uint i = 0u; i < max_probes; i++) { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); - if (r != nullptr) { + if (r != nullptr && r->reserved_for_direct_allocation()) { if (IS_TLAB) { obj = r->allocate_lab_atomic(req, actual_size); } else { @@ -2151,20 +2169,22 @@ class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreak public: ShenandoahHeapRegion** *_regions_to_refill; uint _refill_count; - uint _refilled_count = 0u; + uint _refilled_count; RefillDirectlyAllocatableRegionClosure(ShenandoahHeapRegion** *regions_to_refill, uint refill_count) : - _regions_to_refill(regions_to_refill), _refill_count(refill_count), _refilled_count(0) {}; + _regions_to_refill(regions_to_refill), _refill_count(refill_count), _refilled_count(0u) {}; bool heap_region_do(ShenandoahHeapRegion *r) override { if (r->is_empty() && !r->reserved_for_direct_allocation()) { - if (ShenandoahHeap::heap()->is_concurrent_weak_root_in_progress() && r->is_trash()) return false; - if (r->is_trash()) { - r->try_recycle_under_lock(); + if (ShenandoahHeap::heap()->is_concurrent_weak_root_in_progress() && r->is_trash()) { + return false; } + r->try_recycle_under_lock(); + r->reserve_for_direct_allocation(); r->set_affiliation(YOUNG_GENERATION); r->make_regular_allocation(YOUNG_GENERATION); + ShenandoahHeap::heap()->generation_for(r->affiliation())->increment_affiliated_region_count(); Atomic::store(_regions_to_refill[_refilled_count], r); _refilled_count++; return _refilled_count == _refill_count; @@ -2186,12 +2206,17 @@ bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_regi uint refill_count = 0u; uint regions_refilled_by_others = 0u; for (uint i = 0u; i < probed_region_count; i++) { - ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + probed_indexes[i]); + ShenandoahHeapRegion* r = Atomic::load(_directly_allocatable_regions + probed_indexes[i]); if (r == nullptr || r == probed_regions[i]) { if (r == nullptr) { regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; - } else if (r->free() < PLAB::min_size()) { - _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); + } else if (!r->reserved_for_direct_allocation()) { + regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; + Atomic::store(_directly_allocatable_regions + probed_indexes[i], static_cast<ShenandoahHeapRegion*>(nullptr)); + } else { + if (r->free() < PLAB::min_size() && _partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { + _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); + } r->release_from_direct_allocation(); regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; Atomic::store(_directly_allocatable_regions + probed_indexes[i], static_cast<ShenandoahHeapRegion*>(nullptr)); diff --git
a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp index 49f28335c4d45..2ecb659ae1fe3 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp @@ -115,6 +115,9 @@ HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAll assert(this->is_regular(), "must be a regular region"); for (;;) { + if (!reserved_for_direct_allocation()) { + return nullptr; + } HeapWord* obj = top(); if (pointer_delta(end(), obj) >= size) { if (try_allocate(obj, size)) { @@ -133,6 +136,9 @@ HeapWord* ShenandoahHeapRegion::allocate_lab_atomic(const ShenandoahAllocRequest assert(this->is_regular(), "must be a regular region"); size_t adjusted_size = req.size(); for (;;) { + if (!reserved_for_direct_allocation()) { + return nullptr; + } HeapWord* obj = top(); size_t free_words = align_down(byte_size(obj, end()) >> LogHeapWordSize, MinObjAlignment); if (adjusted_size > free_words) { From d3cebfc3ba209f330ad4f0425eb0a8ed44f2df43 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 2 Jul 2025 01:09:14 -0700 Subject: [PATCH 12/44] Fixes --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 53 ++++++++++++------- .../share/gc/shenandoah/shenandoahFreeSet.hpp | 7 ++- .../share/gc/shenandoah/shenandoahFullGC.cpp | 2 + 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index e75545f8ca9e5..aa56eeb52a24d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2106,10 +2106,22 @@ HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_ HeapWord* ShenandoahFreeSet::allocate_humongous(ShenandoahAllocRequest& req) { assert(ShenandoahHeapRegion::requires_humongous(req.size()), "Must be humongous alloc"); - ShenandoahHeapLocker locker(_heap->lock()); + ShenandoahHeapLocker locker(_heap->lock(), req.is_mutator_alloc()); return allocate_contiguous(req); } +void ShenandoahFreeSet::release_all_directly_allocatable_regions() { + for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + ShenandoahHeapRegion* r = Atomic::load(_directly_allocatable_regions + i); + if (r != nullptr) { + if (r->reserved_for_direct_allocation()) { + r->release_from_direct_allocation(); + } + Atomic::store(_directly_allocatable_regions + i, static_cast(nullptr)); + } + } +} + template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) { shenandoah_assert_not_heaplocked(); @@ -2117,14 +2129,15 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not"); assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be"); - uint process_id = static_cast(os::current_process_id()); - constexpr uint max_probes = 3u; + uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount; + const uint max_probes = ShenandoahDirectlyAllocatableRegionCount; for (;;) { - uint idx = process_id % ShenandoahDirectlyAllocatableRegionCount; + uint idx = hash % ShenandoahDirectlyAllocatableRegionCount; ShenandoahHeapRegion* probed_regions[max_probes]; uint probed_indexes[max_probes]; HeapWord* obj = nullptr; size_t 
actual_size = req.size(); + size_t min_requested_size = IS_TLAB ? req.min_size() : actual_size; for (uint i = 0u; i < max_probes; i++) { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); if (r != nullptr && r->reserved_for_direct_allocation()) { @@ -2154,7 +2167,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ } // Failed to allocate in 3 consecutive directly allocatable regions. // Try to retire the region if the free size is less than minimal tlab size and try to replace with a new region. - if (!try_refill_directly_allocatable_regions(max_probes, probed_indexes, probed_regions)) { + if (!try_refill_directly_allocatable_regions(max_probes, probed_indexes, probed_regions, min_requested_size)) { return nullptr; } } @@ -2170,12 +2183,14 @@ class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreak ShenandoahHeapRegion** *_regions_to_refill; uint _refill_count; uint _refilled_count; + size_t _min_req_byte_size; - RefillDirectlyAllocatableRegionClosure(ShenandoahHeapRegion** *regions_to_refill, uint refill_count) : - _regions_to_refill(regions_to_refill), _refill_count(refill_count), _refilled_count(0u) {}; + RefillDirectlyAllocatableRegionClosure(ShenandoahHeapRegion** *regions_to_refill, uint refill_count, size_t min_req_size) : + _regions_to_refill(regions_to_refill), _refill_count(refill_count), _refilled_count(0u), _min_req_byte_size(min_req_size * HeapWordSize) {} bool heap_region_do(ShenandoahHeapRegion *r) override { - if (r->is_empty() && !r->reserved_for_direct_allocation()) { + if (r->reserved_for_direct_allocation()) return false; + if (r->is_empty()) { if (ShenandoahHeap::heap()->is_concurrent_weak_root_in_progress() && r->is_trash()) { return false; } @@ -2185,18 +2200,19 @@ class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreak r->set_affiliation(YOUNG_GENERATION); r->make_regular_allocation(YOUNG_GENERATION); ShenandoahHeap::heap()->generation_for(r->affiliation())->increment_affiliated_region_count(); - Atomic::store(_regions_to_refill[_refilled_count], r); - _refilled_count++; - return _refilled_count == _refill_count; + Atomic::store(_regions_to_refill[_refilled_count++], r); + } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() && r->free() >= _min_req_byte_size) { + r->reserve_for_direct_allocation(); + Atomic::store(_regions_to_refill[_refilled_count++], r); } - return false; + return _refilled_count == _refill_count; } }; bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_region_count, uint probed_indexes[], - ShenandoahHeapRegion* probed_regions[] - ) { + ShenandoahHeapRegion* probed_regions[], + size_t min_req_size) { assert(Thread::current()->is_Java_thread(), "Must be mutator"); assert(probed_region_count > 0u && probed_region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be"); shenandoah_assert_not_heaplocked(); @@ -2212,12 +2228,13 @@ bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_regi regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; } else if (!r->reserved_for_direct_allocation()) { regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; - Atomic::store(_directly_allocatable_regions + probed_indexes[i] , static_cast(nullptr)); + Atomic::store(_directly_allocatable_regions + probed_indexes[i] , static_cast(nullptr)); } else { + r->release_from_direct_allocation(); + OrderAccess::fence(); if (r->free() 
< PLAB::min_size() && _partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); } - r->release_from_direct_allocation(); regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; Atomic::store(_directly_allocatable_regions + probed_indexes[i], static_cast<ShenandoahHeapRegion*>(nullptr)); } @@ -2226,11 +2243,11 @@ bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_regi } } - RefillDirectlyAllocatableRegionClosure cl(regions_to_refill, refill_count); + RefillDirectlyAllocatableRegionClosure cl(regions_to_refill, refill_count, min_req_size); if (refill_count > 0u) { iterate_regions_for_alloc(&cl, true); } - return cl._refilled_count > 0u || regions_refilled_by_others > 0u;; + return cl._refilled_count > 0u || regions_refilled_by_others > 0u; } template <bool IS_MUTATOR, bool IS_OLD> diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 562c39f9d0273..b907ba9fd9a5c 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -411,7 +411,10 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { // log status, assuming lock has already been acquired by the caller. void log_status(); - bool try_refill_directly_allocatable_regions(uint probed_region_count, uint probed_indexes[], ShenandoahHeapRegion* probed_regions[]); + bool try_refill_directly_allocatable_regions(uint probed_region_count, + uint probed_indexes[], + ShenandoahHeapRegion* probed_regions[], + size_t min_req_size); template <bool IS_MUTATOR, bool IS_OLD> uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty); @@ -495,6 +498,8 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { HeapWord* allocate_humongous(ShenandoahAllocRequest &req); + void release_all_directly_allocatable_regions(); + template <bool IS_TLAB> HeapWord* par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); /* diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp index 27ff45e67de19..8d5eaac4c3d66 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -219,6 +219,8 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { heap->tlabs_retire(ResizeTLAB); } + heap->free_set()->release_all_directly_allocatable_regions(); + OrderAccess::fence(); phase1_mark_heap(); From 4caa8011a4c91a61093fcfb7c3a8ea2644d0ae05 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 2 Jul 2025 18:00:43 -0700 Subject: [PATCH 13/44] Fix humongous allocation failure --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index aa56eeb52a24d..c2b8691a5d468 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -1248,6 +1248,7 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { return nullptr; } end = beg; + region = _heap->get_region(end); } if ((end - beg + 1) == num) { @@ -1256,7 +1257,6 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { } end++; - region = _heap->get_region(end); } size_t remainder = words_size & ShenandoahHeapRegion::region_size_words_mask(); From
64015b3631d4552ddbbcd97bc575a23fac915469 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 2 Jul 2025 22:05:39 -0700 Subject: [PATCH 14/44] Fix more asserts --- .../shenandoahGenerationalHeuristics.cpp | 3 ++- .../heuristics/shenandoahHeuristics.cpp | 3 ++- .../gc/shenandoah/shenandoahCollectionSet.cpp | 3 ++- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 16 ++++++++++++++-- .../share/gc/shenandoah/shenandoahFreeSet.hpp | 3 ++- .../share/gc/shenandoah/shenandoahGeneration.cpp | 2 +- .../share/gc/shenandoah/shenandoahHeapRegion.cpp | 2 ++ .../shenandoah/shenandoahHeapRegion.inline.hpp | 4 ++-- 8 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp index 700dc8ac4e272..71fff8689ad0e 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp @@ -27,6 +27,7 @@ #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahEvacInfo.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahGeneration.hpp" #include "gc/shenandoah/shenandoahGenerationalHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" @@ -95,7 +96,7 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio immediate_garbage += garbage; region->make_trash_immediate(); if (region->reserved_for_direct_allocation()) { - region->release_from_direct_allocation(); + heap->free_set()->release_directly_allocatable_region(region); } } else { bool is_candidate; diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp index 399675d5287c1..f85ef76e8e27a 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp @@ -27,6 +27,7 @@ #include "gc/shared/gcCause.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp" #include "logging/log.hpp" @@ -112,7 +113,7 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec immediate_garbage += garbage; region->make_trash_immediate(); if (region->reserved_for_direct_allocation()) { - region->release_from_direct_allocation(); + heap->free_set()->release_directly_allocatable_region(region); } } else { // This is our candidate for later consideration. 
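// A standalone sketch of the reserve/release handshake these patches rely on: the GC,
// under the heap lock, clears a region's reservation before recycling or evacuating it,
// and the mutator's CAS loop re-checks the flag on every round, so no new object can
// land in a region after it has been released. Plain std::atomic stands in for HotSpot's
// Atomic wrapper; names and layout are illustrative, not the actual implementation.
#include <atomic>
#include <cstddef>

struct DirectRegion {
  std::atomic<bool>  reserved{false};  // analogue of reserved_for_direct_allocation()
  std::atomic<char*> top{nullptr};     // current allocation pointer (top <= end assumed)
  char*              end{nullptr};     // region limit

  char* cas_allocate(std::size_t bytes) {
    for (;;) {
      if (!reserved.load(std::memory_order_acquire)) {
        return nullptr;                // released (trash, cset, or moved partitions)
      }
      char* obj = top.load(std::memory_order_relaxed);
      if (static_cast<std::size_t>(end - obj) < bytes) {
        return nullptr;                // not enough space left in this region
      }
      if (top.compare_exchange_weak(obj, obj + bytes)) {
        return obj;                    // bump succeeded
      }                                // lost the race; retry with a fresh top
    }
  }
};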
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp index bf3de15fecb79..60acaf349da96 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp @@ -27,6 +27,7 @@ #include "gc/shenandoah/shenandoahAgeCensus.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" @@ -102,7 +103,7 @@ void ShenandoahCollectionSet::add_region(ShenandoahHeapRegion* r) { _young_bytes_to_promote += live; } if (r->reserved_for_direct_allocation()) { - r->release_from_direct_allocation(); + _heap->free_set()->release_directly_allocatable_region(r); } } else if (r->is_old()) { _old_bytes_to_evacuate += live; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index c2b8691a5d468..b225132395e2d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -449,7 +449,7 @@ void ShenandoahRegionPartitions::move_from_partition_to_partition(idx_t idx, She _used[int(orig_partition)], used, idx); if (orig_partition == ShenandoahFreeSetPartitionId::Mutator && r->reserved_for_direct_allocation()) { - r->release_from_direct_allocation(); + ShenandoahHeap::heap()->free_set()->release_directly_allocatable_region(r); } _membership[int(orig_partition)].clear_bit(idx); @@ -2117,7 +2117,7 @@ void ShenandoahFreeSet::release_all_directly_allocatable_regions() { if (r->reserved_for_direct_allocation()) { r->release_from_direct_allocation(); } - Atomic::store(_directly_allocatable_regions + i, static_cast<ShenandoahHeapRegion*>(nullptr)); + Atomic::release_store(_directly_allocatable_regions + i, static_cast<ShenandoahHeapRegion*>(nullptr)); } } } @@ -2250,6 +2250,18 @@ bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_regi return cl._refilled_count > 0u || regions_refilled_by_others > 0u; } +void ShenandoahFreeSet::release_directly_allocatable_region(ShenandoahHeapRegion* region) { + shenandoah_assert_heaplocked(); + region->release_from_direct_allocation(); + for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + ShenandoahHeapRegion** shared_region = _directly_allocatable_regions + i; + if (Atomic::load(shared_region) == region) { + Atomic::release_store(shared_region, static_cast<ShenandoahHeapRegion*>(nullptr)); + break; + } + } +} + template <bool IS_MUTATOR, bool IS_OLD> uint ShenandoahFreeSet::iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty) { assert((IS_MUTATOR && !IS_OLD) || !IS_MUTATOR, "Sanity check"); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index b907ba9fd9a5c..2547a40e4d0e2 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -415,7 +415,6 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { uint probed_indexes[], ShenandoahHeapRegion* probed_regions[], size_t min_req_size); - template <bool IS_MUTATOR, bool IS_OLD> uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty); @@ -500,6 +499,8 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> { void release_all_directly_allocatable_regions(); + void release_directly_allocatable_region(ShenandoahHeapRegion *region); + template <bool IS_TLAB> HeapWord*
par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); /* diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp index 9a511de939ccb..01de1cfb3fea8 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp @@ -565,7 +565,7 @@ size_t ShenandoahGeneration::select_aged_regions(size_t old_available) { // old generation. HeapWord* tams = ctx->top_at_mark_start(r); HeapWord* original_top = r->top(); - if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top) { + if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top && !r->reserved_for_direct_allocation()) { // No allocations from this region have been made during concurrent mark. It meets all the criteria // for in-place-promotion. Though we only need the value of top when we fill the end of the region, // we use this field to indicate that this region should be promoted in place during the evacuation diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp index 0d342c7c03e37..37b9b58f5aa20 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp @@ -855,6 +855,8 @@ size_t ShenandoahHeapRegion::pin_count() const { } void ShenandoahHeapRegion::set_affiliation(ShenandoahAffiliation new_affiliation) { + assert(new_affiliation != OLD_GENERATION || !reserved_for_direct_allocation(), "Reserved region can't move to old"); + ShenandoahHeap* heap = ShenandoahHeap::heap(); ShenandoahAffiliation region_affiliation = heap->region_affiliation(this); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp index 2ecb659ae1fe3..2a0796870ab46 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp @@ -112,7 +112,7 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAllocRequest& req) { assert(is_object_aligned(size), "alloc size breaks alignment: %zu", size); assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established"); - assert(this->is_regular(), "must be a regular region"); + assert(this->is_regular() || this->is_regular_pinned(), "must be a regular region"); for (;;) { if (!reserved_for_direct_allocation()) { @@ -133,7 +133,7 @@ HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAll HeapWord* ShenandoahHeapRegion::allocate_lab_atomic(const ShenandoahAllocRequest& req, size_t &actual_size) { assert(req.is_lab_alloc(), "Only lab alloc"); assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established"); - assert(this->is_regular(), "must be a regular region"); + assert(this->is_regular() || this->is_regular_pinned(), "must be a regular region"); size_t adjusted_size = req.size(); for (;;) { if (!reserved_for_direct_allocation()) { From 94e538cb9af82ab6476ad1540adcd8d3fe2fa3c3 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 2 Jul 2025 22:33:06 -0700 Subject: [PATCH 15/44] Fix build error --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index b225132395e2d..5e38f76ac20b1 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2163,7 +2163,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ } probed_indexes[i] = idx; probed_regions[i] = r; - idx = (++idx) % ShenandoahDirectlyAllocatableRegionCount; + idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount; } // Failed to allocate in 3 consecutive directly allocatable regions. // Try to retire the region if the free size is less than minimal tlab size and try to replace with a new region. From 37cee1f5e4430ce2ba0e695055c6e954d0912aea Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Wed, 2 Jul 2025 23:58:33 -0700 Subject: [PATCH 16/44] Remove use of heap lock when update used --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 41 +++++------------ .../share/gc/shenandoah/shenandoahFreeSet.hpp | 44 ++++++------------- 2 files changed, 26 insertions(+), 59 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 5e38f76ac20b1..98f3660b6cedb 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -33,6 +33,7 @@ #include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahSimpleBitMap.hpp" #include "gc/shenandoah/shenandoahSimpleBitMap.inline.hpp" +#include "gc/shenandoah/shenandoahUtils.hpp" #include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" @@ -229,7 +230,6 @@ void ShenandoahRegionPartitions::make_all_regions_unavailable() { _rightmosts_empty[partition_id] = -1;; _capacity[partition_id] = 0; _used[partition_id] = 0; - _available[partition_id] = FreeSetUnderConstruction; } _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = _region_counts[int(ShenandoahFreeSetPartitionId::Collector)] = 0; } @@ -244,21 +244,18 @@ void ShenandoahRegionPartitions::establish_mutator_intervals(idx_t mutator_leftm _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_leftmost_empty; _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_rightmost_empty; - _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count; - _used[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_used; - _capacity[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count * _region_size_bytes; - _available[int(ShenandoahFreeSetPartitionId::Mutator)] = - _capacity[int(ShenandoahFreeSetPartitionId::Mutator)] - _used[int(ShenandoahFreeSetPartitionId::Mutator)]; + Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::Mutator), mutator_region_count); + Atomic::store(_used + int(ShenandoahFreeSetPartitionId::Mutator), mutator_used); + Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::Mutator), mutator_region_count * _region_size_bytes); _leftmosts[int(ShenandoahFreeSetPartitionId::Collector)] = _max; _rightmosts[int(ShenandoahFreeSetPartitionId::Collector)] = -1; _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = _max; _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = -1; - _region_counts[int(ShenandoahFreeSetPartitionId::Collector)] = 0; - _used[int(ShenandoahFreeSetPartitionId::Collector)] = 0; - 
_capacity[int(ShenandoahFreeSetPartitionId::Collector)] = 0; - _available[int(ShenandoahFreeSetPartitionId::Collector)] = 0; + Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::Collector), 0ul); + Atomic::store(_used + int(ShenandoahFreeSetPartitionId::Collector), 0ul); + Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::Collector), 0ul); } void ShenandoahRegionPartitions::establish_old_collector_intervals(idx_t old_collector_leftmost, idx_t old_collector_rightmost, @@ -272,22 +269,14 @@ void ShenandoahRegionPartitions::establish_old_collector_intervals(idx_t old_col _leftmosts_empty[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_leftmost_empty; _rightmosts_empty[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_rightmost_empty; - _region_counts[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_region_count; - _used[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_used; - _capacity[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_region_count * _region_size_bytes; - _available[int(ShenandoahFreeSetPartitionId::OldCollector)] = - _capacity[int(ShenandoahFreeSetPartitionId::OldCollector)] - _used[int(ShenandoahFreeSetPartitionId::OldCollector)]; + Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::OldCollector), old_collector_region_count); + Atomic::store(_used + int(ShenandoahFreeSetPartitionId::OldCollector), old_collector_used); + Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::OldCollector), old_collector_region_count * _region_size_bytes); } void ShenandoahRegionPartitions::increase_used(ShenandoahFreeSetPartitionId which_partition, size_t bytes) { - shenandoah_assert_heaplocked(); assert (which_partition < NumPartitions, "Partition must be valid"); - - _used[int(which_partition)] += bytes; - _available[int(which_partition)] -= bytes; - assert (_used[int(which_partition)] <= _capacity[int(which_partition)], - "Must not use (%zu) more than capacity (%zu) after increase by %zu", - _used[int(which_partition)], _capacity[int(which_partition)], bytes); + Atomic::add(_used + int(which_partition), bytes); } inline void ShenandoahRegionPartitions::shrink_interval_if_range_modifies_either_boundary( @@ -389,7 +378,6 @@ void ShenandoahRegionPartitions::make_free(idx_t idx, ShenandoahFreeSetPartition _membership[int(which_partition)].set_bit(idx); _capacity[int(which_partition)] += _region_size_bytes; _used[int(which_partition)] += _region_size_bytes - available; - _available[int(which_partition)] += available; expand_interval_if_boundary_modified(which_partition, idx, available); _region_counts[int(which_partition)]++; } @@ -457,12 +445,10 @@ void ShenandoahRegionPartitions::move_from_partition_to_partition(idx_t idx, She _capacity[int(orig_partition)] -= _region_size_bytes; _used[int(orig_partition)] -= used; - _available[int(orig_partition)] -= available; shrink_interval_if_boundary_modified(orig_partition, idx); _capacity[int(new_partition)] += _region_size_bytes;; _used[int(new_partition)] += used; - _available[int(new_partition)] += available; expand_interval_if_boundary_modified(new_partition, idx, available); _region_counts[int(orig_partition)]--; @@ -2154,10 +2140,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ in_new_region = true; } assert(req.is_young(), "Mutator allocations always come from young generation."); - { - ShenandoahHeapLocker locker(_heap->lock());
_partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); - } + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); return obj; } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 2547a40e4d0e2..3bc50fcf05f41 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -78,10 +78,9 @@ class ShenandoahRegionPartitions { // are denoted in bytes. Note that some regions that had been assigned to a particular partition at rebuild time // may have been retired following the rebuild. The tallies for these regions are still reflected in _capacity[p] // and _used[p], even though the region may have been removed from the free set. - size_t _capacity[UIntNumPartitions]; - size_t _used[UIntNumPartitions]; - size_t _available[UIntNumPartitions]; - size_t _region_counts[UIntNumPartitions]; + size_t volatile _capacity[UIntNumPartitions]; + size_t volatile _used[UIntNumPartitions]; + size_t volatile _region_counts[UIntNumPartitions]; // For each partition p, _left_to_right_bias is true iff allocations are normally made from lower indexed regions // before higher indexed regions. @@ -213,56 +212,41 @@ class ShenandoahRegionPartitions { inline size_t capacity_of(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - return _capacity[int(which_partition)]; + return Atomic::load(_capacity + int(which_partition)); } inline size_t used_by(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - return _used[int(which_partition)]; + return Atomic::load(_used + int(which_partition)); } inline size_t available_in(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - shenandoah_assert_heaplocked(); - assert(_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)], - "Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s", - _available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)], - partition_membership_name(ssize_t(which_partition))); - return _available[int(which_partition)]; + auto available = capacity_of(which_partition) - used_by(which_partition); + return available >= 0 ? available : 0; } // Return available_in assuming caller does not hold the heap lock. In production builds, available is // returned without acquiring the lock. In debug builds, the global heap lock is acquired in order to // enforce a consistency assert. 
inline size_t available_in_not_locked(ShenandoahFreeSetPartitionId which_partition) const { - assert (which_partition < NumPartitions, "selected free set must be valid"); - shenandoah_assert_not_heaplocked(); -#ifdef ASSERT - ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock()); - assert((_available[int(which_partition)] == FreeSetUnderConstruction) || - (_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)]), - "Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s", - _available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)], - partition_membership_name(ssize_t(which_partition))); -#endif - return _available[int(which_partition)]; + return available_in(which_partition); } inline void set_capacity_of(ShenandoahFreeSetPartitionId which_partition, size_t value) { - shenandoah_assert_heaplocked(); assert (which_partition < NumPartitions, "selected free set must be valid"); - _capacity[int(which_partition)] = value; - _available[int(which_partition)] = value - _used[int(which_partition)]; + Atomic::store(_capacity + int(which_partition), value); } inline void set_used_by(ShenandoahFreeSetPartitionId which_partition, size_t value) { - shenandoah_assert_heaplocked(); assert (which_partition < NumPartitions, "selected free set must be valid"); - _used[int(which_partition)] = value; - _available[int(which_partition)] = _capacity[int(which_partition)] - value; + Atomic::store(_used + int(which_partition), value); } - inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { return _region_counts[int(which_partition)]; } + inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free set must be valid"); + return Atomic::load(_region_counts + int(which_partition)); + } // Assure leftmost, rightmost, leftmost_empty, and rightmost_empty bounds are valid for all free sets. // Valid bounds honor all of the following (where max is the number of heap regions): From 977bebf931f240cbc63e880b0fc292363ac43ca4 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 01:03:11 -0700 Subject: [PATCH 17/44] Adjust alloc logic --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 98f3660b6cedb..38531ba833820 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -888,7 +888,7 @@ HeapWord* ShenandoahFreeSet::allocate_from_regions(Iter& iterator, ShenandoahAll for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) { ShenandoahHeapRegion* r = _heap->get_region(idx); size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab) ? 
req.min_size() : req.size(); - if (alloc_capacity(r) >= min_size) { + if (!r->reserved_for_direct_allocation() && alloc_capacity(r) >= min_size) { HeapWord* result = try_allocate_in(r, req, in_new_region); if (result != nullptr) { return result; @@ -2115,15 +2115,17 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not"); assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be"); + size_t actual_size = req.size(); + size_t min_requested_size = IS_TLAB ? req.min_size() : actual_size; uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount; const uint max_probes = ShenandoahDirectlyAllocatableRegionCount; + for (;;) { uint idx = hash % ShenandoahDirectlyAllocatableRegionCount; ShenandoahHeapRegion* probed_regions[max_probes]; uint probed_indexes[max_probes]; HeapWord* obj = nullptr; - size_t actual_size = req.size(); - size_t min_requested_size = IS_TLAB ? req.min_size() : actual_size; + uint count = 0; for (uint i = 0u; i < max_probes; i++) { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); if (r != nullptr && r->reserved_for_direct_allocation()) { @@ -2144,14 +2146,23 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ return obj; } } - probed_indexes[i] = idx; - probed_regions[i] = r; + + if (r == nullptr || r->free() < PLAB::min_size()) { + probed_indexes[count] = idx; + probed_regions[count] = r; + count++; + } idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount; } // Failed to allocate in 3 consecutive directly allocatable regions. // Try to retire the region if the free size is less than minimal tlab size and try to replace with a new region. 
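// A standalone sketch (plain std::atomic; slot count and helper names are illustrative,
// not HotSpot's) of the probing scheme described in the comment above: start at a
// per-thread hash, try to CAS-allocate from a few consecutive shared slots, and fall
// back to retiring/refilling the exhausted slots under the heap lock.
#include <atomic>
#include <cstddef>

struct DirectRegion;                                     // the CAS bump-pointer region sketched earlier
char* cas_allocate(DirectRegion* r, std::size_t bytes);  // assumed helper: nullptr when full or released

constexpr unsigned kSlotCount = 7;                       // stand-in for ShenandoahDirectlyAllocatableRegionCount
extern std::atomic<DirectRegion*> g_slots[kSlotCount];   // the shared directly-allocatable region slots

char* probe_allocate(std::size_t bytes, unsigned thread_hash) {
  unsigned idx = thread_hash % kSlotCount;
  for (unsigned i = 0; i < 3; i++) {                     // probe 3 consecutive slots
    DirectRegion* r = g_slots[idx].load(std::memory_order_acquire);
    if (r != nullptr) {
      if (char* obj = cas_allocate(r, bytes)) {
        return obj;
      }
    }
    idx = (idx + 1) % kSlotCount;                        // wrap to the next slot
  }
  return nullptr;  // caller retires slots below the minimum TLAB size and refills them under the lock
}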
- if (!try_refill_directly_allocatable_regions(max_probes, probed_indexes, probed_regions, min_requested_size)) { - return nullptr; + if (count == 0u) { + ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); + return allocate_for_mutator(req, in_new_region); + } else { + if (!try_refill_directly_allocatable_regions(count, probed_indexes, probed_regions, min_requested_size)) { + return nullptr; + } } } } @@ -2192,26 +2203,23 @@ class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreak } }; -bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(uint probed_region_count, +bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(const uint region_count, uint probed_indexes[], ShenandoahHeapRegion* probed_regions[], size_t min_req_size) { assert(Thread::current()->is_Java_thread(), "Must be mutator"); - assert(probed_region_count > 0u && probed_region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be"); + assert(region_count > 0u && region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be"); shenandoah_assert_not_heaplocked(); ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); - ShenandoahHeapRegion** regions_to_refill[probed_region_count]; + ShenandoahHeapRegion** regions_to_refill[region_count]; uint refill_count = 0u; uint regions_refilled_by_others = 0u; - for (uint i = 0u; i < probed_region_count; i++) { + for (uint i = 0u; i < region_count; i++) { ShenandoahHeapRegion* r = Atomic::load(_directly_allocatable_regions + probed_indexes[i]); if (r == nullptr || r == probed_regions[i]) { if (r == nullptr) { regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; - } else if (!r->reserved_for_direct_allocation()) { - regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; - Atomic::store(_directly_allocatable_regions + probed_indexes[i] , static_cast(nullptr)); } else { r->release_from_direct_allocation(); OrderAccess::fence(); From 4faf6182091c921064f335d7a586bbe259d3d149 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 01:27:04 -0700 Subject: [PATCH 18/44] Fix build error --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 3bc50fcf05f41..8743fd8770482 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -222,8 +222,7 @@ class ShenandoahRegionPartitions { inline size_t available_in(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - auto available = capacity_of(which_partition) - used_by(which_partition); - return available >= 0 ? available : 0; + return capacity_of(which_partition) - used_by(which_partition); } // Return available_in assuming caller does not hold the heap lock. 
In production builds, available is From d509856bc5bebd65c7eb4421ab69e6757175f10d Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 13:51:52 -0700 Subject: [PATCH 19/44] More refactors --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 121 +++++++++++------- .../share/gc/shenandoah/shenandoahFreeSet.hpp | 8 +- 2 files changed, 76 insertions(+), 53 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 38531ba833820..3ac86b1f76f0c 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2098,12 +2098,13 @@ HeapWord* ShenandoahFreeSet::allocate_humongous(ShenandoahAllocRequest& req) { void ShenandoahFreeSet::release_all_directly_allocatable_regions() { for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) { - ShenandoahHeapRegion* r = Atomic::load(_directly_allocatable_regions + i); + ShenandoahHeapRegion** address = _directly_allocatable_regions + i; + ShenandoahHeapRegion* r = Atomic::load_acquire(address); if (r != nullptr) { if (r->reserved_for_direct_allocation()) { r->release_from_direct_allocation(); } - Atomic::release_store(_directly_allocatable_regions + i, static_cast(nullptr)); + Atomic::release_store(address, static_cast(nullptr)); } } } @@ -2117,17 +2118,17 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ size_t actual_size = req.size(); size_t min_requested_size = IS_TLAB ? req.min_size() : actual_size; - uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount; - const uint max_probes = ShenandoahDirectlyAllocatableRegionCount; + const uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount; for (;;) { uint idx = hash % ShenandoahDirectlyAllocatableRegionCount; - ShenandoahHeapRegion* probed_regions[max_probes]; - uint probed_indexes[max_probes]; + ShenandoahHeapRegion* retirable_regions[ShenandoahDirectlyAllocatableRegionCount]; + ShenandoahHeapRegion** retirable_shared_regions_addresses[ShenandoahDirectlyAllocatableRegionCount]; HeapWord* obj = nullptr; - uint count = 0; - for (uint i = 0u; i < max_probes; i++) { - ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); + uint count = 0u; + for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + idx; + ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address); if (r != nullptr && r->reserved_for_direct_allocation()) { if (IS_TLAB) { obj = r->allocate_lab_atomic(req, actual_size); @@ -2148,8 +2149,9 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ } if (r == nullptr || r->free() < PLAB::min_size()) { - probed_indexes[count] = idx; - probed_regions[count] = r; + // Region is ready to retire + retirable_regions[count] = r; + retirable_shared_regions_addresses[count] = shared_region_address; count++; } idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount; @@ -2160,7 +2162,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); return allocate_for_mutator(req, in_new_region); } else { - if (!try_refill_directly_allocatable_regions(count, probed_indexes, probed_regions, min_requested_size)) { + if (!try_allocate_directly_allocatable_regions(count, 
retirable_shared_regions_addresses, retirable_regions, min_requested_size)) { return nullptr; } } @@ -2172,15 +2174,26 @@ template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(Shen template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); -class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreakableIterClosure { +class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBreakableIterClosure { public: - ShenandoahHeapRegion** *_regions_to_refill; - uint _refill_count; - uint _refilled_count; - size_t _min_req_byte_size; - - RefillDirectlyAllocatableRegionClosure(ShenandoahHeapRegion** *regions_to_refill, uint refill_count, size_t min_req_size) : - _regions_to_refill(regions_to_refill), _refill_count(refill_count), _refilled_count(0u), _min_req_byte_size(min_req_size * HeapWordSize) {} + ShenandoahHeapRegion*** _shared_region_addresses; + const uint _shared_region_address_count; + uint _current_index = 0u; + const uint _request_count; + uint _fulfilled_count = 0u; + const size_t _min_req_byte_size; + + DirectlyAllocatableRegionAllocationClosure( + ShenandoahHeapRegion*** shared_region_addresses, const uint shared_region_address_count, const uint request_count, const size_t min_req_size) + : _shared_region_addresses(shared_region_addresses), _shared_region_address_count(shared_region_address_count), _request_count(request_count), _min_req_byte_size(min_req_size * HeapWordSize) { + skip_invalid_address(); + } + + void skip_invalid_address() { + while (_current_index < _shared_region_address_count && _shared_region_addresses[_current_index] == nullptr) { + _current_index++; + } + } bool heap_region_do(ShenandoahHeapRegion *r) override { if (r->reserved_for_direct_allocation()) return false; @@ -2194,60 +2207,70 @@ class RefillDirectlyAllocatableRegionClosure : public ShenandoahHeapRegionBreak r->set_affiliation(YOUNG_GENERATION); r->make_regular_allocation(YOUNG_GENERATION); ShenandoahHeap::heap()->generation_for(r->affiliation())->increment_affiliated_region_count(); - Atomic::store(_regions_to_refill[_refilled_count++], r); + Atomic::store(_shared_region_addresses[_current_index++], r); + skip_invalid_address(); + _fulfilled_count++; } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() && r->free() >= _min_req_byte_size) { r->reserve_for_direct_allocation(); - Atomic::store(_regions_to_refill[_refilled_count++], r); + Atomic::store(_shared_region_addresses[_current_index++], r); + skip_invalid_address(); + _fulfilled_count++; } - return _refilled_count == _refill_count; + return _fulfilled_count == _request_count || _current_index == _shared_region_address_count; } }; -bool ShenandoahFreeSet::try_refill_directly_allocatable_regions(const uint region_count, - uint probed_indexes[], - ShenandoahHeapRegion* probed_regions[], - size_t min_req_size) { +bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(const uint region_count, + ShenandoahHeapRegion** shared_region_address[], + ShenandoahHeapRegion* original_shared_regions[], + size_t min_req_size) { assert(Thread::current()->is_Java_thread(), "Must be mutator"); assert(region_count > 0u && region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be"); shenandoah_assert_not_heaplocked(); ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); - ShenandoahHeapRegion** regions_to_refill[region_count]; - uint refill_count = 0u; - uint regions_refilled_by_others = 0u; + uint 
request_count = 0u; + uint fulfilled_by_others = 0u; for (uint i = 0u; i < region_count; i++) { - ShenandoahHeapRegion* r = Atomic::load(_directly_allocatable_regions + probed_indexes[i]); - if (r == nullptr || r == probed_regions[i]) { - if (r == nullptr) { - regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; - } else { - r->release_from_direct_allocation(); - OrderAccess::fence(); - if (r->free() < PLAB::min_size() && _partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { - _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); + ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address[i]); + if (r != original_shared_regions[i]) { + fulfilled_by_others++; + shared_region_address[i] = nullptr; + original_shared_regions[i] = nullptr; + } else { + request_count++; + if (r != nullptr) { + if (r->free() < PLAB::min_size()) { + r->release_from_direct_allocation(); + Atomic::release_store(shared_region_address[i], static_cast(nullptr)); + if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { + _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); + } + } else { + // Although r is same as original one when tried CAS allocation, but it has more free space. + fulfilled_by_others++; + shared_region_address[i] = nullptr; + original_shared_regions[i] = nullptr; + request_count--; } - regions_to_refill[refill_count++] = _directly_allocatable_regions + probed_indexes[i]; - Atomic::store(_directly_allocatable_regions + probed_indexes[i] , static_cast(nullptr)); } - } else { - regions_refilled_by_others++; } } - RefillDirectlyAllocatableRegionClosure cl(regions_to_refill, refill_count, min_req_size); - if (refill_count > 0u) { + DirectlyAllocatableRegionAllocationClosure cl(shared_region_address, region_count, request_count, min_req_size); + if (request_count > 0u) { iterate_regions_for_alloc(&cl, true); } - return cl._refilled_count > 0u || regions_refilled_by_others > 0u; + return cl._fulfilled_count > 0u || fulfilled_by_others > 0u; } void ShenandoahFreeSet::release_directly_allocatable_region(ShenandoahHeapRegion* region) { shenandoah_assert_heaplocked(); region->release_from_direct_allocation(); for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { - ShenandoahHeapRegion** shared_region = _directly_allocatable_regions + i; - if (Atomic::load(shared_region) == region) { - Atomic::release_store(shared_region, static_cast(nullptr)); + ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + i; + if (Atomic::load_acquire(shared_region_address) == region) { + Atomic::release_store(shared_region_address, static_cast(nullptr)); break; } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 8743fd8770482..286bcf964817f 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -394,10 +394,10 @@ class ShenandoahFreeSet : public CHeapObj { // log status, assuming lock has already been acquired by the caller. 
void log_status(); - bool try_refill_directly_allocatable_regions(uint probed_region_count, - uint probed_indexes[], - ShenandoahHeapRegion* probed_regions[], - size_t min_req_size); + bool try_allocate_directly_allocatable_regions(uint probed_region_count, + ShenandoahHeapRegion** shared_region_address[], + ShenandoahHeapRegion* original_shared_regions[], + size_t min_req_size); template uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty); From 970f3ddacec8a4180cd46cd8b2fe79c629f8eb2e Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 14:00:16 -0700 Subject: [PATCH 20/44] Add todo comments --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 3ac86b1f76f0c..f8ab6813e57a3 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2243,6 +2243,7 @@ bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(const uint reg if (r->free() < PLAB::min_size()) { r->release_from_direct_allocation(); Atomic::release_store(shared_region_address[i], static_cast(nullptr)); + // TODO confirm when&why the region is moved out of Mutator partition? if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); } From bc5e72a539ea2b914a622b3962af6c15f838d004 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 14:04:35 -0700 Subject: [PATCH 21/44] Revert "Duplicate Z's CPUAffinity in gc shared" This reverts commit 66f3919d1b4c31945718494c0ec9d8b960158a2c. --- src/hotspot/share/gc/shared/CPUAffinity.cpp | 67 ------------------- src/hotspot/share/gc/shared/CPUAffinity.hpp | 49 -------------- .../share/gc/shared/CPUAffinity.inline.hpp | 49 -------------- 3 files changed, 165 deletions(-) delete mode 100644 src/hotspot/share/gc/shared/CPUAffinity.cpp delete mode 100644 src/hotspot/share/gc/shared/CPUAffinity.hpp delete mode 100644 src/hotspot/share/gc/shared/CPUAffinity.inline.hpp diff --git a/src/hotspot/share/gc/shared/CPUAffinity.cpp b/src/hotspot/share/gc/shared/CPUAffinity.cpp deleted file mode 100644 index c085fe65f1ba2..0000000000000 --- a/src/hotspot/share/gc/shared/CPUAffinity.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* -* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -#include "gc/shared/gcLogPrecious.hpp" -#include "gc/shared/CPUAffinity.inline.hpp" -#include "memory/padded.inline.hpp" -#include "runtime/javaThread.hpp" -#include "runtime/os.hpp" -#include "utilities/debug.hpp" - - -#define UNKNOWN_AFFINITY ((Thread*)-1) -#define UNKNOWN_SELF ((Thread*)-2) - -PaddedEnd* CPUAffinity::_affinity = nullptr; -THREAD_LOCAL Thread* CPUAffinity::_self = UNKNOWN_SELF; -THREAD_LOCAL uint32_t CPUAffinity::_cpu = 0; - -void CPUAffinity::initialize() { - assert(_affinity == nullptr, "Already initialized"); - const uint32_t ncpus = count(); - - _affinity = PaddedArray::create_unfreeable(ncpus); - - for (uint32_t i = 0; i < ncpus; i++) { - _affinity[i]._thread = UNKNOWN_AFFINITY; - } - - log_info_p(gc, init)("CPUs: %u total, %u available", - os::processor_count(), - os::initial_active_processor_count()); -} - -uint32_t CPUAffinity::id_slow() { - // Set current thread - if (_self == UNKNOWN_SELF) { - _self = Thread::current(); - } - - // Set current CPU - _cpu = os::processor_id(); - - // Update affinity table - _affinity[_cpu]._thread = _self; - - return _cpu; -} diff --git a/src/hotspot/share/gc/shared/CPUAffinity.hpp b/src/hotspot/share/gc/shared/CPUAffinity.hpp deleted file mode 100644 index cdc2af23778dd..0000000000000 --- a/src/hotspot/share/gc/shared/CPUAffinity.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#ifndef SHARE_GC_SHARED_CPUAFFINITY_HPP -#define SHARE_GC_SHARED_CPUAFFINITY_HPP - -#include "memory/allStatic.hpp" -#include "memory/padded.hpp" -#include "utilities/globalDefinitions.hpp" - -class Thread; - -class CPUAffinity : public AllStatic { - struct Affinity { - Thread* _thread; - }; - static PaddedEnd* _affinity; - static THREAD_LOCAL Thread* _self; - static THREAD_LOCAL uint32_t _cpu; - - static uint32_t id_slow(); -public: - static void initialize(); - - static uint32_t count(); - static uint32_t id(); -}; - -#endif diff --git a/src/hotspot/share/gc/shared/CPUAffinity.inline.hpp b/src/hotspot/share/gc/shared/CPUAffinity.inline.hpp deleted file mode 100644 index 6f1a59e65418b..0000000000000 --- a/src/hotspot/share/gc/shared/CPUAffinity.inline.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#ifndef SHARE_GC_SHARED_CPUAFFINITY_INLINE_HPP -#define SHARE_GC_SHARED_CPUAFFINITY_INLINE_HPP - -#include "gc/shared/CPUAffinity.hpp" - -#include "runtime/os.hpp" -#include "utilities/debug.hpp" - -inline uint32_t CPUAffinity::count() { - return (uint32_t)os::processor_count(); -} - -inline uint32_t CPUAffinity::id() { - assert(_affinity != nullptr, "Not initialized"); - - // Fast path - if (_affinity[_cpu]._thread == _self) { - return _cpu; - } - - // Slow path - return id_slow(); -} - - -#endif \ No newline at end of file From 103e42f684fc2af697dcfc6ca317b341026af212 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 15:43:18 -0700 Subject: [PATCH 22/44] Steal alloc from other shared regions --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 64 +++++++++++++------ .../share/gc/shenandoah/shenandoahFreeSet.hpp | 3 + .../gc/shenandoah/shenandoah_globals.hpp | 2 +- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index f8ab6813e57a3..3c81cefe60448 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2113,37 +2113,29 @@ template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) { shenandoah_assert_not_heaplocked(); assert(req.is_mutator_alloc(), "Must be mutator allocation"); + assert(req.is_young(), "Mutator allocations always come from young generation."); assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not"); assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be"); size_t actual_size = req.size(); size_t min_requested_size = IS_TLAB ? req.min_size() : actual_size; - const uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount; - +#if defined(__APPLE__) && defined(__aarch64__) + const uint hash = abs(os::random()) % ShenandoahDirectlyAllocatableRegionCount; +#else + const uint hash = (os::is_MP() ? 
os::processor_id() : abs(os::random())) % ShenandoahDirectlyAllocatableRegionCount; +#endif for (;;) { uint idx = hash % ShenandoahDirectlyAllocatableRegionCount; - ShenandoahHeapRegion* retirable_regions[ShenandoahDirectlyAllocatableRegionCount]; - ShenandoahHeapRegion** retirable_shared_regions_addresses[ShenandoahDirectlyAllocatableRegionCount]; + ShenandoahHeapRegion* retirable_regions[3]; + ShenandoahHeapRegion** retirable_shared_regions_addresses[3]; HeapWord* obj = nullptr; uint count = 0u; - for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + for (uint i = 0u; i < 3; i++) { ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + idx; ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address); if (r != nullptr && r->reserved_for_direct_allocation()) { - if (IS_TLAB) { - obj = r->allocate_lab_atomic(req, actual_size); - } else { - obj = r->allocate_atomic(req.size(), req); - } + obj = par_allocate_in(r, req, in_new_region); if (obj != nullptr) { - assert(actual_size > 0, "Must be"); - req.set_actual_size(actual_size); - if (pointer_delta(obj, r->bottom()) == actual_size) { - // Set to true if it is the first object/tlab allocated in the region. - in_new_region = true; - } - assert(req.is_young(), "Mutator allocations always come from young generation."); - _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); return obj; } } @@ -2161,10 +2153,20 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ if (count == 0u) { ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); return allocate_for_mutator(req, in_new_region); - } else { - if (!try_allocate_directly_allocatable_regions(count, retirable_shared_regions_addresses, retirable_regions, min_requested_size)) { - return nullptr; + } + + if (!try_allocate_directly_allocatable_regions(count, retirable_shared_regions_addresses, retirable_regions, min_requested_size)) { + //only tried 3 shared regions, try to steal from other shared regions before OOM + for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + i); + if (r != nullptr) { + obj = par_allocate_in(r, req, in_new_region); + if (obj != nullptr) { + return obj; + } + } } + return nullptr; } } } @@ -2173,6 +2175,26 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); +template +HeapWord* ShenandoahFreeSet::par_allocate_in(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region) { + HeapWord* obj = nullptr; + size_t actual_size = req.size(); + if (IS_TLAB) { + obj = region->allocate_lab_atomic(req, actual_size); + } else { + obj = region->allocate_atomic(actual_size, req); + } + if (obj != nullptr) { + assert(actual_size > 0, "Must be"); + req.set_actual_size(actual_size); + if (pointer_delta(obj, region->bottom()) == actual_size) { + // Set to true if it is the first object/tlab allocated in the region. 
+ in_new_region = true; + } + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); + } + return obj; +} class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBreakableIterClosure { public: diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 286bcf964817f..bdcc0838005b6 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -394,6 +394,9 @@ class ShenandoahFreeSet : public CHeapObj { // log status, assuming lock has already been acquired by the caller. void log_status(); + template + HeapWord* par_allocate_in(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region); + bool try_allocate_directly_allocatable_regions(uint probed_region_count, ShenandoahHeapRegion** shared_region_address[], ShenandoahHeapRegion* original_shared_regions[], diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp index 1a84f595cdd0b..df638796bd8ef 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp @@ -564,7 +564,7 @@ "to prevent starvation of the old collector. Setting this to " \ "0 will allow back to back young collections to run during old " \ "collections.") \ - product(uintx, ShenandoahDirectlyAllocatableRegionCount, 7, EXPERIMENTAL, \ + product(uintx, ShenandoahDirectlyAllocatableRegionCount, 13, EXPERIMENTAL,\ "Number of regions Shenandoah will pre-allocate for " \ "direct allocation with CAS, the values should less than " \ "number of CPU cores. Ideally it should be a prime number. ") \ From 2f5d8182cf8a7416599369c5a08dfcea4bfae533 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 17:26:59 -0700 Subject: [PATCH 23/44] Use current thread id for hash --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 3c81cefe60448..dfca53aac0629 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2119,13 +2119,9 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ size_t actual_size = req.size(); size_t min_requested_size = IS_TLAB ? req.min_size() : actual_size; -#if defined(__APPLE__) && defined(__aarch64__) - const uint hash = abs(os::random()) % ShenandoahDirectlyAllocatableRegionCount; -#else - const uint hash = (os::is_MP() ? 
os::processor_id() : abs(os::random())) % ShenandoahDirectlyAllocatableRegionCount; -#endif + const uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount; for (;;) { - uint idx = hash % ShenandoahDirectlyAllocatableRegionCount; + uint idx = hash; ShenandoahHeapRegion* retirable_regions[3]; ShenandoahHeapRegion** retirable_shared_regions_addresses[3]; HeapWord* obj = nullptr; From 6aa2dbace84d3d9bdb7d7da727e44d0589b5c9cb Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Thu, 3 Jul 2025 18:15:49 -0700 Subject: [PATCH 24/44] Fix build error for Windows --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 8 ++++---- src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index dfca53aac0629..ddee28478c928 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -253,9 +253,9 @@ void ShenandoahRegionPartitions::establish_mutator_intervals(idx_t mutator_leftm _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = _max; _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = -1; - Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::Collector), 0ul); - Atomic::store(_used + int(ShenandoahFreeSetPartitionId::Collector), 0ul); - Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::Collector), 0ul); + Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::Collector), size_t(0)); + Atomic::store(_used + int(ShenandoahFreeSetPartitionId::Collector), size_t(0)); + Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::Collector), size_t(0)); } void ShenandoahRegionPartitions::establish_old_collector_intervals(idx_t old_collector_leftmost, idx_t old_collector_rightmost, @@ -2119,7 +2119,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ size_t actual_size = req.size(); size_t min_requested_size = IS_TLAB ? 
req.min_size() : actual_size;
-  const uint hash = (reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount;
+  const uint hash = uint((reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount);
   for (;;) {
     uint idx = hash;
     ShenandoahHeapRegion* retirable_regions[3];
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
index 37b9b58f5aa20..6ebfbb6a3d60f 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
@@ -371,9 +371,9 @@ void ShenandoahHeapRegion::make_committed_bypass() {
 }
 
 void ShenandoahHeapRegion::reset_alloc_metadata() {
-  Atomic::store(&_tlab_allocs, 0ul);
-  Atomic::store(&_gclab_allocs, 0ul);
-  Atomic::store(&_plab_allocs, 0ul);
+  Atomic::store(&_tlab_allocs, size_t(0));
+  Atomic::store(&_gclab_allocs, size_t(0));
+  Atomic::store(&_plab_allocs, size_t(0));
 }
 
 size_t ShenandoahHeapRegion::get_shared_allocs() const {

From ce5616c513c821a515735041ed75c85baf906306 Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Fri, 4 Jul 2025 00:13:14 -0700
Subject: [PATCH 25/44] Do not reserve a region if it is ready for promotion

---
 src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index ddee28478c928..e30cf06e5685b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2228,7 +2228,8 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr
       Atomic::store(_shared_region_addresses[_current_index++], r);
       skip_invalid_address();
       _fulfilled_count++;
-    } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() && r->free() >= _min_req_byte_size) {
+    } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() &&
+               r->get_top_before_promote() != nullptr && r->free() >= _min_req_byte_size) {
       r->reserve_for_direct_allocation();
       Atomic::store(_shared_region_addresses[_current_index++], r);
       skip_invalid_address();

From d1d71bc8a69e83ecbcd15f83117b65a49035244d Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Fri, 4 Jul 2025 02:14:06 -0700
Subject: [PATCH 26/44] Only reserve empty regions for direct allocation; also
 take the chance to allocate the object under lock from a non-empty region
 with enough capacity

---
 .../share/gc/shenandoah/shenandoahFreeSet.cpp | 71 ++++++++++++------
 .../share/gc/shenandoah/shenandoahFreeSet.hpp | 10 +--
 .../gc/shenandoah/shenandoahHeapRegion.hpp    |  2 +
 .../shenandoahHeapRegion.inline.hpp           | 20 ++++++
 4 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index e30cf06e5685b..09b293520a8b0 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2117,8 +2117,6 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ
   assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not");
   assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be");
 
-  size_t actual_size = req.size();
-  size_t min_requested_size = IS_TLAB ?
req.min_size() : actual_size; const uint hash = uint((reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount); for (;;) { uint idx = hash; @@ -2130,7 +2128,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + idx; ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address); if (r != nullptr && r->reserved_for_direct_allocation()) { - obj = par_allocate_in(r, req, in_new_region); + obj = par_allocate_in_for_mutator(r, req, in_new_region); if (obj != nullptr) { return obj; } @@ -2144,25 +2142,30 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ } idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount; } - // Failed to allocate in 3 consecutive directly allocatable regions. - // Try to retire the region if the free size is less than minimal tlab size and try to replace with a new region. + // Failed to allocate in 3 consecutive directly allocatable regions, meanwhile none of the 3 regions + // is ready for retire and replacement, it will fall back to allocate from other regions with a heap lock. if (count == 0u) { ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); return allocate_for_mutator(req, in_new_region); } - - if (!try_allocate_directly_allocatable_regions(count, retirable_shared_regions_addresses, retirable_regions, min_requested_size)) { - //only tried 3 shared regions, try to steal from other shared regions before OOM - for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { - ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + i); - if (r != nullptr) { - obj = par_allocate_in(r, req, in_new_region); - if (obj != nullptr) { - return obj; + // If any of the 3 consecutive directly allocatable regions is ready for retire and replacement, + // grab heap lock try to retire all ready-to-retire shared regions. 
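For readers following the diff, the retry protocol this hunk settles on can be condensed as below. This is an illustrative sketch only: the helper names (probe_regions_with_cas and friends) are stand-ins for logic that is inlined in the patch, and the locking and accounting details are elided.

    // One round of the mutator allocation loop (sketch):
    for (;;) {
      HeapWord* obj = probe_regions_with_cas();             // up to 3 shared regions
      if (obj != nullptr) return obj;                       // lock-free fast path
      if (no_probed_region_is_retirable()) {
        return allocate_under_heap_lock();                  // the count == 0u case above
      }
      bool replaced = retire_and_replace_under_lock(&obj);  // may also allocate while locked
      if (obj != nullptr) return obj;                       // satisfied under the lock
      if (!replaced) return steal_from_remaining_regions(); // last try before OOM
      // fresh regions installed: retry the CAS fast path
    }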
+ if (!try_allocate_directly_allocatable_regions(retirable_shared_regions_addresses, retirable_regions, count, req, obj, in_new_region)) { + if (obj == nullptr) { + //only tried 3 shared regions, try to steal from other shared regions before OOM + for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + i); + if (r != nullptr) { + obj = par_allocate_in_for_mutator(r, req, in_new_region); + if (obj != nullptr) break; } } + return obj; } - return nullptr; + } + if (obj != nullptr) { + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); + return obj; } } } @@ -2172,7 +2175,7 @@ template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(Shen template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); template -HeapWord* ShenandoahFreeSet::par_allocate_in(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region) { +HeapWord* ShenandoahFreeSet::par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region) { HeapWord* obj = nullptr; size_t actual_size = req.size(); if (IS_TLAB) { @@ -2199,11 +2202,17 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr uint _current_index = 0u; const uint _request_count; uint _fulfilled_count = 0u; + ShenandoahAllocRequest &_req; + HeapWord* &_obj; + bool &_in_new_region; const size_t _min_req_byte_size; DirectlyAllocatableRegionAllocationClosure( - ShenandoahHeapRegion*** shared_region_addresses, const uint shared_region_address_count, const uint request_count, const size_t min_req_size) - : _shared_region_addresses(shared_region_addresses), _shared_region_address_count(shared_region_address_count), _request_count(request_count), _min_req_byte_size(min_req_size * HeapWordSize) { + ShenandoahHeapRegion*** shared_region_addresses, const uint shared_region_address_count, const uint request_count, + ShenandoahAllocRequest &req, HeapWord* &obj, bool &in_new_region) + : _shared_region_addresses(shared_region_addresses), _shared_region_address_count(shared_region_address_count), _request_count(request_count), + _req(req), _obj(obj), _in_new_region(in_new_region), + _min_req_byte_size((req.type() == ShenandoahAllocRequest::_alloc_tlab ? 
req.min_size() : req.size()) * HeapWordSize) { skip_invalid_address(); } @@ -2228,21 +2237,29 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr Atomic::store(_shared_region_addresses[_current_index++], r); skip_invalid_address(); _fulfilled_count++; - } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() && + } else if (_obj == nullptr && r->affiliation() == YOUNG_GENERATION && r->is_regular() && r->get_top_before_promote() != nullptr && r->free() >= _min_req_byte_size) { - r->reserve_for_direct_allocation(); - Atomic::store(_shared_region_addresses[_current_index++], r); - skip_invalid_address(); - _fulfilled_count++; + size_t actual_size = _req.size(); + if (_req.is_lab_alloc()) { + _obj = r->allocate_lab(_req, actual_size); + } else { + _obj = r->allocate(actual_size, _req); + } + if (_obj != nullptr) { + _req.set_actual_size(actual_size); + _in_new_region = false; + } } return _fulfilled_count == _request_count || _current_index == _shared_region_address_count; } }; -bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(const uint region_count, - ShenandoahHeapRegion** shared_region_address[], +bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeapRegion** shared_region_address[], ShenandoahHeapRegion* original_shared_regions[], - size_t min_req_size) { + const uint region_count, + ShenandoahAllocRequest &req, + HeapWord* &obj, + bool &in_new_region) { assert(Thread::current()->is_Java_thread(), "Must be mutator"); assert(region_count > 0u && region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be"); shenandoah_assert_not_heaplocked(); @@ -2277,7 +2294,7 @@ bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(const uint reg } } - DirectlyAllocatableRegionAllocationClosure cl(shared_region_address, region_count, request_count, min_req_size); + DirectlyAllocatableRegionAllocationClosure cl(shared_region_address, region_count, request_count, req, obj, in_new_region); if (request_count > 0u) { iterate_regions_for_alloc(&cl, true); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index bdcc0838005b6..da608084dcb06 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -395,12 +395,14 @@ class ShenandoahFreeSet : public CHeapObj { void log_status(); template - HeapWord* par_allocate_in(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region); + HeapWord* par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region); - bool try_allocate_directly_allocatable_regions(uint probed_region_count, - ShenandoahHeapRegion** shared_region_address[], + bool try_allocate_directly_allocatable_regions(ShenandoahHeapRegion** shared_region_address[], ShenandoahHeapRegion* original_shared_regions[], - size_t min_req_size); + uint region_count, + ShenandoahAllocRequest &req, + HeapWord* &obj, + bool &in_new_region); template uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp index 0908de3f9165f..4cde038bc1764 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp @@ -368,6 +368,8 @@ class ShenandoahHeapRegion { // Allocation (return nullptr 
if full) inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req); + inline HeapWord* allocate_lab(const ShenandoahAllocRequest &req, size_t &actual_size); + // Atomic allocation using CAS, return nullptr if full or no enough space for the req inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp index 2a0796870ab46..bbfd325edb36d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp @@ -109,6 +109,26 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque } } +HeapWord* ShenandoahHeapRegion::allocate_lab(const ShenandoahAllocRequest& req, size_t &actual_size) { + shenandoah_assert_heaplocked_or_safepoint(); + assert(req.is_lab_alloc(), "Only lab alloc"); + assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established"); + + size_t adjusted_size = req.size(); + HeapWord* obj = nullptr; + HeapWord* old_top = top(); + size_t free_words = align_down(byte_size(old_top, end()) >> LogHeapWordSize, MinObjAlignment); + if (adjusted_size > free_words) { + adjusted_size = free_words; + } + if (adjusted_size >= req.min_size()) { + obj = allocate(adjusted_size, req); + actual_size = adjusted_size; + assert(obj == old_top, "Must be"); + } + return obj; +} + HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAllocRequest& req) { assert(is_object_aligned(size), "alloc size breaks alignment: %zu", size); assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established"); From 96db6192aab39f39fea792bbd73c730b4ac93bb9 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Fri, 4 Jul 2025 02:50:22 -0700 Subject: [PATCH 27/44] reserve region when non-empty region has enough capacity --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 09b293520a8b0..0084d3458247c 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2237,17 +2237,24 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr Atomic::store(_shared_region_addresses[_current_index++], r); skip_invalid_address(); _fulfilled_count++; - } else if (_obj == nullptr && r->affiliation() == YOUNG_GENERATION && r->is_regular() && + } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() && r->get_top_before_promote() != nullptr && r->free() >= _min_req_byte_size) { - size_t actual_size = _req.size(); - if (_req.is_lab_alloc()) { - _obj = r->allocate_lab(_req, actual_size); + if (_obj == nullptr) { + size_t actual_size = _req.size(); + if (_req.is_lab_alloc()) { + _obj = r->allocate_lab(_req, actual_size); + } else { + _obj = r->allocate(actual_size, _req); + } + if (_obj != nullptr) { + _req.set_actual_size(actual_size); + _in_new_region = false; + } } else { - _obj = r->allocate(actual_size, _req); - } - if (_obj != nullptr) { - _req.set_actual_size(actual_size); - _in_new_region = false; + r->reserve_for_direct_allocation(); + Atomic::store(_shared_region_addresses[_current_index++], r); + skip_invalid_address(); + _fulfilled_count++; } } 
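    // Note on the allocate_lab() helper introduced above: the requested LAB
    // size is clamped down to the region's aligned free space, and the
    // allocation fails only if even req.min_size() does not fit. Worked
    // example with hypothetical sizes (in heap words): req.size() == 512,
    // req.min_size() == 64, and 200 aligned words free -> adjusted_size
    // becomes 200, which still satisfies min_size, so a short TLAB is
    // handed out instead of nullptr.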
return _fulfilled_count == _request_count || _current_index == _shared_region_address_count; From d4dcb28fbd19fbac3a159994ca735638781a282c Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Fri, 4 Jul 2025 03:09:42 -0700 Subject: [PATCH 28/44] touch up --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 0084d3458247c..c22e8377b5e36 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2119,12 +2119,13 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ const uint hash = uint((reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount); for (;;) { + constexpr uint max_probes = 3; uint idx = hash; - ShenandoahHeapRegion* retirable_regions[3]; - ShenandoahHeapRegion** retirable_shared_regions_addresses[3]; + ShenandoahHeapRegion* retirable_regions[max_probes]; + ShenandoahHeapRegion** retirable_shared_regions_addresses[max_probes]; HeapWord* obj = nullptr; uint count = 0u; - for (uint i = 0u; i < 3; i++) { + for (uint i = 0u; i < max_probes; i++) { ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + idx; ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address); if (r != nullptr && r->reserved_for_direct_allocation()) { From 2ea822c98bc94d55b1d5373256afe8ff69f3ab3e Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 02:40:32 -0700 Subject: [PATCH 29/44] Allocate new obj before storing the new reserved shared region --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index c22e8377b5e36..d371f5c747c8b 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2235,6 +2235,13 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr r->set_affiliation(YOUNG_GENERATION); r->make_regular_allocation(YOUNG_GENERATION); ShenandoahHeap::heap()->generation_for(r->affiliation())->increment_affiliated_region_count(); + if (_obj == nullptr) { + size_t actual_size = _req.size(); + _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : _obj = r->allocate(actual_size, _req); + _req.set_actual_size(actual_size); + _in_new_region = true; + } + OrderAccess::fence(); Atomic::store(_shared_region_addresses[_current_index++], r); skip_invalid_address(); _fulfilled_count++; @@ -2242,15 +2249,9 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr r->get_top_before_promote() != nullptr && r->free() >= _min_req_byte_size) { if (_obj == nullptr) { size_t actual_size = _req.size(); - if (_req.is_lab_alloc()) { - _obj = r->allocate_lab(_req, actual_size); - } else { - _obj = r->allocate(actual_size, _req); - } - if (_obj != nullptr) { - _req.set_actual_size(actual_size); - _in_new_region = false; - } + _obj = _req.is_lab_alloc() ? 
r ->allocate_lab(_req, actual_size) : _obj = r->allocate(actual_size, _req);
+        _req.set_actual_size(actual_size);
+        _in_new_region = false;
       } else {
         r->reserve_for_direct_allocation();
         Atomic::store(_shared_region_addresses[_current_index++], r);

From e4ddfdc9ed67e7eaea4573fe2085bd9fff0850c3 Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Mon, 7 Jul 2025 02:44:15 -0700
Subject: [PATCH 30/44] Fix typo

---
 src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index d371f5c747c8b..d906712187de5 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2237,7 +2237,7 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr
       ShenandoahHeap::heap()->generation_for(r->affiliation())->increment_affiliated_region_count();
       if (_obj == nullptr) {
         size_t actual_size = _req.size();
-        _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : _obj = r->allocate(actual_size, _req);
+        _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : r->allocate(actual_size, _req);
         _req.set_actual_size(actual_size);
         _in_new_region = true;
       }
@@ -2249,7 +2249,7 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr
                r->get_top_before_promote() != nullptr && r->free() >= _min_req_byte_size) {
       if (_obj == nullptr) {
         size_t actual_size = _req.size();
-        _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : _obj = r->allocate(actual_size, _req);
+        _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : r->allocate(actual_size, _req);
         _req.set_actual_size(actual_size);
         _in_new_region = false;
       } else {

From dceff3ce3f56d576d247983b81db3c023b620dc0 Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Mon, 7 Jul 2025 08:28:10 -0700
Subject: [PATCH 31/44] Do not repeat allocation on the regions already tried
 before stealing alloc from other shared regions

---
 .../share/gc/shenandoah/shenandoahFreeSet.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index d906712187de5..312d4bb6d8547 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2117,10 +2117,10 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ
   assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not");
   assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be");
 
-  const uint hash = uint((reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount);
+  const uint start_idx = uint((reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount);
   for (;;) {
     constexpr uint max_probes = 3;
-    uint idx = hash;
+    uint idx = start_idx;
     ShenandoahHeapRegion* retirable_regions[max_probes];
     ShenandoahHeapRegion** retirable_shared_regions_addresses[max_probes];
     HeapWord* obj = nullptr;
@@ -2154,19 +2154,21 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ
     if (!try_allocate_directly_allocatable_regions(retirable_shared_regions_addresses, retirable_regions, count, req, obj, in_new_region)) {
       if (obj == nullptr) {
         //only tried 3
shared regions, try to steal from other shared regions before OOM - for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + do { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + i); if (r != nullptr) { obj = par_allocate_in_for_mutator(r, req, in_new_region); if (obj != nullptr) break; } - } + idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount; + } while (idx != start_idx); + return obj; + } + } else { + if (obj != nullptr) { + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); return obj; } - } - if (obj != nullptr) { - _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); - return obj; } } } From 138acb785586b402d33837f66c1b156c1a1594af Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 08:41:42 -0700 Subject: [PATCH 32/44] Fix typo --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 312d4bb6d8547..914fe6b901297 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2155,7 +2155,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ if (obj == nullptr) { //only tried 3 shared regions, try to steal from other shared regions before OOM do { - ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + i); + ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); if (r != nullptr) { obj = par_allocate_in_for_mutator(r, req, in_new_region); if (obj != nullptr) break; From fccbd0da9b83b3ce3146ef1dc4a614ad92e44b0b Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 08:54:37 -0700 Subject: [PATCH 33/44] Fix improper order when release a region from direct allocation --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 914fe6b901297..b00715974daa4 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2156,7 +2156,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ //only tried 3 shared regions, try to steal from other shared regions before OOM do { ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx); - if (r != nullptr) { + if (r != nullptr && r->reserved_for_direct_allocation()) { obj = par_allocate_in_for_mutator(r, req, in_new_region); if (obj != nullptr) break; } @@ -2288,8 +2288,8 @@ bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeap request_count++; if (r != nullptr) { if (r->free() < PLAB::min_size()) { + Atomic::release_store_fence(shared_region_address[i], static_cast(nullptr)); r->release_from_direct_allocation(); - Atomic::release_store(shared_region_address[i], static_cast(nullptr)); // TODO confirm when&why the region is moved out of Mutator partition? 
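          // Why this order matters (per the "Fix improper order" commits):
          // the shared slot is cleared with release_store_fence *before* the
          // region drops its direct-allocation reservation and is retired,
          // so that a concurrent mutator can no longer load the stale region
          // pointer from the slot and CAS-allocate into a region that is
          // already on its way out.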
if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); From 34529958af92d45f2cf44b8c72060779fabdaf2e Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 08:56:29 -0700 Subject: [PATCH 34/44] Fix improper order when release a region from direct allocation --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index b00715974daa4..7bdfdbaea991e 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2289,11 +2289,11 @@ bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeap if (r != nullptr) { if (r->free() < PLAB::min_size()) { Atomic::release_store_fence(shared_region_address[i], static_cast(nullptr)); - r->release_from_direct_allocation(); // TODO confirm when&why the region is moved out of Mutator partition? if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); } + r->release_from_direct_allocation(); } else { // Although r is same as original one when tried CAS allocation, but it has more free space. fulfilled_by_others++; From c93dc012299d4d92328fb66f4605c6aedbb2484a Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 08:59:44 -0700 Subject: [PATCH 35/44] Fix improper order when release a region from direct allocation --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 7bdfdbaea991e..a57095d27f2e7 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -2101,10 +2101,9 @@ void ShenandoahFreeSet::release_all_directly_allocatable_regions() { ShenandoahHeapRegion** address = _directly_allocatable_regions + i; ShenandoahHeapRegion* r = Atomic::load_acquire(address); if (r != nullptr) { - if (r->reserved_for_direct_allocation()) { - r->release_from_direct_allocation(); - } - Atomic::release_store(address, static_cast(nullptr)); + assert(r->reserved_for_direct_allocation(), "Must be"); + Atomic::release_store_fence(address, static_cast(nullptr)); + r->release_from_direct_allocation(); } } } @@ -2314,7 +2313,6 @@ bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeap void ShenandoahFreeSet::release_directly_allocatable_region(ShenandoahHeapRegion* region) { shenandoah_assert_heaplocked(); - region->release_from_direct_allocation(); for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + i; if (Atomic::load_acquire(shared_region_address) == region) { @@ -2322,6 +2320,8 @@ void ShenandoahFreeSet::release_directly_allocatable_region(ShenandoahHeapRegion break; } } + OrderAccess::fence(); + region->release_from_direct_allocation(); } template From 3e80fdce277dbb9a4c35547f6a57ff70883f30a3 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 09:20:41 -0700 Subject: [PATCH 36/44] Fix a bug --- src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 10 
+++++----- 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index a57095d27f2e7..f2cbd02c9d870 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -2163,11 +2163,11 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ
         } while (idx != start_idx);
         return obj;
       }
-    } else {
-      if (obj != nullptr) {
-        _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize);
-        return obj;
-      }
+    }
+    // Regardless of the result of the directly allocatable region allocation, obj may have been allocated.
+    if (obj != nullptr) {
+      _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize);
+      return obj;
+    }
   }
 }

From b3d359229bf173fadc01e0be1943dd6a960ef89f Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Mon, 7 Jul 2025 13:55:35 -0700
Subject: [PATCH 37/44] Do not update allocation bias

---
 src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index f2cbd02c9d870..281554ee4fd3c 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -836,7 +836,7 @@ HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool&
 }
 
 HeapWord* ShenandoahFreeSet::allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) {
-  update_allocation_bias();
+  //update_allocation_bias();
 
   if (_partitions.is_empty(ShenandoahFreeSetPartitionId::Mutator)) {
     // There is no recovery. Mutator does not touch collector view at all.
@@ -2332,9 +2332,11 @@ uint ShenandoahFreeSet::iterate_regions_for_alloc(ShenandoahHeapRegionBreakableI
   if (_partitions.is_empty(partition)) {
     return 0u;
   }
+  /*
   if (IS_MUTATOR) {
     update_allocation_bias();
   }
+  */
   if (_partitions.alloc_from_left_bias(partition)) {
     ShenandoahLeftRightIterator iterator(&_partitions, partition, use_empty);
     return iterate_regions_for_alloc(iterator, cl);

From 9340e6e318132e7b94cf45267e44b0b Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Mon, 7 Jul 2025 17:31:42 -0700
Subject: [PATCH 38/44] Add CPU affinity support and use the processor id
 instead of the thread id for CAS allocation

---
 .../share/gc/shenandoah/shenandoahCPU.cpp     | 66 +++++++++++++++++++
 .../share/gc/shenandoah/shenandoahCPU.hpp     | 50 ++++++++++++++
 .../gc/shenandoah/shenandoahCPU.inline.hpp    | 44 +++++++++++++
 .../share/gc/shenandoah/shenandoahFreeSet.cpp |  3 +-
 .../share/gc/shenandoah/shenandoahHeap.cpp    |  2 +
 5 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp
 create mode 100644 src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp
 create mode 100644 src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp
new file mode 100644
index 0000000000000..c7d0b0032b499
--- /dev/null
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "gc/shared/gcLogPrecious.hpp" +#include "gc/shenandoah/shenandoahCPU.inline.hpp" +#include "memory/padded.inline.hpp" +#include "runtime/javaThread.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" + +#define SHENANDOAH_CPU_UNKNOWN_AFFINITY ((Thread*)-1) +#define SHENANDOAH_CPU_UNKNOWN_SELF ((Thread*)-2) + +PaddedEnd* ShenandoahCPU::_affinity = nullptr; +THREAD_LOCAL Thread* ShenandoahCPU::_self = SHENANDOAH_CPU_UNKNOWN_SELF; +THREAD_LOCAL uint32_t ShenandoahCPU::_cpu = 0; + +void ShenandoahCPU::initialize() { + assert(_affinity == nullptr, "Already initialized"); + const uint32_t ncpus = os::processor_count(); + + _affinity = PaddedArray::create_unfreeable(ncpus); + + for (uint32_t i = 0; i < ncpus; i++) { + _affinity[i]._thread = SHENANDOAH_CPU_UNKNOWN_AFFINITY; + } + + log_info_p(gc, init)("CPUs: %u total, %u available", + os::processor_count(), + os::initial_active_processor_count()); +} + +uint32_t ShenandoahCPU::id_slow() { + // Set current thread + if (_self == SHENANDOAH_CPU_UNKNOWN_SELF) { + _self = Thread::current(); + } + + // Set current CPU + _cpu = os::processor_id(); + + // Update affinity table + _affinity[_cpu]._thread = _self; + + return _cpu; +} diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp new file mode 100644 index 0000000000000..52ec005d445e3 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_HPP + +#include "memory/allStatic.hpp" +#include "memory/padded.hpp" +#include "utilities/globalDefinitions.hpp" + +class Thread; + +class ShenandoahCPU : public AllStatic { +private: + struct Affinity { + Thread* _thread; + }; + + static PaddedEnd* _affinity; + static THREAD_LOCAL Thread* _self; + static THREAD_LOCAL uint32_t _cpu; + + static uint32_t id_slow(); + +public: + static void initialize(); + static uint32_t id(); +}; + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp new file mode 100644 index 0000000000000..502d49e5c190d --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP + +#include "gc/shared/shenandoah/shenandoahCPU.php" + +#include "runtime/os.hpp" +#include "utilities/debug.hpp" + +inline uint32_t ShenandoahCPU::id() { + assert(_affinity != nullptr, "Not initialized"); + + // Fast path + if (_affinity[_cpu]._thread == _self) { + return _cpu; + } + + // Slow path + return id_slow(); +} + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 281554ee4fd3c..a45a1ca57735d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -26,6 +26,7 @@ #include "gc/shared/tlab_globals.hpp" #include "gc/shenandoah/shenandoahAffiliation.hpp" +#include "gc/shenandoah/shenandoahCPU.inline.hpp" #include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" @@ -2116,7 +2117,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not"); assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be"); - const uint start_idx = uint((reinterpret_cast(Thread::current()) >> 5) % ShenandoahDirectlyAllocatableRegionCount); + const uint start_idx = uint(ShenandoahCPU::id() % ShenandoahDirectlyAllocatableRegionCount); for (;;) { constexpr uint max_probes = 3; uint idx = start_idx; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index 0fd230153d70b..8192da309a6aa 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -36,6 +36,7 @@ #include "gc/shared/memAllocator.hpp" #include "gc/shared/plab.hpp" #include "gc/shared/tlab_globals.hpp" +#include "gc/shenandoah/shenandoahCPU.inline.hpp" #include "gc/shenandoah/heuristics/shenandoahOldHeuristics.hpp" #include "gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp" #include "gc/shenandoah/mode/shenandoahGenerationalMode.hpp" @@ -578,6 +579,7 @@ ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) : { // Initialize GC mode early, many subsequent initialization procedures depend on it initialize_mode(); + ShenandoahCPU::initialize(); _cancelled_gc.set(GCCause::_no_gc); } From 1557472b1f640cf7f7a3b03fd46a5ff0da08ea68 Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 17:34:10 -0700 Subject: [PATCH 39/44] Fix wrong include --- src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp index 502d49e5c190d..d93f82525a4fd 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp @@ -24,7 +24,7 @@ #ifndef SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP #define SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP -#include "gc/shared/shenandoah/shenandoahCPU.php" +#include "gc/shenandoah/shenandoahCPU.php" #include "runtime/os.hpp" #include "utilities/debug.hpp" From 926462f7cb280bbd877d2f5498b0c40ab84bbe5a Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Mon, 7 Jul 2025 17:36:23 
-0700 Subject: [PATCH 40/44] Fix wrong include --- src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp index d93f82525a4fd..3a5b2d1cfb47c 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp @@ -24,7 +24,7 @@ #ifndef SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP #define SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP -#include "gc/shenandoah/shenandoahCPU.php" +#include "gc/shenandoah/shenandoahCPU.hpp" #include "runtime/os.hpp" #include "utilities/debug.hpp" From c640e68915d2e8ef536eae22e31700114f470c2c Mon Sep 17 00:00:00 2001 From: Xiaolong Peng Date: Tue, 8 Jul 2025 01:14:55 -0700 Subject: [PATCH 41/44] Use random to decide the start index where mutator starts allocating with CAS --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 42 ++++++++++++++++++- .../share/gc/shenandoah/shenandoahFreeSet.hpp | 17 ++++++++ .../share/gc/shenandoah/shenandoahHeap.cpp | 2 - 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index a45a1ca57735d..1be81cad79d55 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -26,7 +26,6 @@ #include "gc/shared/tlab_globals.hpp" #include "gc/shenandoah/shenandoahAffiliation.hpp" -#include "gc/shenandoah/shenandoahCPU.inline.hpp" #include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" @@ -750,6 +749,44 @@ void ShenandoahRegionPartitions::assert_bounds() { } #endif +PaddedEnd* ShenandoahDirectlyAllocatableRegionAffinity::_affinity = nullptr; +THREAD_LOCAL Thread* ShenandoahDirectlyAllocatableRegionAffinity::_self = UNKNOWN_SELF; +THREAD_LOCAL uint ShenandoahDirectlyAllocatableRegionAffinity::_index = 0; + +uint ShenandoahDirectlyAllocatableRegionAffinity::index_slow() { + // Set current thread + if (_self == UNKNOWN_SELF) { + _self = Thread::current(); + } + + // Create a new random index where the thread will start allocation + _index = static_cast(os::random()) % ShenandoahDirectlyAllocatableRegionCount; + + // Update affinity table + _affinity[_index]._thread = _self; + + return _index; +} + +void ShenandoahDirectlyAllocatableRegionAffinity::initialize() { + assert(_affinity == nullptr, "Already initialized"); + _affinity = PaddedArray::create_unfreeable(ShenandoahDirectlyAllocatableRegionCount); + for (uint32_t i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + _affinity[i]._thread = UNKNOWN_AFFINITY; + } +} + +uint ShenandoahDirectlyAllocatableRegionAffinity::index() { + assert(_affinity != nullptr, "Not initialized"); + // Fast path + if (_affinity[_index]._thread == _self) { + return _index; + } + + // Slow path + return index_slow(); +} + ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _heap(heap), _partitions(max_regions, this), @@ -760,6 +797,7 @@ ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) { _directly_allocatable_regions[i] = nullptr; } + ShenandoahDirectlyAllocatableRegionAffinity::initialize(); } void 
ShenandoahFreeSet::add_promoted_in_place_region_to_old_collector(ShenandoahHeapRegion* region) { @@ -2117,7 +2155,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not"); assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be"); - const uint start_idx = uint(ShenandoahCPU::id() % ShenandoahDirectlyAllocatableRegionCount); + const uint start_idx = ShenandoahDirectlyAllocatableRegionAffinity::index(); for (;;) { constexpr uint max_probes = 3; uint idx = start_idx; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index da608084dcb06..161915511608a 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -29,6 +29,7 @@ #include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc/shenandoah/shenandoahSimpleBitMap.hpp" +#include "memory/padded.inline.hpp" // Each ShenandoahHeapRegion is associated with a ShenandoahFreeSetPartitionId. enum class ShenandoahFreeSetPartitionId : uint8_t { @@ -270,6 +271,22 @@ class ShenandoahRegionPartitions { void assert_bounds() NOT_DEBUG_RETURN; }; +#define UNKNOWN_AFFINITY ((Thread*)-1) +#define UNKNOWN_SELF ((Thread*)-2) +class ShenandoahDirectlyAllocatableRegionAffinity : public AllStatic { + struct Affinity { + Thread* _thread; + }; + + static PaddedEnd* _affinity; + static THREAD_LOCAL Thread* _self; + static THREAD_LOCAL uint _index; + static uint index_slow(); +public: + static void initialize(); + static uint index(); +}; + // Publicly, ShenandoahFreeSet represents memory that is available to mutator threads. The public capacity(), used(), // and available() methods represent this public notion of memory that is under control of the mutator. Separately, // ShenandoahFreeSet also represents memory available to garbage collection activities for compaction purposes. 
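For readers skimming the diff, the affinity helper added above boils down to a thread-local cached slot index that is revalidated against a shared table. A minimal self-contained sketch of the same idea follows; it uses plain C++ thread_local and std::rand in place of HotSpot's THREAD_LOCAL, PaddedArray and os::random, and the count 13 mirrors the ShenandoahDirectlyAllocatableRegionCount default, so treat it as an illustration rather than the patch's code:

    #include <cstdint>
    #include <cstdlib>

    constexpr uint32_t kRegionCount = 13;

    struct Affinity { void* _thread; };   // one padded entry per slot in the real code
    static Affinity g_affinity[kRegionCount];

    static thread_local int t_anchor;     // unique address per thread = thread identity
    static thread_local uint32_t t_index = 0;

    uint32_t affinity_index() {
      void* self = &t_anchor;
      // Fast path: our cached slot still names us as its last user.
      if (g_affinity[t_index]._thread == self) {
        return t_index;
      }
      // Slow path: pick a fresh random start slot and claim it. A thread
      // takes this path the first time it allocates and whenever another
      // thread has since claimed the same slot.
      t_index = static_cast<uint32_t>(std::rand()) % kRegionCount;
      g_affinity[t_index]._thread = self;
      return t_index;
    }

The net effect is that threads spread across slots on first contact and re-randomize whenever they collide on a slot, which is what par_allocate_single_for_mutator relies on to spread CAS traffic across the directly allocatable regions.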
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
index 8192da309a6aa..0fd230153d70b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -36,7 +36,6 @@
 #include "gc/shared/memAllocator.hpp"
 #include "gc/shared/plab.hpp"
 #include "gc/shared/tlab_globals.hpp"
-#include "gc/shenandoah/shenandoahCPU.inline.hpp"
 #include "gc/shenandoah/heuristics/shenandoahOldHeuristics.hpp"
 #include "gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp"
 #include "gc/shenandoah/mode/shenandoahGenerationalMode.hpp"
@@ -579,7 +578,6 @@ ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) :
 {
   // Initialize GC mode early, many subsequent initialization procedures depend on it
   initialize_mode();
-  ShenandoahCPU::initialize();
   _cancelled_gc.set(GCCause::_no_gc);
 }
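[PATCH 41/44] above replaces the CPU-id-based start index with the ShenandoahDirectlyAllocatableRegionAffinity class. Since that class is the heart of the patch, here is a minimal standalone sketch of the pattern it implements, with portable stand-ins (all names below are illustrative, not from the series): plain void* for Thread*, C++ thread_local for THREAD_LOCAL, a flat array for the PaddedArray/PaddedEnd table, std::rand() for os::random(), and REGION_COUNT for ShenandoahDirectlyAllocatableRegionCount.

// Sketch only -- not code from this patch series; see stand-ins listed above.
#include <cstdint>
#include <cstdlib>

namespace affinity_sketch {

constexpr int REGION_COUNT = 16;

// Two distinct sentinels, as in the patch: a fresh thread (_self == UNKNOWN_SELF)
// never matches a fresh table slot (UNKNOWN_AFFINITY), so the first lookup on
// each thread is forced onto the slow path.
static void* const UNKNOWN_AFFINITY = reinterpret_cast<void*>(std::intptr_t(-1));
static void* const UNKNOWN_SELF     = reinterpret_cast<void*>(std::intptr_t(-2));

struct Affinity {
  void* _thread;   // thread that most recently claimed this slot
};                 // (the real table pads each entry to a cache line)

static Affinity _affinity[REGION_COUNT];
static thread_local void* _self  = UNKNOWN_SELF;
static thread_local int   _index = 0;

void initialize() {
  for (int i = 0; i < REGION_COUNT; i++) {
    _affinity[i]._thread = UNKNOWN_AFFINITY;
  }
}

static void* current_thread() {
  static thread_local char tag;  // address is unique per live thread
  return &tag;
}

static int index_slow() {
  if (_self == UNKNOWN_SELF) {
    _self = current_thread();
  }
  _index = std::rand() % REGION_COUNT;  // new random start region for this thread
  _affinity[_index]._thread = _self;    // claim the slot; a lost race is benign,
  return _index;                        // it only costs another slow path later
}

int region_index() {
  // Fast path: a single plain load -- the slot used last time still records us.
  if (_affinity[_index]._thread == _self) {
    return _index;
  }
  // Slow path: first call on this thread, or another thread re-claimed the slot.
  return index_slow();
}

} // namespace affinity_sketch

As in the patch, initialize() must run once before the first region_index() call; ShenandoahFreeSet's constructor does exactly that via ShenandoahDirectlyAllocatableRegionAffinity::initialize(). Two consecutive allocations from one thread therefore usually target the same region, which is the locality argument spelled out by the comment [PATCH 43/44] adds below.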
From ca04034f47c96c16ef5a34fabcd7b7b5d49fadba Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Tue, 8 Jul 2025 01:25:28 -0700
Subject: [PATCH 42/44] Delete ShenandoahCPU

---
 .../share/gc/shenandoah/shenandoahCPU.cpp  | 66 -------------------
 .../share/gc/shenandoah/shenandoahCPU.hpp  | 50 --------------
 .../gc/shenandoah/shenandoahCPU.inline.hpp | 44 -------------
 3 files changed, 160 deletions(-)
 delete mode 100644 src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp
 delete mode 100644 src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp
 delete mode 100644 src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp
deleted file mode 100644
index c7d0b0032b499..0000000000000
--- a/src/hotspot/share/gc/shenandoah/shenandoahCPU.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "gc/shared/gcLogPrecious.hpp"
-#include "gc/shenandoah/shenandoahCPU.inline.hpp"
-#include "memory/padded.inline.hpp"
-#include "runtime/javaThread.hpp"
-#include "runtime/os.hpp"
-#include "utilities/debug.hpp"
-
-#define SHENANDOAH_CPU_UNKNOWN_AFFINITY ((Thread*)-1)
-#define SHENANDOAH_CPU_UNKNOWN_SELF ((Thread*)-2)
-
-PaddedEnd<ShenandoahCPU::Affinity>* ShenandoahCPU::_affinity = nullptr;
-THREAD_LOCAL Thread* ShenandoahCPU::_self = SHENANDOAH_CPU_UNKNOWN_SELF;
-THREAD_LOCAL uint32_t ShenandoahCPU::_cpu = 0;
-
-void ShenandoahCPU::initialize() {
-  assert(_affinity == nullptr, "Already initialized");
-  const uint32_t ncpus = os::processor_count();
-
-  _affinity = PaddedArray<Affinity, mtGC>::create_unfreeable(ncpus);
-
-  for (uint32_t i = 0; i < ncpus; i++) {
-    _affinity[i]._thread = SHENANDOAH_CPU_UNKNOWN_AFFINITY;
-  }
-
-  log_info_p(gc, init)("CPUs: %u total, %u available",
-                       os::processor_count(),
-                       os::initial_active_processor_count());
-}
-
-uint32_t ShenandoahCPU::id_slow() {
-  // Set current thread
-  if (_self == SHENANDOAH_CPU_UNKNOWN_SELF) {
-    _self = Thread::current();
-  }
-
-  // Set current CPU
-  _cpu = os::processor_id();
-
-  // Update affinity table
-  _affinity[_cpu]._thread = _self;
-
-  return _cpu;
-}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp
deleted file mode 100644
index 52ec005d445e3..0000000000000
--- a/src/hotspot/share/gc/shenandoah/shenandoahCPU.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#ifndef SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_HPP
-#define SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_HPP
-
-#include "memory/allStatic.hpp"
-#include "memory/padded.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-class Thread;
-
-class ShenandoahCPU : public AllStatic {
-private:
-  struct Affinity {
-    Thread* _thread;
-  };
-
-  static PaddedEnd<Affinity>* _affinity;
-  static THREAD_LOCAL Thread* _self;
-  static THREAD_LOCAL uint32_t _cpu;
-
-  static uint32_t id_slow();
-
-public:
-  static void initialize();
-  static uint32_t id();
-};
-
-#endif // SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp
deleted file mode 100644
index 3a5b2d1cfb47c..0000000000000
--- a/src/hotspot/share/gc/shenandoah/shenandoahCPU.inline.hpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#ifndef SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP
-#define SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP
-
-#include "gc/shenandoah/shenandoahCPU.hpp"
-
-#include "runtime/os.hpp"
-#include "utilities/debug.hpp"
-
-inline uint32_t ShenandoahCPU::id() {
-  assert(_affinity != nullptr, "Not initialized");
-
-  // Fast path
-  if (_affinity[_cpu]._thread == _self) {
-    return _cpu;
-  }
-
-  // Slow path
-  return id_slow();
-}
-
-#endif // SHARE_GC_SHENANDOAH_SHENANDOAH_CPU_INLINE_HPP

From f4c8e55a148bce193e988509b762f6f371a43a1f Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Tue, 8 Jul 2025 09:33:49 -0700
Subject: [PATCH 43/44] Comments to explain
 ShenandoahDirectlyAllocatableRegionAffinity

---
 .../share/gc/shenandoah/shenandoahFreeSet.cpp |  8 ++++----
 .../share/gc/shenandoah/shenandoahFreeSet.hpp | 14 ++++++++++----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index 1be81cad79d55..784c4f32d34b4 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -750,12 +750,12 @@ void ShenandoahRegionPartitions::assert_bounds() {
 #endif
 
 PaddedEnd<ShenandoahDirectlyAllocatableRegionAffinity::Affinity>* ShenandoahDirectlyAllocatableRegionAffinity::_affinity = nullptr;
-THREAD_LOCAL Thread* ShenandoahDirectlyAllocatableRegionAffinity::_self = UNKNOWN_SELF;
+THREAD_LOCAL Thread* ShenandoahDirectlyAllocatableRegionAffinity::_self = DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF;
 THREAD_LOCAL uint ShenandoahDirectlyAllocatableRegionAffinity::_index = 0;
 
 uint ShenandoahDirectlyAllocatableRegionAffinity::index_slow() {
   // Set current thread
-  if (_self == UNKNOWN_SELF) {
+  if (_self == DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF) {
     _self = Thread::current();
   }
 
@@ -770,9 +770,9 @@
 void ShenandoahDirectlyAllocatableRegionAffinity::initialize() {
   assert(_affinity == nullptr, "Already initialized");
-  _affinity = PaddedArray<Affinity, mtGC>::create_unfreeable(ShenandoahDirectlyAllocatableRegionCount);
+  _affinity = PaddedArray<Affinity, mtGC>::create_unfreeable(ShenandoahDirectlyAllocatableRegionCount);
   for (uint32_t i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
-    _affinity[i]._thread = UNKNOWN_AFFINITY;
+    _affinity[i]._thread = DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_AFFINITY;
   }
 }
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
index 161915511608a..682d221c42eb3 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
@@ -271,16 +271,22 @@ class ShenandoahRegionPartitions {
   void assert_bounds() NOT_DEBUG_RETURN;
 };
 
-#define UNKNOWN_AFFINITY ((Thread*)-1)
-#define UNKNOWN_SELF ((Thread*)-2)
+#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_AFFINITY ((Thread*)-1)
+#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF ((Thread*)-2)
+// When mutator threads allocate from directly allocatable regions, the allocations should ideally be spread
+// evenly across all such regions; a random pick is the most portable way to achieve that. Pure randomness,
+// however, hurts memory locality: two consecutive allocations from the same thread may land in different
+// regions. ShenandoahDirectlyAllocatableRegionAffinity mitigates this by remembering, per thread, the region
+// index picked last time and reusing it on the fast path.
+// The idea and code are borrowed from ZGC's CPU affinity, with a random number taking the place of the CPU id.
 class ShenandoahDirectlyAllocatableRegionAffinity : public AllStatic {
   struct Affinity {
     Thread* _thread;
   };
 
   static PaddedEnd<Affinity>* _affinity;
-  static THREAD_LOCAL Thread* _self;
-  static THREAD_LOCAL uint _index;
+  static THREAD_LOCAL Thread* _self;
+  static THREAD_LOCAL uint _index;
 
   static uint index_slow();
 public:
   static void initialize();
   static uint index();
 };
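Before the final patch, a note on what its PaddedArray change buys. The directly allocatable region slots are hot: every mutator thread reads them with load_acquire and clears them with release_store. Stored as a plain ShenandoahHeapRegion* array, neighboring slots share a cache line, so threads probing adjacent indices invalidate each other's lines on every store (false sharing). Wrapping each slot in a padded struct, as [PATCH 44/44] below does with PaddedArray/PaddedEnd, gives each slot its own line. The following is a compilable sketch of the idea only, with std::atomic and alignas standing in for HotSpot's Atomic and PaddedEnd, and an assumed 64-byte line size:

// Sketch only: std::atomic/alignas stand in for HotSpot's Atomic::* and
// PaddedEnd; CACHE_LINE_SIZE is an assumed constant, not a HotSpot name.
#include <atomic>
#include <cstddef>

struct Region;                              // stand-in for ShenandoahHeapRegion

constexpr std::size_t CACHE_LINE_SIZE = 64; // assumed typical line size

// Packed layout: adjacent slots share a cache line, so two threads updating
// neighboring slots keep stealing the line from each other (false sharing).
static std::atomic<Region*> packed_slots[16];

// Padded layout: each slot is alone on its cache line, like the PaddedEnd
// entries returned by PaddedArray<...>::create_unfreeable().
struct alignas(CACHE_LINE_SIZE) PaddedSlot {
  std::atomic<Region*> address;
};
static_assert(sizeof(PaddedSlot) == CACHE_LINE_SIZE, "one slot per line");

static PaddedSlot padded_slots[16];

Region* load_slot(unsigned i) {
  // Analogous to Atomic::load_acquire(&_directly_allocatable_regions[i].address)
  return padded_slots[i].address.load(std::memory_order_acquire);
}

void clear_slot(unsigned i) {
  // Analogous to Atomic::release_store(&_directly_allocatable_regions[i].address, nullptr)
  padded_slots[i].address.store(nullptr, std::memory_order_release);
}

This is also why the patch threads ShenandoahHeapRegion* volatile* through the helper signatures: each slot's pointer now lives inside a padded struct, so callers pass the address of its address field rather than an element of a flat array.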
From 53383462d635a9f4b08d722f9eeab4e25b92ef41 Mon Sep 17 00:00:00 2001
From: Xiaolong Peng
Date: Tue, 8 Jul 2025 14:39:31 -0700
Subject: [PATCH 44/44] Use PaddedArray to store directly allocatable regions

---
 .../share/gc/shenandoah/shenandoahFreeSet.cpp | 23 +++++++++----------
 .../share/gc/shenandoah/shenandoahFreeSet.hpp |  7 ++++--
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index 784c4f32d34b4..4687f91a77896 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -793,9 +793,9 @@ ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) :
   clear_internal();
-  _directly_allocatable_regions = NEW_C_HEAP_ARRAY(ShenandoahHeapRegion*, ShenandoahDirectlyAllocatableRegionCount, mtGC);
+  _directly_allocatable_regions = PaddedArray<ShenandoahHeapRegionAddress, mtGC>::create_unfreeable(ShenandoahDirectlyAllocatableRegionCount);
   for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
-    _directly_allocatable_regions[i] = nullptr;
+    _directly_allocatable_regions[i].address = nullptr;
   }
   ShenandoahDirectlyAllocatableRegionAffinity::initialize();
 }
@@ -2137,7 +2137,7 @@ HeapWord* ShenandoahFreeSet::allocate_humongous(ShenandoahAllocRequest& req) {
 void ShenandoahFreeSet::release_all_directly_allocatable_regions() {
   for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
-    ShenandoahHeapRegion** address = _directly_allocatable_regions + i;
+    ShenandoahHeapRegion* volatile* address = &_directly_allocatable_regions[i].address;
     ShenandoahHeapRegion* r = Atomic::load_acquire(address);
     if (r != nullptr) {
       assert(r->reserved_for_direct_allocation(), "Must be");
@@ -2160,11 +2160,11 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ
     constexpr uint max_probes = 3;
     uint idx = start_idx;
     ShenandoahHeapRegion* retirable_regions[max_probes];
-    ShenandoahHeapRegion** retirable_shared_regions_addresses[max_probes];
+    ShenandoahHeapRegion* volatile * retirable_shared_regions_addresses[max_probes];
     HeapWord* obj = nullptr;
     uint count = 0u;
     for (uint i = 0u; i < max_probes; i++) {
-      ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + idx;
+      ShenandoahHeapRegion* volatile * shared_region_address = &_directly_allocatable_regions[idx].address;
       ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address);
       if (r != nullptr && r->reserved_for_direct_allocation()) {
         obj = par_allocate_in_for_mutator(r, req, in_new_region);
@@ -2193,7 +2193,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequ
     if (obj == nullptr) {
       //only tried 3 shared regions, try to steal from other shared regions before OOM
       do {
-        ShenandoahHeapRegion* r = Atomic::load_acquire(_directly_allocatable_regions + idx);
+        ShenandoahHeapRegion* r = Atomic::load_acquire(&_directly_allocatable_regions[idx].address);
         if (r != nullptr && r->reserved_for_direct_allocation()) {
           obj = par_allocate_in_for_mutator(r, req, in_new_region);
           if (obj != nullptr) break;
@@ -2238,7 +2238,7 @@ HeapWord* ShenandoahFreeSet::par_allocate_in_for_mutator(ShenandoahHeapRegion* r
 class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBreakableIterClosure {
 public:
-  ShenandoahHeapRegion*** _shared_region_addresses;
+  ShenandoahHeapRegion* volatile ** _shared_region_addresses;
   const uint _shared_region_address_count;
   uint _current_index = 0u;
   const uint _request_count;
@@ -2249,7 +2249,7 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr
   const size_t _min_req_byte_size;
 
   DirectlyAllocatableRegionAllocationClosure(
-    ShenandoahHeapRegion*** shared_region_addresses, const uint shared_region_address_count, const uint request_count,
+    ShenandoahHeapRegion* volatile * shared_region_addresses[], const uint shared_region_address_count, const uint request_count,
     ShenandoahAllocRequest &req, HeapWord* &obj, bool &in_new_region) :
     _shared_region_addresses(shared_region_addresses), _shared_region_address_count(shared_region_address_count),
     _request_count(request_count), _req(req), _obj(obj), _in_new_region(in_new_region),
@@ -2303,7 +2303,7 @@ class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBr
   }
 };
 
-bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeapRegion** shared_region_address[],
+bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeapRegion* volatile * shared_region_address[],
                                                                   ShenandoahHeapRegion* original_shared_regions[],
                                                                   const uint region_count,
                                                                   ShenandoahAllocRequest &req,
@@ -2353,9 +2353,8 @@ void ShenandoahFreeSet::release_directly_allocatable_region(ShenandoahHeapRegion* region) {
   shenandoah_assert_heaplocked();
   for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
-    ShenandoahHeapRegion** shared_region_address = _directly_allocatable_regions + i;
-    if (Atomic::load_acquire(shared_region_address) == region) {
-      Atomic::release_store(shared_region_address, static_cast<ShenandoahHeapRegion*>(nullptr));
+    if (_directly_allocatable_regions[i].address == region) {
+      Atomic::release_store(&_directly_allocatable_regions[i].address, static_cast<ShenandoahHeapRegion*>(nullptr));
       break;
     }
   }
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
index 682d221c42eb3..94af6241b4190 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
@@ -319,9 +319,12 @@ class ShenandoahDirectlyAllocatableRegionAffinity : public AllStatic {
 class ShenandoahFreeSet : public CHeapObj<mtGC> {
 private:
+  struct ShenandoahHeapRegionAddress {
+    ShenandoahHeapRegion* volatile address;
+  };
   ShenandoahHeap* const _heap;
   ShenandoahRegionPartitions _partitions;
-  ShenandoahHeapRegion** _directly_allocatable_regions;
+  PaddedEnd<ShenandoahHeapRegionAddress>* _directly_allocatable_regions;
 
   HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r);
@@ -420,7 +423,7 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
   template
   HeapWord* par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region);
 
-  bool try_allocate_directly_allocatable_regions(ShenandoahHeapRegion** shared_region_address[],
+  bool try_allocate_directly_allocatable_regions(ShenandoahHeapRegion* volatile * shared_region_address[],
                                                  ShenandoahHeapRegion* original_shared_regions[],
                                                  uint region_count,
                                                  ShenandoahAllocRequest &req,