diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp index 08fd45993462b..71fff8689ad0e 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp @@ -27,6 +27,7 @@ #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahEvacInfo.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahGeneration.hpp" #include "gc/shenandoah/shenandoahGenerationalHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" @@ -94,6 +95,9 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio immediate_regions++; immediate_garbage += garbage; region->make_trash_immediate(); + if (region->reserved_for_direct_allocation()) { + heap->free_set()->release_directly_allocatable_region(region); + } } else { bool is_candidate; // This is our candidate for later consideration. diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp index b151a75e6e7e5..f85ef76e8e27a 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp @@ -27,6 +27,7 @@ #include "gc/shared/gcCause.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp" #include "logging/log.hpp" @@ -111,6 +112,9 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec immediate_regions++; immediate_garbage += garbage; region->make_trash_immediate(); + if (region->reserved_for_direct_allocation()) { + heap->free_set()->release_directly_allocatable_region(region); + } } else { // This is our candidate for later consideration. 
candidates[cand_idx].set_region_and_garbage(region, garbage); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp index 25b900f8d7772..60acaf349da96 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp @@ -27,6 +27,7 @@ #include "gc/shenandoah/shenandoahAgeCensus.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" @@ -101,6 +102,9 @@ void ShenandoahCollectionSet::add_region(ShenandoahHeapRegion* r) { if (ShenandoahHeap::heap()->mode()->is_generational() && r->age() >= ShenandoahGenerationalHeap::heap()->age_census()->tenuring_threshold()) { _young_bytes_to_promote += live; } + if (r->reserved_for_direct_allocation()) { + _heap->free_set()->release_directly_allocatable_region(r); + } } else if (r->is_old()) { _old_bytes_to_evacuate += live; _old_garbage += garbage; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 1acb6a23e7a4c..4687f91a77896 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -33,6 +33,7 @@ #include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahSimpleBitMap.hpp" #include "gc/shenandoah/shenandoahSimpleBitMap.inline.hpp" +#include "gc/shenandoah/shenandoahUtils.hpp" #include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" @@ -229,7 +230,6 @@ void ShenandoahRegionPartitions::make_all_regions_unavailable() { _rightmosts_empty[partition_id] = -1;; _capacity[partition_id] = 0; _used[partition_id] = 0; - _available[partition_id] = FreeSetUnderConstruction; } _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = _region_counts[int(ShenandoahFreeSetPartitionId::Collector)] = 0; } @@ -244,21 +244,18 @@ void ShenandoahRegionPartitions::establish_mutator_intervals(idx_t mutator_leftm _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_leftmost_empty; _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_rightmost_empty; - _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count; - _used[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_used; - _capacity[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count * _region_size_bytes; - _available[int(ShenandoahFreeSetPartitionId::Mutator)] = - _capacity[int(ShenandoahFreeSetPartitionId::Mutator)] - _used[int(ShenandoahFreeSetPartitionId::Mutator)]; + Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::Mutator), mutator_region_count); + Atomic::store(_used + int(ShenandoahFreeSetPartitionId::Mutator), mutator_used); + Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::Mutator), mutator_region_count * _region_size_bytes); _leftmosts[int(ShenandoahFreeSetPartitionId::Collector)] = _max; _rightmosts[int(ShenandoahFreeSetPartitionId::Collector)] = -1; _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = _max; _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = -1; - _region_counts[int(ShenandoahFreeSetPartitionId::Collector)] = 0; - _used[int(ShenandoahFreeSetPartitionId::Collector)] = 
0;
-  _capacity[int(ShenandoahFreeSetPartitionId::Collector)] = 0;
-  _available[int(ShenandoahFreeSetPartitionId::Collector)] = 0;
+  Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::Collector), size_t(0));
+  Atomic::store(_used + int(ShenandoahFreeSetPartitionId::Collector), size_t(0));
+  Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::Collector), size_t(0));
 }
 
 void ShenandoahRegionPartitions::establish_old_collector_intervals(idx_t old_collector_leftmost, idx_t old_collector_rightmost,
@@ -272,22 +269,14 @@ void ShenandoahRegionPartitions::establish_old_collector_intervals(idx_t old_col
   _leftmosts_empty[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_leftmost_empty;
   _rightmosts_empty[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_rightmost_empty;
 
-  _region_counts[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_region_count;
-  _used[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_used;
-  _capacity[int(ShenandoahFreeSetPartitionId::OldCollector)] = old_collector_region_count * _region_size_bytes;
-  _available[int(ShenandoahFreeSetPartitionId::OldCollector)] =
-    _capacity[int(ShenandoahFreeSetPartitionId::OldCollector)] - _used[int(ShenandoahFreeSetPartitionId::OldCollector)];
+  Atomic::store(_region_counts + int(ShenandoahFreeSetPartitionId::OldCollector), old_collector_region_count);
+  Atomic::store(_used + int(ShenandoahFreeSetPartitionId::OldCollector), old_collector_used);
+  Atomic::store(_capacity + int(ShenandoahFreeSetPartitionId::OldCollector), old_collector_region_count * _region_size_bytes);
 }
 
 void ShenandoahRegionPartitions::increase_used(ShenandoahFreeSetPartitionId which_partition, size_t bytes) {
-  shenandoah_assert_heaplocked();
   assert (which_partition < NumPartitions, "Partition must be valid");
-
-  _used[int(which_partition)] += bytes;
-  _available[int(which_partition)] -= bytes;
-  assert (_used[int(which_partition)] <= _capacity[int(which_partition)],
-          "Must not use (%zu) more than capacity (%zu) after increase by %zu",
-          _used[int(which_partition)], _capacity[int(which_partition)], bytes);
+  Atomic::add(_used + int(which_partition), bytes);
 }
 
 inline void ShenandoahRegionPartitions::shrink_interval_if_range_modifies_either_boundary(
@@ -389,7 +378,6 @@ void ShenandoahRegionPartitions::make_free(idx_t idx, ShenandoahFreeSetPartition
   _membership[int(which_partition)].set_bit(idx);
   _capacity[int(which_partition)] += _region_size_bytes;
   _used[int(which_partition)] += _region_size_bytes - available;
-  _available[int(which_partition)] += available;
   expand_interval_if_boundary_modified(which_partition, idx, available);
   _region_counts[int(which_partition)]++;
 }
@@ -448,17 +436,19 @@ void ShenandoahRegionPartitions::move_from_partition_to_partition(idx_t idx, She
          "Orig partition used: %zu must exceed moved used: %zu within region %zd",
          _used[int(orig_partition)], used, idx);
 
+  if (orig_partition == ShenandoahFreeSetPartitionId::Mutator && r->reserved_for_direct_allocation()) {
+    ShenandoahHeap::heap()->free_set()->release_directly_allocatable_region(r);
+  }
+
   _membership[int(orig_partition)].clear_bit(idx);
   _membership[int(new_partition)].set_bit(idx);
   _capacity[int(orig_partition)] -= _region_size_bytes;
   _used[int(orig_partition)] -= used;
-  _available[int(orig_partition)] -= available;
   shrink_interval_if_boundary_modified(orig_partition, idx);
 
   _capacity[int(new_partition)] += _region_size_bytes;;
   _used[int(new_partition)] += used;
-  _available[int(new_partition)] += available;
   expand_interval_if_boundary_modified(new_partition, idx, available);
 
   _region_counts[int(orig_partition)]--;
@@ -601,6 +591,7 @@ void ShenandoahRegionPartitions::assert_bounds() {
   idx_t rightmosts[UIntNumPartitions];
   idx_t empty_leftmosts[UIntNumPartitions];
   idx_t empty_rightmosts[UIntNumPartitions];
+  ShenandoahHeap* heap = ShenandoahHeap::heap();
 
   for (uint i = 0; i < UIntNumPartitions; i++) {
     leftmosts[i] = _max;
@@ -621,18 +612,31 @@
       {
         size_t capacity = _free_set->alloc_capacity(i);
         bool is_empty = (capacity == _region_size_bytes);
-        assert(capacity > 0, "free regions must have allocation capacity");
+        // TODO: remove this assert; it cannot hold once mutators are allowed to allocate without the heap lock.
+        //assert(capacity > 0, "free regions must have allocation capacity");
         if (i < leftmosts[int(partition)]) {
           leftmosts[int(partition)] = i;
         }
         if (is_empty && (i < empty_leftmosts[int(partition)])) {
-          empty_leftmosts[int(partition)] = i;
+          if (partition == ShenandoahFreeSetPartitionId::Mutator) {
+            if (!heap->get_region(i)->reserved_for_direct_allocation()) {
+              empty_leftmosts[int(partition)] = i;
+            }
+          } else {
+            empty_leftmosts[int(partition)] = i;
+          }
         }
         if (i > rightmosts[int(partition)]) {
           rightmosts[int(partition)] = i;
         }
         if (is_empty && (i > empty_rightmosts[int(partition)])) {
-          empty_rightmosts[int(partition)] = i;
+          if (partition == ShenandoahFreeSetPartitionId::Mutator) {
+            if (!heap->get_region(i)->reserved_for_direct_allocation()) {
+              empty_rightmosts[int(partition)] = i;
+            }
+          } else {
+            empty_rightmosts[int(partition)] = i;
+          }
         }
         break;
       }
@@ -745,12 +749,55 @@
 }
 #endif
 
+PaddedEnd<ShenandoahDirectlyAllocatableRegionAffinity::Affinity>* ShenandoahDirectlyAllocatableRegionAffinity::_affinity = nullptr;
+THREAD_LOCAL Thread* ShenandoahDirectlyAllocatableRegionAffinity::_self = DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF;
+THREAD_LOCAL uint ShenandoahDirectlyAllocatableRegionAffinity::_index = 0;
+
+uint ShenandoahDirectlyAllocatableRegionAffinity::index_slow() {
+  // Set current thread
+  if (_self == DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF) {
+    _self = Thread::current();
+  }
+
+  // Create a new random index where the thread will start allocation
+  _index = static_cast<uint>(os::random()) % ShenandoahDirectlyAllocatableRegionCount;
+
+  // Update affinity table
+  _affinity[_index]._thread = _self;
+
+  return _index;
+}
+
+void ShenandoahDirectlyAllocatableRegionAffinity::initialize() {
+  assert(_affinity == nullptr, "Already initialized");
+  _affinity = PaddedArray<Affinity, mtGC>::create_unfreeable(ShenandoahDirectlyAllocatableRegionCount);
+  for (uint32_t i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
+    _affinity[i]._thread = DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_AFFINITY;
+  }
+}
+
+uint ShenandoahDirectlyAllocatableRegionAffinity::index() {
+  assert(_affinity != nullptr, "Not initialized");
+  // Fast path
+  if (_affinity[_index]._thread == _self) {
+    return _index;
+  }
+
+  // Slow path
+  return index_slow();
+}
+
 ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) :
   _heap(heap),
   _partitions(max_regions, this),
   _alloc_bias_weight(0)
 {
   clear_internal();
+  _directly_allocatable_regions = PaddedArray<ShenandoahHeapRegionAddress, mtGC>::create_unfreeable(ShenandoahDirectlyAllocatableRegionCount);
+  for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
+    _directly_allocatable_regions[i].address = nullptr;
+  }
+  ShenandoahDirectlyAllocatableRegionAffinity::initialize();
 }
 
 void ShenandoahFreeSet::add_promoted_in_place_region_to_old_collector(ShenandoahHeapRegion*
region) { @@ -784,7 +831,7 @@ template HeapWord* ShenandoahFreeSet::allocate_with_affiliation(Iter& iterator, ShenandoahAffiliation affiliation, ShenandoahAllocRequest& req, bool& in_new_region) { for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) { ShenandoahHeapRegion* r = _heap->get_region(idx); - if (r->affiliation() == affiliation) { + if (r->affiliation() == affiliation && !r->reserved_for_direct_allocation()) { HeapWord* result = try_allocate_in(r, req, in_new_region); if (result != nullptr) { return result; @@ -828,7 +875,7 @@ HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool& } HeapWord* ShenandoahFreeSet::allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) { - update_allocation_bias(); + //update_allocation_bias(); if (_partitions.is_empty(ShenandoahFreeSetPartitionId::Mutator)) { // There is no recovery. Mutator does not touch collector view at all. @@ -880,7 +927,7 @@ HeapWord* ShenandoahFreeSet::allocate_from_regions(Iter& iterator, ShenandoahAll for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) { ShenandoahHeapRegion* r = _heap->get_region(idx); size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab) ? req.min_size() : req.size(); - if (alloc_capacity(r) >= min_size) { + if (!r->reserved_for_direct_allocation() && alloc_capacity(r) >= min_size) { HeapWord* result = try_allocate_in(r, req, in_new_region); if (result != nullptr) { return result; @@ -948,7 +995,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_from_mutator(ShenandoahAllocRequest& r ShenandoahRightLeftIterator iterator(&_partitions, ShenandoahFreeSetPartitionId::Mutator, true); for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) { ShenandoahHeapRegion* r = _heap->get_region(idx); - if (can_allocate_from(r)) { + if (can_allocate_from(r) && !r->reserved_for_direct_allocation()) { if (req.is_old()) { if (!flip_to_old_gc(r)) { continue; @@ -1202,7 +1249,8 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { // We've confirmed num contiguous regions belonging to Mutator partition, so no need to confirm membership. // If region is not completely free, the current [beg; end] is useless, and we may fast-forward. If we can extend // the existing range, we can exploit that certain regions are already known to be in the Mutator free set. - while (!can_allocate_from(_heap->get_region(end))) { + ShenandoahHeapRegion* region = _heap->get_region(end); + while (!can_allocate_from(region) || region->reserved_for_direct_allocation()) { // region[end] is not empty, so we restart our search after region[end] idx_t slide_delta = end + 1 - beg; if (beg + slide_delta > last_possible_start) { @@ -1225,6 +1273,7 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { return nullptr; } end = beg; + region = _heap->get_region(end); } if ((end - beg + 1) == num) { @@ -1979,12 +2028,12 @@ void ShenandoahFreeSet::log_status() { } size_t max_humongous = max_contig * ShenandoahHeapRegion::region_size_bytes(); - size_t free = capacity() - used(); // Since certain regions that belonged to the Mutator free partition at the time of most recent rebuild may have been // retired, the sum of used and capacities within regions that are still in the Mutator free partition may not match // my internally tracked values of used() and free(). 
-  assert(free == total_free, "Free memory should match");
+  // TODO: remove this assert; the values can no longer be expected to match, since mutators may allocate in a region without acquiring the heap lock.
+  //assert(free == total_free, "Free memory should match");
   ls.print("Free: %zu%s, Max: %zu%s regular, %zu%s humongous, ",
            byte_size_in_proper_unit(total_free), proper_unit_for_byte_size(total_free),
            byte_size_in_proper_unit(max), proper_unit_for_byte_size(max),
@@ -2080,6 +2129,274 @@ HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_
   }
 }
 
+HeapWord* ShenandoahFreeSet::allocate_humongous(ShenandoahAllocRequest& req) {
+  assert(ShenandoahHeapRegion::requires_humongous(req.size()), "Must be humongous alloc");
+  ShenandoahHeapLocker locker(_heap->lock(), req.is_mutator_alloc());
+  return allocate_contiguous(req);
+}
+
+void ShenandoahFreeSet::release_all_directly_allocatable_regions() {
+  for (uint i = 0; i < ShenandoahDirectlyAllocatableRegionCount; i++) {
+    ShenandoahHeapRegion* volatile* address = &_directly_allocatable_regions[i].address;
+    ShenandoahHeapRegion* r = Atomic::load_acquire(address);
+    if (r != nullptr) {
+      assert(r->reserved_for_direct_allocation(), "Must be");
+      Atomic::release_store_fence(address, static_cast<ShenandoahHeapRegion*>(nullptr));
+      r->release_from_direct_allocation();
+    }
+  }
+}
+
+template <bool IS_TLAB>
+HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) {
+  shenandoah_assert_not_heaplocked();
+  assert(req.is_mutator_alloc(), "Must be mutator allocation");
+  assert(req.is_young(), "Mutator allocations always come from young generation.");
+  assert(!ShenandoahHeapRegion::requires_humongous(req.size()), "Must not");
+  assert(req.type() == ShenandoahAllocRequest::_alloc_tlab || req.type() == ShenandoahAllocRequest::_alloc_shared, "Must be");
+
+  const uint start_idx = ShenandoahDirectlyAllocatableRegionAffinity::index();
+  for (;;) {
+    constexpr uint max_probes = 3;
+    uint idx = start_idx;
+    ShenandoahHeapRegion* retirable_regions[max_probes];
+    ShenandoahHeapRegion* volatile * retirable_shared_regions_addresses[max_probes];
+    HeapWord* obj = nullptr;
+    uint count = 0u;
+    for (uint i = 0u; i < max_probes; i++) {
+      ShenandoahHeapRegion* volatile * shared_region_address = &_directly_allocatable_regions[idx].address;
+      ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address);
+      if (r != nullptr && r->reserved_for_direct_allocation()) {
+        obj = par_allocate_in_for_mutator<IS_TLAB>(r, req, in_new_region);
+        if (obj != nullptr) {
+          return obj;
+        }
+      }
+
+      if (r == nullptr || r->free() < PLAB::min_size()) {
+        // Region is ready to retire
+        retirable_regions[count] = r;
+        retirable_shared_regions_addresses[count] = shared_region_address;
+        count++;
+      }
+      idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount;
+    }
+    // Allocation failed in all of the probed directly allocatable regions, and none of them is ready to be
+    // retired and replaced, so fall back to allocating from other regions while holding the heap lock.
+    if (count == 0u) {
+      ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true);
+      return allocate_for_mutator(req, in_new_region);
+    }
+    // At least one of the probed directly allocatable regions is ready to be retired and replaced, so
+    // grab the heap lock and try to retire all of the ready-to-retire shared regions.
+    if (!try_allocate_directly_allocatable_regions(retirable_shared_regions_addresses, retirable_regions, count, req, obj, in_new_region)) {
+      if (obj == nullptr) {
+        // Only the probed shared regions have been tried so far; try to steal from the other shared regions before reporting OOM.
+        do {
+          ShenandoahHeapRegion* r = Atomic::load_acquire(&_directly_allocatable_regions[idx].address);
+          if (r != nullptr && r->reserved_for_direct_allocation()) {
+            obj = par_allocate_in_for_mutator<IS_TLAB>(r, req, in_new_region);
+            if (obj != nullptr) break;
+          }
+          idx = (idx + 1) % ShenandoahDirectlyAllocatableRegionCount;
+        } while (idx != start_idx);
+        return obj;
+      }
+    }
+    // Regardless of the outcome of installing new directly allocatable regions, obj may have been allocated along the way.
+    if (obj != nullptr) {
+      _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize);
+      return obj;
+    }
+  }
+}
+
+// Explicit specializations
+template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator<true>(ShenandoahAllocRequest &req, bool &in_new_region);
+template HeapWord* ShenandoahFreeSet::par_allocate_single_for_mutator<false>(ShenandoahAllocRequest &req, bool &in_new_region);
+
+template <bool IS_TLAB>
+HeapWord* ShenandoahFreeSet::par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region) {
+  HeapWord* obj = nullptr;
+  size_t actual_size = req.size();
+  if (IS_TLAB) {
+    obj = region->allocate_lab_atomic(req, actual_size);
+  } else {
+    obj = region->allocate_atomic(actual_size, req);
+  }
+  if (obj != nullptr) {
+    assert(actual_size > 0, "Must be");
+    req.set_actual_size(actual_size);
+    if (obj == region->bottom()) {
+      // Set to true if it is the first object/tlab allocated in the region.
+      in_new_region = true;
+    }
+    _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize);
+  }
+  return obj;
+}
+
+class DirectlyAllocatableRegionAllocationClosure : public ShenandoahHeapRegionBreakableIterClosure {
+public:
+  ShenandoahHeapRegion* volatile ** _shared_region_addresses;
+  const uint _shared_region_address_count;
+  uint _current_index = 0u;
+  const uint _request_count;
+  uint _fulfilled_count = 0u;
+  ShenandoahAllocRequest &_req;
+  HeapWord* &_obj;
+  bool &_in_new_region;
+  const size_t _min_req_byte_size;
+
+  DirectlyAllocatableRegionAllocationClosure(
+    ShenandoahHeapRegion* volatile * shared_region_addresses[], const uint shared_region_address_count, const uint request_count,
+    ShenandoahAllocRequest &req, HeapWord* &obj, bool &in_new_region)
+  : _shared_region_addresses(shared_region_addresses), _shared_region_address_count(shared_region_address_count), _request_count(request_count),
+    _req(req), _obj(obj), _in_new_region(in_new_region),
+    _min_req_byte_size((req.type() == ShenandoahAllocRequest::_alloc_tlab ?
req.min_size() : req.size()) * HeapWordSize) { + skip_invalid_address(); + } + + void skip_invalid_address() { + while (_current_index < _shared_region_address_count && _shared_region_addresses[_current_index] == nullptr) { + _current_index++; + } + } + + bool heap_region_do(ShenandoahHeapRegion *r) override { + if (r->reserved_for_direct_allocation()) return false; + if (r->is_empty()) { + if (ShenandoahHeap::heap()->is_concurrent_weak_root_in_progress() && r->is_trash()) { + return false; + } + r->try_recycle_under_lock(); + + r->reserve_for_direct_allocation(); + r->set_affiliation(YOUNG_GENERATION); + r->make_regular_allocation(YOUNG_GENERATION); + ShenandoahHeap::heap()->generation_for(r->affiliation())->increment_affiliated_region_count(); + if (_obj == nullptr) { + size_t actual_size = _req.size(); + _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : r->allocate(actual_size, _req); + _req.set_actual_size(actual_size); + _in_new_region = true; + } + OrderAccess::fence(); + Atomic::store(_shared_region_addresses[_current_index++], r); + skip_invalid_address(); + _fulfilled_count++; + } else if (r->affiliation() == YOUNG_GENERATION && r->is_regular() && + r->get_top_before_promote() != nullptr && r->free() >= _min_req_byte_size) { + if (_obj == nullptr) { + size_t actual_size = _req.size(); + _obj = _req.is_lab_alloc() ? r ->allocate_lab(_req, actual_size) : r->allocate(actual_size, _req); + _req.set_actual_size(actual_size); + _in_new_region = false; + } else { + r->reserve_for_direct_allocation(); + Atomic::store(_shared_region_addresses[_current_index++], r); + skip_invalid_address(); + _fulfilled_count++; + } + } + return _fulfilled_count == _request_count || _current_index == _shared_region_address_count; + } +}; + +bool ShenandoahFreeSet::try_allocate_directly_allocatable_regions(ShenandoahHeapRegion* volatile * shared_region_address[], + ShenandoahHeapRegion* original_shared_regions[], + const uint region_count, + ShenandoahAllocRequest &req, + HeapWord* &obj, + bool &in_new_region) { + assert(Thread::current()->is_Java_thread(), "Must be mutator"); + assert(region_count > 0u && region_count <= ShenandoahDirectlyAllocatableRegionCount, "Must be"); + shenandoah_assert_not_heaplocked(); + + ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock(), true); + uint request_count = 0u; + uint fulfilled_by_others = 0u; + for (uint i = 0u; i < region_count; i++) { + ShenandoahHeapRegion* r = Atomic::load_acquire(shared_region_address[i]); + if (r != original_shared_regions[i]) { + fulfilled_by_others++; + shared_region_address[i] = nullptr; + original_shared_regions[i] = nullptr; + } else { + request_count++; + if (r != nullptr) { + if (r->free() < PLAB::min_size()) { + Atomic::release_store_fence(shared_region_address[i], static_cast(nullptr)); + // TODO confirm when&why the region is moved out of Mutator partition? + if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, r->index())) { + _partitions.retire_from_partition(ShenandoahFreeSetPartitionId::Mutator, r->index(), r->used()); + } + r->release_from_direct_allocation(); + } else { + // Although r is same as original one when tried CAS allocation, but it has more free space. 
+ fulfilled_by_others++; + shared_region_address[i] = nullptr; + original_shared_regions[i] = nullptr; + request_count--; + } + } + } + } + + DirectlyAllocatableRegionAllocationClosure cl(shared_region_address, region_count, request_count, req, obj, in_new_region); + if (request_count > 0u) { + iterate_regions_for_alloc(&cl, true); + } + return cl._fulfilled_count > 0u || fulfilled_by_others > 0u; +} + +void ShenandoahFreeSet::release_directly_allocatable_region(ShenandoahHeapRegion* region) { + shenandoah_assert_heaplocked(); + for (uint i = 0u; i < ShenandoahDirectlyAllocatableRegionCount; i++) { + if (_directly_allocatable_regions[i].address == region) { + Atomic::release_store(&_directly_allocatable_regions[i].address, static_cast(nullptr)); + break; + } + } + OrderAccess::fence(); + region->release_from_direct_allocation(); +} + +template +uint ShenandoahFreeSet::iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty) { + assert((IS_MUTATOR && !IS_OLD) || !IS_MUTATOR, "Sanity check"); + ShenandoahFreeSetPartitionId partition = IS_MUTATOR ? ShenandoahFreeSetPartitionId::Mutator : + (IS_OLD ? ShenandoahFreeSetPartitionId::OldCollector : ShenandoahFreeSetPartitionId::Mutator); + if (_partitions.is_empty(partition)) { + return 0u; + } + /* + if (IS_MUTATOR) { + update_allocation_bias(); + } + */ + if (_partitions.alloc_from_left_bias(partition)) { + ShenandoahLeftRightIterator iterator(&_partitions, partition, use_empty); + return iterate_regions_for_alloc(iterator, cl); + } else { + ShenandoahRightLeftIterator iterator(&_partitions, partition, use_empty); + return iterate_regions_for_alloc(iterator, cl); + } +} + +template +uint ShenandoahFreeSet::iterate_regions_for_alloc(Iter& iterator, ShenandoahHeapRegionBreakableIterClosure* cl) { + uint regions_iterated = 0u; + for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) { + regions_iterated++; + ShenandoahHeapRegion* r = _heap->get_region(idx); + if (cl->heap_region_do(r)) { + break; + } + } + return regions_iterated; +} + void ShenandoahFreeSet::print_on(outputStream* out) const { out->print_cr("Mutator Free Set: %zu", _partitions.count(ShenandoahFreeSetPartitionId::Mutator)); ShenandoahLeftRightIterator mutator(const_cast(&_partitions), ShenandoahFreeSetPartitionId::Mutator); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 55f23480618b9..94af6241b4190 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -29,6 +29,7 @@ #include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc/shenandoah/shenandoahSimpleBitMap.hpp" +#include "memory/padded.inline.hpp" // Each ShenandoahHeapRegion is associated with a ShenandoahFreeSetPartitionId. enum class ShenandoahFreeSetPartitionId : uint8_t { @@ -78,10 +79,9 @@ class ShenandoahRegionPartitions { // are denoted in bytes. Note that some regions that had been assigned to a particular partition at rebuild time // may have been retired following the rebuild. The tallies for these regions are still reflected in _capacity[p] // and _used[p], even though the region may have been removed from the free set. 
- size_t _capacity[UIntNumPartitions]; - size_t _used[UIntNumPartitions]; - size_t _available[UIntNumPartitions]; - size_t _region_counts[UIntNumPartitions]; + size_t volatile _capacity[UIntNumPartitions]; + size_t volatile _used[UIntNumPartitions]; + size_t volatile _region_counts[UIntNumPartitions]; // For each partition p, _left_to_right_bias is true iff allocations are normally made from lower indexed regions // before higher indexed regions. @@ -213,56 +213,40 @@ class ShenandoahRegionPartitions { inline size_t capacity_of(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - return _capacity[int(which_partition)]; + return Atomic::load(_capacity + int(which_partition)); } inline size_t used_by(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - return _used[int(which_partition)]; + return Atomic::load(_used + int(which_partition)); } inline size_t available_in(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free set must be valid"); - shenandoah_assert_heaplocked(); - assert(_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)], - "Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s", - _available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)], - partition_membership_name(ssize_t(which_partition))); - return _available[int(which_partition)]; + return capacity_of(which_partition) - used_by(which_partition); } // Return available_in assuming caller does not hold the heap lock. In production builds, available is // returned without acquiring the lock. In debug builds, the global heap lock is acquired in order to // enforce a consistency assert. 
inline size_t available_in_not_locked(ShenandoahFreeSetPartitionId which_partition) const { - assert (which_partition < NumPartitions, "selected free set must be valid"); - shenandoah_assert_not_heaplocked(); -#ifdef ASSERT - ShenandoahHeapLocker locker(ShenandoahHeap::heap()->lock()); - assert((_available[int(which_partition)] == FreeSetUnderConstruction) || - (_available[int(which_partition)] == _capacity[int(which_partition)] - _used[int(which_partition)]), - "Expect available (%zu) equals capacity (%zu) - used (%zu) for partition %s", - _available[int(which_partition)], _capacity[int(which_partition)], _used[int(which_partition)], - partition_membership_name(ssize_t(which_partition))); -#endif - return _available[int(which_partition)]; + return available_in(which_partition); } inline void set_capacity_of(ShenandoahFreeSetPartitionId which_partition, size_t value) { - shenandoah_assert_heaplocked(); assert (which_partition < NumPartitions, "selected free set must be valid"); - _capacity[int(which_partition)] = value; - _available[int(which_partition)] = value - _used[int(which_partition)]; + Atomic::store(_capacity + int(which_partition), value); } inline void set_used_by(ShenandoahFreeSetPartitionId which_partition, size_t value) { - shenandoah_assert_heaplocked(); assert (which_partition < NumPartitions, "selected free set must be valid"); - _used[int(which_partition)] = value; - _available[int(which_partition)] = _capacity[int(which_partition)] - value; + Atomic::store(_used + int(which_partition), value); } - inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { return _region_counts[int(which_partition)]; } + inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free set must be valid"); + return Atomic::load(_region_counts + int(which_partition)); + } // Assure leftmost, rightmost, leftmost_empty, and rightmost_empty bounds are valid for all free sets. // Valid bounds honor all of the following (where max is the number of heap regions): @@ -287,6 +271,28 @@ class ShenandoahRegionPartitions { void assert_bounds() NOT_DEBUG_RETURN; }; +#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_AFFINITY ((Thread*)-1) +#define DIRECTLY_ALLOCATABLE_REGION_UNKNOWN_SELF ((Thread*)-2) +// When mutator threads allocate from directly allocatable regions, ideally the allocation should be evenly +// distributed to all the directly allocatable regions, random is the best portable option for this, but with random +// distribution it may worsen memory locality, e.g. two consecutive allocation from same thread are randomly +// distributed to different allocatable regions. ShenandoahDirectlyAllocatableRegionAffinity solves/mitigates +// the memory locality issue. +// The idea and code is borrowed from ZGC's CPU affinity, but with random number instead of CPU id. +class ShenandoahDirectlyAllocatableRegionAffinity : public AllStatic { + struct Affinity { + Thread* _thread; + }; + + static PaddedEnd* _affinity; + static THREAD_LOCAL Thread* _self; + static THREAD_LOCAL uint _index; + static uint index_slow(); +public: + static void initialize(); + static uint index(); +}; + // Publicly, ShenandoahFreeSet represents memory that is available to mutator threads. The public capacity(), used(), // and available() methods represent this public notion of memory that is under control of the mutator. Separately, // ShenandoahFreeSet also represents memory available to garbage collection activities for compaction purposes. 
@@ -313,8 +319,12 @@ class ShenandoahRegionPartitions { class ShenandoahFreeSet : public CHeapObj { private: + struct ShenandoahHeapRegionAddress { + ShenandoahHeapRegion* volatile address; + }; ShenandoahHeap* const _heap; ShenandoahRegionPartitions _partitions; + PaddedEnd* _directly_allocatable_regions; HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r); @@ -410,6 +420,21 @@ class ShenandoahFreeSet : public CHeapObj { // log status, assuming lock has already been acquired by the caller. void log_status(); + template + HeapWord* par_allocate_in_for_mutator(ShenandoahHeapRegion* region, ShenandoahAllocRequest &req, bool &in_new_region); + + bool try_allocate_directly_allocatable_regions(ShenandoahHeapRegion* volatile * shared_region_address[], + ShenandoahHeapRegion* original_shared_regions[], + uint region_count, + ShenandoahAllocRequest &req, + HeapWord* &obj, + bool &in_new_region); + template + uint iterate_regions_for_alloc(ShenandoahHeapRegionBreakableIterClosure* cl, bool use_empty); + + template + uint iterate_regions_for_alloc(Iter& iterator, ShenandoahHeapRegionBreakableIterClosure* cl); + public: static const size_t FreeSetUnderConstruction = ShenandoahRegionPartitions::FreeSetUnderConstruction; @@ -484,6 +509,14 @@ class ShenandoahFreeSet : public CHeapObj { HeapWord* allocate(ShenandoahAllocRequest& req, bool& in_new_region); + HeapWord* allocate_humongous(ShenandoahAllocRequest &req); + + void release_all_directly_allocatable_regions(); + + void release_directly_allocatable_region(ShenandoahHeapRegion *region); + + template + HeapWord* par_allocate_single_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region); /* * Internal fragmentation metric: describes how fragmented the heap regions are. * diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp index 27ff45e67de19..8d5eaac4c3d66 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -219,6 +219,8 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { heap->tlabs_retire(ResizeTLAB); } + heap->free_set()->release_all_directly_allocatable_regions(); + OrderAccess::fence(); phase1_mark_heap(); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp index 9a511de939ccb..01de1cfb3fea8 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp @@ -565,7 +565,7 @@ size_t ShenandoahGeneration::select_aged_regions(size_t old_available) { // old generation. HeapWord* tams = ctx->top_at_mark_start(r); HeapWord* original_top = r->top(); - if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top) { + if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top && !r->reserved_for_direct_allocation()) { // No allocations from this region have been made during concurrent mark. It meets all the criteria // for in-place-promotion. 
Though we only need the value of top when we fill the end of the region,
       // we use this field to indicate that this region should be promoted in place during the evacuation
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
index 50881a5077833..0fd230153d70b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -976,7 +976,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
   }
 
   if (!ShenandoahAllocFailureALot || !should_inject_alloc_failure()) {
-    result = allocate_memory_under_lock(req, in_new_region);
+    result = allocate_memory_for_mutator(req, in_new_region);
   }
 
   // Check that gc overhead is not exceeded.
@@ -1008,7 +1008,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
     const size_t original_count = shenandoah_policy()->full_gc_count();
     while (result == nullptr && should_retry_allocation(original_count)) {
       control_thread()->handle_alloc_failure(req, true);
-      result = allocate_memory_under_lock(req, in_new_region);
+      result = allocate_memory_for_mutator(req, in_new_region);
     }
     if (result != nullptr) {
       // If our allocation request has been satisfied after it initially failed, we count this as good gc progress
@@ -1062,6 +1062,22 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
   return result;
 }
 
+HeapWord* ShenandoahHeap::allocate_memory_for_mutator(ShenandoahAllocRequest& req, bool& in_new_region) {
+  assert(req.is_mutator_alloc(), "Sanity");
+  assert(!req.is_old(), "Sanity");
+  shenandoah_assert_not_heaplocked();
+  ShenandoahFreeSet* free_set = ShenandoahHeap::free_set();
+  if (ShenandoahHeapRegion::requires_humongous(req.size())) {
+    in_new_region = true;
+    return free_set->allocate_humongous(req);
+  }
+  if (req.is_lab_alloc()) {
+    return free_set->par_allocate_single_for_mutator<true>(req, in_new_region);
+  } else {
+    return free_set->par_allocate_single_for_mutator<false>(req, in_new_region);
+  }
+}
+
 inline bool ShenandoahHeap::should_retry_allocation(size_t original_full_gc_count) const {
   return shenandoah_policy()->full_gc_count() == original_full_gc_count
       && !shenandoah_policy()->is_at_shutdown();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
index 4124bf8be7f5a..d50c76cab12e5 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -118,6 +118,12 @@ class ShenandoahHeapRegionClosure : public StackObj {
   virtual bool is_thread_safe() { return false; }
 };
 
+class ShenandoahHeapRegionBreakableIterClosure : public StackObj {
+public:
+  // Return true to break the iteration loop.
+ virtual bool heap_region_do(ShenandoahHeapRegion* r) { return false; }; +}; + typedef ShenandoahLock ShenandoahHeapLock; typedef ShenandoahLocker ShenandoahHeapLocker; typedef Stack ShenandoahScanObjectStack; @@ -691,6 +697,7 @@ class ShenandoahHeap : public CollectedHeap { private: HeapWord* allocate_memory_under_lock(ShenandoahAllocRequest& request, bool& in_new_region); + HeapWord* allocate_memory_for_mutator(ShenandoahAllocRequest& request, bool& in_new_region); HeapWord* allocate_from_gclab_slow(Thread* thread, size_t size); HeapWord* allocate_new_gclab(size_t min_size, size_t word_size, size_t* actual_size); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp index 05eb0c299a5ea..6ebfbb6a3d60f 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp @@ -89,6 +89,7 @@ ShenandoahHeapRegion::ShenandoahHeapRegion(HeapWord* start, size_t index, bool c SpaceMangler::mangle_region(MemRegion(_bottom, _end)); } _recycling.unset(); + _direct_alloc_reserved.unset(); } void ShenandoahHeapRegion::report_illegal_transition(const char *method) { @@ -370,25 +371,25 @@ void ShenandoahHeapRegion::make_committed_bypass() { } void ShenandoahHeapRegion::reset_alloc_metadata() { - _tlab_allocs = 0; - _gclab_allocs = 0; - _plab_allocs = 0; + Atomic::store(&_tlab_allocs, size_t(0)); + Atomic::store(&_gclab_allocs, size_t(0)); + Atomic::store(&_plab_allocs, size_t(0)); } size_t ShenandoahHeapRegion::get_shared_allocs() const { - return used() - (_tlab_allocs + _gclab_allocs + _plab_allocs) * HeapWordSize; + return used() - (Atomic::load(&_tlab_allocs) + Atomic::load(&_gclab_allocs) + Atomic::load(&_plab_allocs)) * HeapWordSize; } size_t ShenandoahHeapRegion::get_tlab_allocs() const { - return _tlab_allocs * HeapWordSize; + return Atomic::load(&_tlab_allocs) * HeapWordSize; } size_t ShenandoahHeapRegion::get_gclab_allocs() const { - return _gclab_allocs * HeapWordSize; + return Atomic::load(&_gclab_allocs) * HeapWordSize; } size_t ShenandoahHeapRegion::get_plab_allocs() const { - return _plab_allocs * HeapWordSize; + return Atomic::load(&_plab_allocs) * HeapWordSize; } void ShenandoahHeapRegion::set_live_data(size_t s) { @@ -854,6 +855,8 @@ size_t ShenandoahHeapRegion::pin_count() const { } void ShenandoahHeapRegion::set_affiliation(ShenandoahAffiliation new_affiliation) { + assert(new_affiliation != OLD_GENERATION || !reserved_for_direct_allocation(), "Reserved region can't move to old"); + ShenandoahHeap* heap = ShenandoahHeap::heap(); ShenandoahAffiliation region_affiliation = heap->region_affiliation(this); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp index 4c99364bc6ed4..4cde038bc1764 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp @@ -250,11 +250,11 @@ class ShenandoahHeapRegion { HeapWord* _coalesce_and_fill_boundary; // for old regions not selected as collection set candidates. 
   // Frequently updated fields
-  HeapWord* _top;
+  HeapWord* volatile _top;
 
-  size_t _tlab_allocs;
-  size_t _gclab_allocs;
-  size_t _plab_allocs;
+  size_t volatile _tlab_allocs;
+  size_t volatile _gclab_allocs;
+  size_t volatile _plab_allocs;
 
   volatile size_t _live_data;
   volatile size_t _critical_pins;
@@ -268,6 +268,8 @@ class ShenandoahHeapRegion {
 
   bool _needs_bitmap_reset;
 
+  ShenandoahSharedFlag _direct_alloc_reserved; // Flag to indicate whether the region is reserved for lock-free direct allocation
+
 public:
   ShenandoahHeapRegion(HeapWord* start, size_t index, bool committed);
 
@@ -366,6 +368,15 @@ class ShenandoahHeapRegion {
   // Allocation (return nullptr if full)
   inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req);
 
+  inline HeapWord* allocate_lab(const ShenandoahAllocRequest &req, size_t &actual_size);
+
+  // Atomic allocation using CAS; returns nullptr if the region is full or does not have enough space for the request
+  inline HeapWord* allocate_atomic(size_t word_size, const ShenandoahAllocRequest &req);
+
+  inline HeapWord* allocate_lab_atomic(const ShenandoahAllocRequest &req, size_t &actual_size);
+
+  inline bool try_allocate(HeapWord* const obj, size_t const size);
+
   inline void clear_live_data();
   void set_live_data(size_t s);
 
@@ -425,8 +436,12 @@ class ShenandoahHeapRegion {
   // Find humongous start region that this region belongs to
   ShenandoahHeapRegion* humongous_start_region() const;
 
-  HeapWord* top() const         { return _top;     }
-  void set_top(HeapWord* v)     { _top = v;        }
+  HeapWord* top() const {
+    return Atomic::load(&_top);
+  }
+  void set_top(HeapWord* v) {
+    Atomic::store(&_top, v);
+  }
 
   HeapWord* new_top() const     { return _new_top; }
   void set_new_top(HeapWord* v) { _new_top = v;    }
@@ -491,6 +506,20 @@ class ShenandoahHeapRegion {
     _needs_bitmap_reset = false;
   }
 
+  inline void reserve_for_direct_allocation() {
+    assert(_direct_alloc_reserved.is_unset(), "Must be");
+    _direct_alloc_reserved.set();
+  }
+
+  inline void release_from_direct_allocation() {
+    assert(_direct_alloc_reserved.is_set(), "Must be");
+    _direct_alloc_reserved.unset();
+  }
+
+  inline bool reserved_for_direct_allocation() const {
+    return _direct_alloc_reserved.is_set();
+  }
+
 private:
   void decrement_humongous_waste() const;
   void do_commit();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
index 0df482c1e2dab..bbfd325edb36d 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
@@ -109,6 +109,85 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
   }
 }
 
+HeapWord* ShenandoahHeapRegion::allocate_lab(const ShenandoahAllocRequest& req, size_t &actual_size) {
+  shenandoah_assert_heaplocked_or_safepoint();
+  assert(req.is_lab_alloc(), "Only lab alloc");
+  assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established");
+
+  size_t adjusted_size = req.size();
+  HeapWord* obj = nullptr;
+  HeapWord* old_top = top();
+  size_t free_words = align_down(byte_size(old_top, end()) >> LogHeapWordSize, MinObjAlignment);
+  if (adjusted_size > free_words) {
+    adjusted_size = free_words;
+  }
+  if (adjusted_size >= req.min_size()) {
+    obj = allocate(adjusted_size, req);
+    actual_size = adjusted_size;
+    assert(obj == old_top, "Must be");
+  }
+  return obj;
+}
+
+HeapWord* ShenandoahHeapRegion::allocate_atomic(size_t size, const ShenandoahAllocRequest& req) {
+  assert(is_object_aligned(size), "alloc size breaks alignment: %zu", size);
+  assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established");
+  assert(this->is_regular() || this->is_regular_pinned(), "must be a regular region");
+
+  for (;;) {
+    if (!reserved_for_direct_allocation()) {
+      return nullptr;
+    }
+    HeapWord* obj = top();
+    if (pointer_delta(end(), obj) >= size) {
+      if (try_allocate(obj, size)) {
+        adjust_alloc_metadata(req.type(), size);
+        return obj;
+      }
+    } else {
+      return nullptr;
+    }
+  }
+}
+
+HeapWord* ShenandoahHeapRegion::allocate_lab_atomic(const ShenandoahAllocRequest& req, size_t &actual_size) {
+  assert(req.is_lab_alloc(), "Only lab alloc");
+  assert(this->affiliation() == req.affiliation(), "Region affiliation should already be established");
+  assert(this->is_regular() || this->is_regular_pinned(), "must be a regular region");
+  size_t adjusted_size = req.size();
+  for (;;) {
+    if (!reserved_for_direct_allocation()) {
+      return nullptr;
+    }
+    HeapWord* obj = top();
+    size_t free_words = align_down(byte_size(obj, end()) >> LogHeapWordSize, MinObjAlignment);
+    if (adjusted_size > free_words) {
+      adjusted_size = free_words;
+    }
+    if (adjusted_size >= req.min_size()) {
+      if (try_allocate(obj, adjusted_size)) {
+        actual_size = adjusted_size;
+        adjust_alloc_metadata(req.type(), adjusted_size);
+        return obj;
+      }
+    } else {
+      log_trace(gc, free)("Failed to shrink TLAB or GCLAB request (%zu) in region %zu to %zu"
+                          " because min_size() is %zu", req.size(), index(), adjusted_size, req.min_size());
+      return nullptr;
+    }
+  }
+}
+
+bool ShenandoahHeapRegion::try_allocate(HeapWord* const obj, size_t const size) {
+  HeapWord* new_top = obj + size;
+  if (Atomic::cmpxchg(&_top, obj, new_top) == obj) {
+    assert(is_object_aligned(new_top), "new top breaks alignment: " PTR_FORMAT, p2i(new_top));
+    assert(is_object_aligned(obj), "obj is not aligned: " PTR_FORMAT, p2i(obj));
+    return true;
+  }
+  return false;
+}
+
 inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t size) {
   switch (type) {
   case ShenandoahAllocRequest::_alloc_shared:
@@ -116,13 +195,13 @@ inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::
     // Counted implicitly by tlab/gclab allocs
     break;
   case ShenandoahAllocRequest::_alloc_tlab:
-    _tlab_allocs += size;
+    Atomic::add(&_tlab_allocs, size);
     break;
   case ShenandoahAllocRequest::_alloc_gclab:
-    _gclab_allocs += size;
+    Atomic::add(&_gclab_allocs, size);
    break;
   case ShenandoahAllocRequest::_alloc_plab:
-    _plab_allocs += size;
+    Atomic::add(&_plab_allocs, size);
     break;
   default:
     ShouldNotReachHere();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
index ad0beeafed79e..df638796bd8ef 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
@@ -564,6 +564,10 @@
           "to prevent starvation of the old collector. Setting this to "     \
           "0 will allow back to back young collections to run during old "   \
           "collections.")                                                    \
+  product(uintx, ShenandoahDirectlyAllocatableRegionCount, 13, EXPERIMENTAL, \
+          "Number of regions Shenandoah will pre-allocate for "              \
+          "direct allocation with CAS. The value should be less than the "   \
+          "number of CPU cores; ideally it should be a prime number.")       \
   // end of GC_SHENANDOAH_FLAGS

#endif // SHARE_GC_SHENANDOAH_SHENANDOAH_GLOBALS_HPP
diff --git a/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp b/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp
index a245f91fa71e9..069daa4990ee6 100644
--- a/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp
+++ b/src/hotspot/share/gc/shenandoah/vmStructs_shenandoah.hpp
@@ -42,7 +42,7 @@
   volatile_nonstatic_field(ShenandoahHeapRegion, _state, ShenandoahHeapRegion::RegionState) \
   nonstatic_field(ShenandoahHeapRegion, _index, size_t const)                               \
   nonstatic_field(ShenandoahHeapRegion, _bottom, HeapWord* const)                           \
-  nonstatic_field(ShenandoahHeapRegion, _top, HeapWord*)                                    \
+  volatile_nonstatic_field(ShenandoahHeapRegion, _top, HeapWord*)                           \
   nonstatic_field(ShenandoahHeapRegion, _end, HeapWord* const)                              \
 
 #define VM_INT_CONSTANTS_SHENANDOAH(declare_constant, declare_constant_with_value) \