# HG changeset patch
# User Haoyu Li
# Date 1570797757 -28800
#      Fri Oct 11 20:42:37 2019 +0800
# Node ID 136d88bd78713ae171d2fc4dd5b9680a6e2d0f7d
# Parent  24df796eef3d3b64b2ec69605730277b7c75ad24
implement the shadow region optimization

diff -r 24df796eef3d -r 136d88bd7871 src/hotspot/share/gc/parallel/psCompactionManager.cpp
--- a/src/hotspot/share/gc/parallel/psCompactionManager.cpp	Sun Sep 15 21:00:15 2019 -0400
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.cpp	Fri Oct 11 20:42:37 2019 +0800
@@ -50,6 +50,8 @@
 ObjectStartArray*    ParCompactionManager::_start_array = NULL;
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
 RegionTaskQueueSet*  ParCompactionManager::_region_array = NULL;
+GrowableArray<size_t>* ParCompactionManager::_free_shadow = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<size_t>(10, true);
+Monitor*             ParCompactionManager::_monitor = NULL;
 
 ParCompactionManager::ParCompactionManager() :
     _action(CopyAndUpdate) {
@@ -64,6 +66,11 @@
   _region_stack.initialize();
 
   reset_bitmap_query_cache();
+
+  if (_monitor == NULL) {
+    _monitor = new Monitor(Mutex::barrier, "CompactionManager monitor",
+                           Mutex::_allow_vm_block_flag, Monitor::_safepoint_check_never);
+  }
 }
 
 void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
@@ -164,3 +171,32 @@
     }
   } while (!region_stack()->is_empty());
 }
+
+size_t ParCompactionManager::acquire_shadow_region(PSParallelCompact::RegionData* region_ptr) {
+  while (true) {
+    MutexLocker ml(_monitor, Mutex::_no_safepoint_check_flag);
+    if (_free_shadow->is_empty()) {
+      // The corresponding heap region may have become available in the
+      // meantime, in which case no shadow region is needed anymore;
+      // return 0 to indicate this case.
+      if (region_ptr->claimed()) {
+        return 0;
+      }
+    } else {
+      return _free_shadow->pop();
+    }
+  }
+}
+
+void ParCompactionManager::release_shadow_region(size_t shadow_region) {
+  MutexLocker ml(_monitor, Mutex::_no_safepoint_check_flag);
+  _free_shadow->append(shadow_region);
+}
+
+void ParCompactionManager::enqueue_shadow_region(size_t shadow_region) {
+  _free_shadow->append(shadow_region);
+}
+
+void ParCompactionManager::dequeue_shadow_region() {
+  _free_shadow->clear();
+}
\ No newline at end of file
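The acquire/release pair above forms a small blocking pool: a worker that needs a shadow region spins on the monitor until either a free shadow region shows up or the destination heap region itself has been claimed, in which case acquire returns the sentinel 0 (safe because the free pool only ever holds regions above the spaces' tops, never region 0). The following stand-alone sketch models the same protocol with the C++ standard library; ShadowPool and region_claimed are illustrative names, not identifiers from this patch, and std::mutex/std::vector stand in for HotSpot's Monitor and GrowableArray<size_t>.

    // Minimal model of the shadow-region pool protocol above; a sketch,
    // not HotSpot code.
    #include <atomic>
    #include <cstddef>
    #include <mutex>
    #include <vector>

    class ShadowPool {
      std::mutex _lock;
      std::vector<size_t> _free;  // LIFO: back() is the most recently freed region

    public:
      // Block until a shadow region is free, or until the destination heap
      // region has been claimed (then return the "no shadow needed" sentinel 0).
      size_t acquire(const std::atomic<bool>& region_claimed) {
        while (true) {
          std::lock_guard<std::mutex> ml(_lock);
          if (_free.empty()) {
            if (region_claimed.load()) return 0;
          } else {
            size_t idx = _free.back();
            _free.pop_back();
            return idx;
          }
        }
      }

      void release(size_t idx) {
        std::lock_guard<std::mutex> ml(_lock);
        _free.push_back(idx);
      }
    };

    int main() {
      ShadowPool pool;
      std::atomic<bool> claimed{false};
      pool.release(7);                     // seed one free shadow region
      size_t idx  = pool.acquire(claimed); // returns 7 immediately
      claimed.store(true);
      size_t none = pool.acquire(claimed); // pool empty + region claimed -> 0
      return (idx == 7 && none == 0) ? 0 : 1;
    }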
diff -r 24df796eef3d -r 136d88bd7871 src/hotspot/share/gc/parallel/psCompactionManager.hpp
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp	Sun Sep 15 21:00:15 2019 -0400
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp	Fri Oct 11 20:42:37 2019 +0800
@@ -28,6 +28,7 @@
+#include "gc/parallel/psParallelCompact.hpp"
 #include "gc/shared/taskqueue.hpp"
 #include "memory/allocation.hpp"
 #include "utilities/stack.hpp"
 
 class MutableSpace;
 class PSOldGen;
@@ -77,6 +78,7 @@
  private:
   OverflowTaskQueue<oop, mtGC>        _marking_stack;
   ObjArrayTaskQueue                   _objarray_stack;
+  size_t                              _shadow_record;
 
   // Is there a way to reuse the _marking_stack for the
   // saving empty regions?  For now just create a different
@@ -85,6 +87,14 @@
 
   static ParMarkBitMap* _mark_bitmap;
 
+  // The queue of free shadow regions. It is used in a LIFO fashion
+  // so that shadow regions are reused, for better data locality
+  // and utilization.
+  static GrowableArray<size_t>* _free_shadow;
+
+  // This monitor provides mutually exclusive access to _free_shadow.
+  static Monitor* _monitor;
+
   Action _action;
 
   HeapWord* _last_query_beg;
@@ -109,6 +119,12 @@
   // marking stack and overflow stack directly.
 
 public:
+  static size_t acquire_shadow_region(PSParallelCompact::RegionData* region_ptr);
+  static void release_shadow_region(size_t shadow_region);
+  static void enqueue_shadow_region(size_t shadow_region);
+  static void dequeue_shadow_region();
+  inline size_t& shadow_record() { return _shadow_record; }
+
   void reset_bitmap_query_cache() {
     _last_query_beg = NULL;
     _last_query_obj = NULL;
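The comment on _free_shadow motivates the LIFO discipline: the most recently released shadow region is the one most likely to still be warm in cache, so GrowableArray::pop() hands it out first. A tiny stand-alone illustration of that ordering, with std::vector standing in for GrowableArray<size_t>:

    // LIFO reuse of freed shadow regions; a sketch, not HotSpot code.
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> free_shadow;   // stand-in for GrowableArray<size_t>
      free_shadow.push_back(5);       // released first
      free_shadow.push_back(9);       // released last, likely still in cache
      int next = free_shadow.back();  // LIFO: hand out the hottest region first
      free_shadow.pop_back();
      assert(next == 9);
      return 0;
    }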
diff -r 24df796eef3d -r 136d88bd7871 src/hotspot/share/gc/parallel/psParallelCompact.cpp
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Sun Sep 15 21:00:15 2019 -0400
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Fri Oct 11 20:42:37 2019 +0800
@@ -1023,6 +1023,7 @@
 void PSParallelCompact::post_compact()
 {
   GCTraceTime(Info, gc, phases) tm("Post Compact", &_gc_timer);
+  ParCompactionManager::dequeue_shadow_region();
 
   for (unsigned int id = old_space_id; id < last_space_id; ++id) {
     // Clear the marking bitmap, summary data and split info.
@@ -2417,8 +2418,10 @@
   for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) {
     if (sd.region(cur)->claim_unsafe()) {
       ParCompactionManager* cm = ParCompactionManager::manager_array(worker_id);
-      cm->region_stack()->push(cur);
-      region_logger.handle(cur);
+      if (sd.region(cur)->push()) {
+        cm->region_stack()->push(cur);
+        region_logger.handle(cur);
+      }
       // Assign regions to tasks in round-robin fashion.
       if (++worker_id == parallel_gc_threads) {
         worker_id = 0;
@@ -2598,10 +2601,15 @@
 
   size_t region_index = 0;
 
+  PSParallelCompact::initialize_steal_record(worker_id);
   while (true) {
     if (ParCompactionManager::steal(worker_id, region_index)) {
       PSParallelCompact::fill_and_update_region(cm, region_index);
       cm->drain_region_stacks();
+    } else if (PSParallelCompact::steal_shadow_region(cm, region_index)) {
+      // Keep working with the help of shadow regions
+      PSParallelCompact::fill_and_update_shadow_region(cm, region_index);
+      cm->drain_region_stacks();
     } else {
       if (terminator->offer_termination()) {
         break;
@@ -2656,6 +2664,7 @@
   //
   // max push count is thus: last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)
   TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1));
+  enqueue_shadow_region();
   prepare_region_draining_tasks(active_gc_threads);
   enqueue_dense_prefix_tasks(task_queue, active_gc_threads);
 
@@ -2962,7 +2971,15 @@
     assert(cur->data_size() > 0, "region must have live data");
     cur->decrement_destination_count();
     if (cur < enqueue_end && cur->available() && cur->claim()) {
-      cm->push_region(sd.region(cur));
+      if (cur->push()) {
+        cm->push_region(sd.region(cur));
+      } else if (cur->copy()) {
+        // Try to copy the content of the shadow region back to its
+        // corresponding heap region if the shadow region is filled
+        copy_back(sd.region_to_addr(cur), sd.region_to_addr(cur->shadow_region()));
+        cm->release_shadow_region(cur->shadow_region());
+        cur->set_completed();
+      }
     }
   }
 }
@@ -3040,7 +3057,8 @@
   return 0;
 }
 
-void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
+template <typename T>
+void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx, size_t shadow, size_t offset)
 {
   typedef ParMarkBitMap::IterationStatus IterationStatus;
   const size_t RegionSize = ParallelCompactData::RegionSize;
@@ -3061,7 +3079,7 @@
   SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
   HeapWord* src_space_top = _space_info[src_space_id].space()->top();
 
-  MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
+  T closure(bitmap, cm, start_array, dest_addr, words, shadow, offset);
   closure.set_source(first_src_addr(dest_addr, src_space_id, src_region_idx));
 
   // Adjust src_region_idx to prepare for decrementing destination counts (the
@@ -3080,7 +3098,7 @@
       decrement_destination_counts(cm, src_space_id, src_region_idx,
                                    closure.source());
       region_ptr->set_deferred_obj_addr(NULL);
-      region_ptr->set_completed();
+      closure.complete_region(cm, dest_addr, region_ptr);
       return;
     }
 
@@ -3129,7 +3147,7 @@
 
       decrement_destination_counts(cm, src_space_id, src_region_idx,
                                    closure.source());
-      region_ptr->set_completed();
+      closure.complete_region(cm, dest_addr, region_ptr);
       return;
     }
 
@@ -3137,7 +3155,7 @@
       decrement_destination_counts(cm, src_space_id, src_region_idx,
                                    closure.source());
       region_ptr->set_deferred_obj_addr(NULL);
-      region_ptr->set_completed();
+      closure.complete_region(cm, dest_addr, region_ptr);
       return;
     }
 
@@ -3150,6 +3168,92 @@
   } while (true);
 }
 
+template void PSParallelCompact::fill_region<MoveAndUpdateClosure>(ParCompactionManager* cm, size_t region_idx,
+                                                                   size_t shadow, size_t offset);
+
+template void PSParallelCompact::fill_region<ShadowClosure>(ParCompactionManager* cm, size_t region_idx,
+                                                            size_t shadow, size_t offset);
+
+void PSParallelCompact::fill_shadow_region(ParCompactionManager* cm, size_t region_idx)
+{
+  // Acquire a shadow region first
+  ParallelCompactData& sd = summary_data();
+  RegionData* const region_ptr = sd.region(region_idx);
+  size_t shadow_region = cm->acquire_shadow_region(region_ptr);
+  // A zero return value indicates that the corresponding heap region is
+  // available, so use MoveAndUpdateClosure to fill the normal region.
+  // Otherwise, use ShadowClosure to fill the acquired shadow region.
+  if (shadow_region == 0) {
+    return fill_region<MoveAndUpdateClosure>(cm, region_idx);
+  } else {
+    HeapWord* dest_addr = sd.region_to_addr(region_idx);
+    HeapWord* shadow_addr = sd.region_to_addr(shadow_region);
+    size_t shadow_offset = pointer_delta(shadow_addr, dest_addr);
+    return fill_region<ShadowClosure>(cm, region_idx, shadow_region, shadow_offset);
+  }
+}
+
+void PSParallelCompact::copy_back(HeapWord* region_addr, HeapWord* shadow_addr) {
+  Copy::aligned_conjoint_words(shadow_addr, region_addr, ParallelCompactData::RegionSize);
+}
+
+bool PSParallelCompact::steal_shadow_region(ParCompactionManager* cm, size_t& region_idx) {
+  size_t& record = cm->shadow_record();
+  ParallelCompactData& sd = _summary_data;
+  size_t old_new_top = sd.addr_to_region_idx(_space_info[old_space_id].new_top());
+  uint active_gc_threads =
+    ParallelScavengeHeap::heap()->workers().active_workers();
+
+  while (record < old_new_top) {
+    if (sd.region(record)->push()) {
+      region_idx = record;
+      return true;
+    }
+    record += active_gc_threads;
+  }
+
+  return false;
+}
+
+void PSParallelCompact::enqueue_shadow_region() {
+  const ParallelCompactData& sd = PSParallelCompact::summary_data();
+
+  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+    SpaceInfo* const space_info = _space_info + id;
+    MutableSpace* const space = space_info->space();
+
+    const size_t beg_region =
+      sd.addr_to_region_idx(sd.region_align_up(MAX2(space_info->new_top(), space->top())));
+    const size_t end_region =
+      sd.addr_to_region_idx(sd.region_align_down(space->end()));
+
+    for (size_t cur = beg_region + 1; cur < end_region; ++cur) {
+      ParCompactionManager::enqueue_shadow_region(cur);
+    }
+  }
+}
+
+void PSParallelCompact::initialize_steal_record(uint which) {
+  ParCompactionManager* cm =
+    ParCompactionManager::gc_thread_compaction_manager(which);
+  uint active_gc_threads =
+    ParallelScavengeHeap::heap()->workers().active_workers();
+
+  size_t& record = cm->shadow_record();
+  record = _summary_data.addr_to_region_idx(
+    _space_info[old_space_id].dense_prefix());
+
+  size_t expect = which % active_gc_threads;
+  size_t current = record % active_gc_threads;
+
+  if (expect > current) {
+    record += expect - current;
+  }
+  if (expect < current) {
+    record += active_gc_threads - (current - expect);
+  }
+}
+
 void PSParallelCompact::fill_blocks(size_t region_idx)
 {
   // Fill in the block table elements for the specified region.  Each block
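initialize_steal_record above aligns each worker's private scan cursor so that worker `which` only visits regions congruent to `which` modulo the number of active GC threads, starting from the end of the dense prefix; steal_shadow_region then advances that cursor in strides of active_gc_threads, so the workers partition the candidate regions without further synchronization. A stand-alone check of the alignment arithmetic (align_record is a hypothetical extraction of the code above, not a function in this patch):

    #include <cassert>
    #include <cstddef>

    // Advance `record` to the smallest value >= record with
    // record % nthreads == which % nthreads.
    static size_t align_record(size_t record, size_t which, size_t nthreads) {
      size_t expect  = which % nthreads;
      size_t current = record % nthreads;
      if (expect > current) {
        record += expect - current;
      } else if (expect < current) {
        record += nthreads - (current - expect);
      }
      return record;
    }

    int main() {
      // Suppose the dense prefix ends at region 10 and there are 4 workers:
      assert(align_record(10, 0, 4) == 12);  // worker 0 scans 12, 16, 20, ...
      assert(align_record(10, 1, 4) == 13);  // worker 1 scans 13, 17, 21, ...
      assert(align_record(10, 2, 4) == 10);  // worker 2 scans 10, 14, 18, ...
      assert(align_record(10, 3, 4) == 11);  // worker 3 scans 11, 15, 19, ...
      return 0;
    }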
@@ -3290,6 +3394,11 @@
   update_state(words);
 }
 
+void MoveAndUpdateClosure::complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                                           PSParallelCompact::RegionData* region_ptr) {
+  region_ptr->set_completed();
+}
+
 ParMarkBitMapClosure::IterationStatus
 MoveAndUpdateClosure::do_addr(HeapWord* addr, size_t words) {
   assert(destination() != NULL, "sanity");
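fill_region<T> always finishes a region through closure.complete_region(...), and the virtual dispatch is what lets the same filling code serve both cases: for a normal region, completion is just set_completed() as above, while ShadowClosure (further down) must additionally publish the shadow index and arrange the copy-back. A compilable miniature of that dispatch, with hypothetical stand-ins for the HotSpot types:

    #include <cstdio>

    struct Region { bool completed = false; };  // stand-in for RegionData

    struct MoveAndUpdateClosure {
      virtual ~MoveAndUpdateClosure() {}
      // Normal region: filling it is the whole job.
      virtual void complete_region(Region* r) { r->completed = true; }
    };

    struct ShadowClosure : MoveAndUpdateClosure {
      // Shadow region: the data still sits in the shadow buffer, so the
      // region is not completed until someone copies it back.
      void complete_region(Region* r) override {
        std::printf("shadow filled; copy-back pending\n");
      }
    };

    template <class T>
    void fill_region(Region* r) {
      T closure;
      // ... copy objects into place (or into the shadow buffer) ...
      closure.complete_region(r);  // virtual: normal vs. shadow completion
    }

    int main() {
      Region a, b;
      fill_region<MoveAndUpdateClosure>(&a);
      fill_region<ShadowClosure>(&b);
      return a.completed && !b.completed ? 0 : 1;
    }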
@@ -3322,6 +3431,80 @@
   return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
 }
 
+ParMarkBitMapClosure::IterationStatus
+ShadowClosure::do_addr(HeapWord* addr, size_t words) {
+  HeapWord* shadow_destination = destination() + _offset;
+
+  assert(destination() != NULL, "sanity");
+  assert(bitmap()->obj_size(addr) == words, "bad size");
+
+  _source = addr;
+
+  if (words > words_remaining()) {
+    return ParMarkBitMap::would_overflow;
+  }
+
+  // The start_array must be updated even if the object is not moving.
+  if (_start_array != NULL) {
+    _start_array->allocate_block(destination());
+  }
+
+  DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
+  Copy::aligned_conjoint_words(source(), shadow_destination, words);
+
+  oop moved_oop = (oop) shadow_destination;
+  compaction_manager()->update_contents(moved_oop);
+  assert(oopDesc::is_oop_or_null(moved_oop), "Object should be whole at this point");
+
+  update_state(words);
+  assert(destination() == (HeapWord*)moved_oop - _offset + moved_oop->size(), "sanity");
+  return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
+}
+
+ParMarkBitMapClosure::IterationStatus ShadowClosure::copy_until_full() {
+  HeapWord* shadow_destination = destination() + _offset;
+
+  DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
+  Copy::aligned_conjoint_words(source(), shadow_destination, words_remaining());
+  update_state(words_remaining());
+
+  assert(is_full(), "sanity");
+  return ParMarkBitMap::full;
+}
+
+void ShadowClosure::copy_partial_obj() {
+  HeapWord* shadow_destination = destination() + _offset;
+
+  size_t words = words_remaining();
+
+  HeapWord* const range_end = MIN2(source() + words, bitmap()->region_end());
+  HeapWord* const end_addr = bitmap()->find_obj_end(source(), range_end);
+  if (end_addr < range_end) {
+    words = bitmap()->obj_size(source(), end_addr);
+  }
+
+  DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
+  Copy::aligned_conjoint_words(source(), shadow_destination, words);
+
+  update_state(words);
+}
+
+void ShadowClosure::complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                                    PSParallelCompact::RegionData* region_ptr) {
+  // Record the shadow region index
+  region_ptr->set_shadow_region(_shadow);
+  // Mark the shadow region filled
+  region_ptr->fill();
+  // Try to copy the content of the shadow region back to its corresponding
+  // heap region if the heap region is available
+  if (((region_ptr->available() && region_ptr->claim()) || region_ptr->claimed()) && region_ptr->copy()) {
+    region_ptr->set_completed();
+    PSParallelCompact::copy_back(dest_addr, dest_addr + _offset);
+    cm->release_shadow_region(_shadow);
+  }
+}
+
 UpdateOnlyClosure::UpdateOnlyClosure(ParMarkBitMap* mbm,
                                      ParCompactionManager* cm,
                                      PSParallelCompact::SpaceId space_id) :
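ShadowClosure::complete_region above and the decrement_destination_counts hunk earlier are the two halves of one handshake: a shadow-filled region can only be copied back once its destination heap region is free, and either the filling worker (here) or the worker that resolves the region's last source dependency (there) may be the one to discover that both conditions hold. The copy() transition arbitrates so that exactly one of them performs the copy-back. Below is a stand-alone model of that race; the state values mirror RegionData::_shadow and its cmpxchg helpers in the header hunk that follows, while Region, claimed, and the two functions are hypothetical simplifications.

    #include <atomic>
    #include <cstdio>

    enum { UNUSED = 0, SHADOW = 1, FILLED = 2, COPIED = 3 };

    struct Region {
      std::atomic<int>  shadow{SHADOW};   // push() has already succeeded
      std::atomic<bool> claimed{false};   // "destination heap region is free"
      bool fill() { int e = SHADOW; return shadow.compare_exchange_strong(e, FILLED); }
      bool copy() { int e = FILLED; return shadow.compare_exchange_strong(e, COPIED); }
    };

    // Path A: the worker that finished filling the shadow region
    // (ShadowClosure::complete_region).
    void on_shadow_filled(Region& r) {
      r.fill();
      if (r.claimed.load() && r.copy()) {
        std::printf("filler copies back\n");
      } // otherwise the destination is still in use; path B copies back later
    }

    // Path B: the worker that resolves the region's last source dependency
    // (decrement_destination_counts).
    void on_destination_free(Region& r) {
      r.claimed.store(true);
      if (r.copy()) {
        std::printf("dependency resolver copies back\n");
      } // otherwise the fill is unfinished; path A copies back when it completes
    }

    int main() {
      Region r1;               // interleaving 1: fill finishes first
      on_shadow_filled(r1);    //   no copy-back yet (destination in use)
      on_destination_free(r1); //   resolver copies back

      Region r2;               // interleaving 2: destination freed first
      on_destination_free(r2); //   copy() fails (still SHADOW)
      on_shadow_filled(r2);    //   filler copies back
      return 0;
    }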
diff -r 24df796eef3d -r 136d88bd7871 src/hotspot/share/gc/parallel/psParallelCompact.hpp
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp	Sun Sep 15 21:00:15 2019 -0400
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp	Fri Oct 11 20:42:37 2019 +0800
@@ -239,6 +239,9 @@
     // The first region containing data destined for this region.
     size_t source_region() const { return _source_region; }
 
+    // Reuse _source_region to store the corresponding shadow region index
+    size_t shadow_region() const { return _source_region; }
+
     // The object (if any) starting in this region and ending in a different
     // region that could not be updated during the main (parallel) compaction
     // phase.  This is different from _partial_obj_addr, which is an object that
@@ -307,6 +310,7 @@
     // These are not atomic.
     void set_destination(HeapWord* addr)       { _destination = addr; }
     void set_source_region(size_t region)      { _source_region = region; }
+    void set_shadow_region(size_t region)      { _source_region = region; }
     void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
     void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
     void set_partial_obj_size(size_t words)    {
@@ -326,6 +330,13 @@
     inline void decrement_destination_count();
     inline bool claim();
 
+    // Preempt the region to avoid double processing
+    inline bool push();
+    // Mark the region as filled and ready to be copied back
+    inline bool fill();
+    // Preempt the region to copy the shadow region content back
+    inline bool copy();
+
   private:
     // The type used to represent object sizes within a region.
     typedef uint region_sz_t;
@@ -346,6 +357,7 @@
     region_sz_t          _partial_obj_size;
     region_sz_t volatile _dc_and_los;
     bool        volatile _blocks_filled;
+    int         volatile _shadow;
 
 #ifdef ASSERT
     size_t               _blocks_filled_count;   // Number of block table fills.
@@ -596,6 +608,18 @@
   return old == los;
 }
 
+inline bool ParallelCompactData::RegionData::push() {
+  return Atomic::cmpxchg(1, &_shadow, 0) == 0;
+}
+
+inline bool ParallelCompactData::RegionData::fill() {
+  return Atomic::cmpxchg(2, &_shadow, 1) == 1;
+}
+
+inline bool ParallelCompactData::RegionData::copy() {
+  return Atomic::cmpxchg(3, &_shadow, 2) == 2;
+}
+
 inline ParallelCompactData::RegionData*
 ParallelCompactData::region(size_t region_idx) const
 {
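The three helpers above encode a four-state lifecycle in _shadow: 0 (untouched) -> 1 via push() when a worker claims the region for processing, 1 -> 2 via fill() when its shadow copy is complete, and 2 -> 3 via copy() when one worker wins the right to copy the data back. Note that HotSpot's Atomic::cmpxchg of this era takes (exchange_value, dest, compare_value) and returns the previous value, so `Atomic::cmpxchg(1, &_shadow, 0) == 0` succeeds for exactly one thread. A stand-alone equivalent of push() in the more familiar std::atomic style, just to make the argument order concrete:

    #include <atomic>
    #include <cassert>

    // std::atomic equivalent of `Atomic::cmpxchg(1, &_shadow, 0) == 0`:
    // publish 1 only if the current value is 0, returning whether we won.
    static bool push(std::atomic<int>& shadow) {
      int expected = 0;
      return shadow.compare_exchange_strong(expected, 1);
    }

    int main() {
      std::atomic<int> shadow{0};
      assert(push(shadow));    // the first claimer wins: 0 -> 1
      assert(!push(shadow));   // every later push() fails
      return 0;
    }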
@@ -1182,12 +1206,26 @@
                                           size_t beg_region,
                                           HeapWord* end_addr);
 
-  // Fill a region, copying objects from one or more source regions.
-  static void fill_region(ParCompactionManager* cm, size_t region_idx);
+  // T must be either MoveAndUpdateClosure or ShadowClosure, indicating
+  // filling a normal region or a shadow region, respectively.
+  template <typename T>
+  static void fill_region(ParCompactionManager* cm, size_t region_idx, size_t shadow = 0, size_t offset = 0);
 
   static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
-    fill_region(cm, region);
+    fill_region<MoveAndUpdateClosure>(cm, region);
   }
 
+  static bool steal_shadow_region(ParCompactionManager* cm, size_t& region_idx);
+  static void fill_shadow_region(ParCompactionManager* cm, size_t region_idx);
+  static void fill_and_update_shadow_region(ParCompactionManager* cm, size_t region) {
+    fill_shadow_region(cm, region);
+  }
+  // Copy the content of a shadow region back to its corresponding heap region
+  static void copy_back(HeapWord* region_addr, HeapWord* shadow_addr);
+  // Initialize the steal record of a GC thread
+  static void initialize_steal_record(uint which);
+  // Reuse the empty heap regions as shadow regions, like to-space regions
+  static void enqueue_shadow_region();
+
   // Fill in the block table for the specified region.
   static void fill_blocks(size_t region_idx);
 
@@ -1236,7 +1274,8 @@
  public:
   inline MoveAndUpdateClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
                               ObjectStartArray* start_array,
-                              HeapWord* destination, size_t words);
+                              HeapWord* destination, size_t words,
+                              size_t shadow = 0, size_t offset = 0);
 
   // Accessors.
   HeapWord* destination() const         { return _destination; }
@@ -1249,12 +1288,15 @@
 
   // Copy enough words to fill this closure, starting at source().  Interior
   // oops and the start array are not updated.  Return full.
-  IterationStatus copy_until_full();
+  virtual IterationStatus copy_until_full();
 
   // Copy enough words to fill this closure or to the end of an object,
   // whichever is smaller, starting at source().  Interior oops and the start
   // array are not updated.
-  void copy_partial_obj();
+  virtual void copy_partial_obj();
+
+  virtual void complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                               PSParallelCompact::RegionData* region_ptr);
 
  protected:
   // Update variables to indicate that word_count words were processed.
@@ -1270,7 +1312,8 @@
                                          ParCompactionManager* cm,
                                          ObjectStartArray* start_array,
                                          HeapWord* destination,
-                                         size_t words) :
+                                         size_t words,
+                                         size_t shadow, size_t offset) :
   ParMarkBitMapClosure(bitmap, cm, words), _start_array(start_array)
 {
   _destination = destination;
@@ -1283,6 +1326,35 @@
   _destination += words;
 }
 
+class ShadowClosure: public MoveAndUpdateClosure {
+ public:
+  inline ShadowClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
+                       ObjectStartArray* start_array,
+                       HeapWord* destination, size_t words,
+                       size_t shadow, size_t offset);
+  virtual IterationStatus do_addr(HeapWord* addr, size_t size);
+  virtual IterationStatus copy_until_full();
+  virtual void copy_partial_obj();
+  virtual void complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                               PSParallelCompact::RegionData* region_ptr);
+ private:
+  size_t _shadow;
+  size_t _offset;
+};
+
+inline
+ShadowClosure::ShadowClosure(ParMarkBitMap* bitmap,
+                             ParCompactionManager* cm,
+                             ObjectStartArray* start_array,
+                             HeapWord* destination,
+                             size_t words,
+                             size_t shadow, size_t offset) :
+  MoveAndUpdateClosure(bitmap, cm, start_array, destination, words)
+{
+  _shadow = shadow;
+  _offset = offset;
+}
+
 class UpdateOnlyClosure: public ParMarkBitMapClosure {
  private:
   const PSParallelCompact::SpaceId _space_id;