# HG changeset patch
# User Haoyu Li
# Date 1545585499 -28800
#      Mon Dec 24 01:18:19 2018 +0800
# Node ID 61c2e1471bff5f62f7988ac00b24cf02258e20e4
# Parent  1ddf9a99e4ade8f06e234b43b60c17fe75583ac0
implement shadow region optimization to enhance PS full GC performance

diff --git a/src/hotspot/share/gc/parallel/pcTasks.cpp b/src/hotspot/share/gc/parallel/pcTasks.cpp
--- a/src/hotspot/share/gc/parallel/pcTasks.cpp
+++ b/src/hotspot/share/gc/parallel/pcTasks.cpp
@@ -219,10 +219,15 @@
   size_t region_index = 0;
   int random_seed = 17;
 
+  PSParallelCompact::initialize_steal_record(which);
   while(true) {
     if (ParCompactionManager::steal(which, &random_seed, region_index)) {
       PSParallelCompact::fill_and_update_region(cm, region_index);
       cm->drain_region_stacks();
+    } else if (PSParallelCompact::steal_shadow_region(cm, region_index)) {
+      // Keep working with the help of shadow regions
+      PSParallelCompact::fill_and_update_shadow_region(cm, region_index);
+      cm->drain_region_stacks();
     } else {
       if (terminator()->offer_termination()) {
         break;
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.cpp b/src/hotspot/share/gc/parallel/psCompactionManager.cpp
--- a/src/hotspot/share/gc/parallel/psCompactionManager.cpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.cpp
@@ -52,6 +52,12 @@
 ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL;
 RegionTaskQueueSet* ParCompactionManager::_region_array = NULL;
 
+GrowableArray<size_t>* ParCompactionManager::_free_shadow =
+  new (ResourceObj::C_HEAP, mtInternal) GrowableArray<size_t>(10, true);
+Monitor* ParCompactionManager::_monitor =
+  new Monitor(Mutex::barrier, "CompactionManager monitor",
+              Mutex::_allow_vm_block_flag, Monitor::_safepoint_check_never);
+
 ParCompactionManager::ParCompactionManager() :
     _action(CopyAndUpdate) {
@@ -273,3 +279,32 @@
     }
   } while (!region_stack()->is_empty());
 }
+
+size_t ParCompactionManager::acquire_shadow_region(PSParallelCompact::RegionData* region_ptr) {
+  while (true) {
+    MutexLockerEx ml(_monitor, Mutex::_no_safepoint_check_flag);
+    if (_free_shadow->is_empty()) {
+      // The corresponding heap region may be available now,
+      // so we don't need to acquire a shadow region anymore,
+      // and we return 0 to indicate this case
+      if (region_ptr->claimed()) {
+        return 0;
+      }
+    } else {
+      return _free_shadow->pop();
+    }
+  }
+}
+
+void ParCompactionManager::release_shadow_region(size_t shadow_region) {
+  MutexLockerEx ml(_monitor, Mutex::_no_safepoint_check_flag);
+  _free_shadow->append(shadow_region);
+}
+
+void ParCompactionManager::enqueue_shadow_region(size_t shadow_region) {
+  _free_shadow->append(shadow_region);
+}
+
+void ParCompactionManager::dequeue_shadow_region() {
+  _free_shadow->clear();
+}
\ No newline at end of file
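The acquire/release pair above is a small blocking free list: a worker spins, re-taking the lock on each iteration, until it either pops a shadow region or observes that the heap region it wanted to shadow has been claimed in the meantime (the 0 return value; safe as a sentinel because only non-zero region indices are ever enqueued). Below is a minimal standalone model of that protocol, with std::mutex and std::vector standing in for HotSpot's Monitor and GrowableArray; all names in the sketch are illustrative, not HotSpot APIs.

#include <atomic>
#include <cstddef>
#include <mutex>
#include <vector>

// Stand-ins for the patch's _monitor/_free_shadow pair.
static std::mutex          free_shadow_lock;
static std::vector<size_t> free_shadow;      // used LIFO, like _free_shadow

size_t acquire_shadow_region(const std::atomic<bool>& heap_region_claimed) {
  while (true) {
    std::lock_guard<std::mutex> ml(free_shadow_lock);
    if (free_shadow.empty()) {
      // No shadow region is free; give up once the heap region itself has
      // been claimed, otherwise retry (a concurrent release may refill us).
      if (heap_region_claimed.load()) {
        return 0;
      }
    } else {
      size_t idx = free_shadow.back();       // LIFO pop for data locality
      free_shadow.pop_back();
      return idx;
    }
  }
}

void release_shadow_region(size_t idx) {
  std::lock_guard<std::mutex> ml(free_shadow_lock);
  free_shadow.push_back(idx);
}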
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
@@ -28,6 +28,7 @@
 #include "gc/shared/taskqueue.hpp"
 #include "memory/allocation.hpp"
 #include "utilities/stack.hpp"
+#include "psParallelCompact.hpp"
 
 class MutableSpace;
 class PSOldGen;
@@ -73,15 +74,24 @@
   static PSOldGen*            _old_gen;
 
 private:
-  OverflowTaskQueue<oop, mtGC>        _marking_stack;
+  size_t                              _shadow_record;
+  OverflowTaskQueue<oop, mtGC>        _marking_stack;
   ObjArrayTaskQueue                   _objarray_stack;
 
   // Is there a way to reuse the _marking_stack for the
   // saving empty regions?  For now just create a different
   // type of TaskQueue.
-  RegionTaskQueue              _region_stack;
+  RegionTaskQueue                     _region_stack;
 
-  static ParMarkBitMap* _mark_bitmap;
+  static ParMarkBitMap*               _mark_bitmap;
+
+  // The shadow region queue; we use it in a LIFO fashion so
+  // that we can reuse shadow regions for better data locality
+  // and utilization
+  static GrowableArray<size_t>*       _free_shadow;
+
+  // This Monitor provides mutually exclusive access to _free_shadow
+  static Monitor*                     _monitor;
 
   Action _action;
@@ -107,6 +117,12 @@
   // marking stack and overflow stack directly.
 
 public:
+  static size_t acquire_shadow_region(PSParallelCompact::RegionData* region_ptr);
+  static void release_shadow_region(size_t shadow_region);
+  static void enqueue_shadow_region(size_t shadow_region);
+  static void dequeue_shadow_region();
+  inline size_t& shadow_record() { return _shadow_record; }
+
   void reset_bitmap_query_cache() {
     _last_query_beg = NULL;
     _last_query_obj = NULL;
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -998,6 +998,7 @@
 void PSParallelCompact::post_compact() {
   GCTraceTime(Info, gc, phases) tm("Post Compact", &_gc_timer);
 
+  ParCompactionManager::dequeue_shadow_region();
   for (unsigned int id = old_space_id; id < last_space_id; ++id) {
     // Clear the marking bitmap, summary data and split info.
@@ -2275,8 +2276,10 @@
   for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) {
     if (sd.region(cur)->claim_unsafe()) {
       ParCompactionManager* cm = ParCompactionManager::manager_array(which);
-      cm->region_stack()->push(cur);
-      region_logger.handle(cur);
+      if (sd.region(cur)->push()) {
+        cm->region_stack()->push(cur);
+        region_logger.handle(cur);
+      }
       // Assign regions to tasks in round-robin fashion.
       if (++which == parallel_gc_threads) {
         which = 0;
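In the prepare_region_draining_tasks() hunk just above, claim_unsafe() used to be the only ownership gate; with shadow regions a second gate is needed, because a region already being filled through a shadow region must not also be pushed onto a drain stack. A minimal sketch of the double gate, with std::atomic standing in for RegionData's state words (types and names here are illustrative):

#include <atomic>

struct Region {
  std::atomic<int>  shadow{0};     // 0 = untouched; 1 = taken, as after push()
  std::atomic<bool> claim_flag{false};

  // Mirrors RegionData::claim_unsafe(): the first caller wins.
  bool claim_unsafe() { return !claim_flag.exchange(true); }

  // Mirrors RegionData::push(): succeeds only on the 0 -> 1 transition.
  bool push() {
    int expected = 0;
    return shadow.compare_exchange_strong(expected, 1);
  }
};

void maybe_enqueue(Region& r) {
  if (r.claim_unsafe() && r.push()) {
    // Only now is the region safe to push on a drain stack; if push()
    // fails, a shadow-region stealer already owns this region's filling.
  }
}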
@@ -2430,12 +2433,12 @@
   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
   PSOldGen* old_gen = heap->old_gen();
   old_gen->start_array()->reset();
-  uint parallel_gc_threads = heap->gc_task_manager()->workers();
   uint active_gc_threads = heap->gc_task_manager()->active_workers();
   TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(active_gc_threads, qset);
 
   GCTaskQueue* q = GCTaskQueue::create();
+  enqueue_shadow_region();
   prepare_region_draining_tasks(q, active_gc_threads);
   enqueue_dense_prefix_tasks(q, active_gc_threads);
   enqueue_region_stealing_tasks(q, &terminator, active_gc_threads);
@@ -2742,7 +2745,15 @@
     assert(cur->data_size() > 0, "region must have live data");
     cur->decrement_destination_count();
     if (cur < enqueue_end && cur->available() && cur->claim()) {
-      cm->push_region(sd.region(cur));
+      if (cur->push()) {
+        cm->push_region(sd.region(cur));
+      } else if (cur->copy()) {
+        // Try to copy the content of the shadow region back to its corresponding
+        // heap region if the shadow region is filled
+        copy_back(sd.region_to_addr(cur), sd.region_to_addr(cur->shadow_region()));
+        cm->release_shadow_region(cur->shadow_region());
+        cur->set_completed();
+      }
     }
   }
 }
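The copy_back() call above (defined later in this patch) moves one full region of words from a shadow region to its final heap address, and runs only after the destination count has dropped to zero and the copy() transition has been won, so the two ranges are always distinct regions. A sketch of the copy itself, with memcpy standing in for Copy::aligned_conjoint_words and an assumed region size (both are stand-ins, not the HotSpot values):

#include <cstddef>
#include <cstring>

typedef unsigned long HeapWord_t;        // stand-in for HeapWord
const size_t RegionSizeWords = 1 << 12;  // assumed stand-in for ParallelCompactData::RegionSize

void copy_back(HeapWord_t* region_addr, const HeapWord_t* shadow_addr) {
  // Shadow regions live in otherwise-empty heap regions, so source and
  // destination never overlap and a plain forward copy is sufficient.
  std::memcpy(region_addr, shadow_addr, RegionSizeWords * sizeof(HeapWord_t));
}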
@@ -2930,6 +2941,212 @@
   } while (true);
 }
 
+void PSParallelCompact::fill_shadow_region(ParCompactionManager* cm, size_t region_idx)
+{
+  // Acquire a shadow region first
+  ParallelCompactData& sd = summary_data();
+  RegionData* const region_ptr = sd.region(region_idx);
+  size_t shadow_region = cm->acquire_shadow_region(region_ptr);
+  // The zero return value indicates the corresponding heap region is available,
+  // so we just jump to fill_region().
+  if (shadow_region == 0) {
+    return fill_region(cm, region_idx);
+  }
+
+  typedef ParMarkBitMap::IterationStatus IterationStatus;
+  const size_t RegionSize = ParallelCompactData::RegionSize;
+  ParMarkBitMap* const bitmap = mark_bitmap();
+
+  // Get the items needed to construct the closure.
+  HeapWord* dest_addr = sd.region_to_addr(region_idx);
+  HeapWord* shadow_addr = sd.region_to_addr(shadow_region);
+  size_t shadow_offset = pointer_delta(shadow_addr, dest_addr);
+  SpaceId dest_space_id = space_id(dest_addr);
+  ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
+  HeapWord* new_top = _space_info[dest_space_id].new_top();
+  assert(dest_addr < new_top, "sanity");
+  const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
+
+  // Get the source region and related info.
+  size_t src_region_idx = region_ptr->source_region();
+  SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
+  HeapWord* src_space_top = _space_info[src_space_id].space()->top();
+
+  ShadowClosure closure(bitmap, cm, start_array, dest_addr, words, shadow_offset);
+  closure.set_source(first_src_addr(dest_addr, src_space_id, src_region_idx));
+
+  // Adjust src_region_idx to prepare for decrementing destination counts (the
+  // destination count is not decremented when a region is copied to itself).
+  if (src_region_idx == region_idx) {
+    src_region_idx += 1;
+  }
+
+  if (bitmap->is_unmarked(closure.source())) {
+    // The first source word is in the middle of an object; copy the remainder
+    // of the object or as much as will fit.  The fact that pointer updates were
+    // deferred will be noted when the object header is processed.
+    HeapWord* const old_src_addr = closure.source();
+    closure.copy_partial_obj();
+    if (closure.is_full()) {
+      decrement_destination_counts(cm, src_space_id, src_region_idx,
+                                   closure.source());
+      region_ptr->set_deferred_obj_addr(NULL);
+
+      // Record the shadow region index
+      region_ptr->set_shadow_region(shadow_region);
+      // Mark the shadow region filled
+      region_ptr->fill();
+      // Try to copy the content of the shadow region back to its corresponding
+      // heap region if available
+      if (((region_ptr->available() && region_ptr->claim()) || region_ptr->claimed()) && region_ptr->copy()) {
+        region_ptr->set_completed();
+        copy_back(dest_addr, shadow_addr);
+        cm->release_shadow_region(shadow_region);
+      }
+      return;
+    }
+
+    HeapWord* const end_addr = sd.region_align_down(closure.source());
+    if (sd.region_align_down(old_src_addr) != end_addr) {
+      // The partial object was copied from more than one source region.
+      decrement_destination_counts(cm, src_space_id, src_region_idx, end_addr);
+
+      // Move to the next source region, possibly switching spaces as well.  All
+      // args except end_addr may be modified.
+      src_region_idx = next_src_region(closure, src_space_id, src_space_top,
+                                       end_addr);
+    }
+  }
+
+  do {
+    HeapWord* const cur_addr = closure.source();
+    HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1),
+                                    src_space_top);
+    IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr);
+
+    if (status == ParMarkBitMap::incomplete) {
+      // The last obj that starts in the source region does not end in the
+      // region.
+      assert(closure.source() < end_addr, "sanity");
+      HeapWord* const obj_beg = closure.source();
+      HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(),
+                                       src_space_top);
+      HeapWord* const obj_end = bitmap->find_obj_end(obj_beg, range_end);
+      if (obj_end < range_end) {
+        // The end was found; the entire object will fit.
+        status = closure.do_addr(obj_beg, bitmap->obj_size(obj_beg, obj_end));
+        assert(status != ParMarkBitMap::would_overflow, "sanity");
+      } else {
+        // The end was not found; the object will not fit.
+        assert(range_end < src_space_top, "obj cannot cross space boundary");
+        status = ParMarkBitMap::would_overflow;
+      }
+    }
+
+    if (status == ParMarkBitMap::would_overflow) {
+      // The last object did not fit.  Note that interior oop updates were
+      // deferred, then copy enough of the object to fill the region.
+      region_ptr->set_deferred_obj_addr(closure.destination());
+      status = closure.copy_until_full(); // copies from closure.source()
+
+      decrement_destination_counts(cm, src_space_id, src_region_idx,
+                                   closure.source());
+
+      region_ptr->set_shadow_region(shadow_region);
+      region_ptr->fill();
+      if (((region_ptr->available() && region_ptr->claim()) || region_ptr->claimed()) && region_ptr->copy()) {
+        region_ptr->set_completed();
+        copy_back(dest_addr, shadow_addr);
+        cm->release_shadow_region(shadow_region);
+      }
+      return;
+    }
+
+    if (status == ParMarkBitMap::full) {
+      decrement_destination_counts(cm, src_space_id, src_region_idx,
+                                   closure.source());
+      region_ptr->set_deferred_obj_addr(NULL);
+
+      region_ptr->set_shadow_region(shadow_region);
+      region_ptr->fill();
+      if (((region_ptr->available() && region_ptr->claim()) || region_ptr->claimed()) && region_ptr->copy()) {
+        region_ptr->set_completed();
+        copy_back(dest_addr, shadow_addr);
+        cm->release_shadow_region(shadow_region);
+      }
+      return;
+    }
+
+    decrement_destination_counts(cm, src_space_id, src_region_idx, end_addr);
+
+    // Move to the next source region, possibly switching spaces as well.  All
+    // args except end_addr may be modified.
+    src_region_idx = next_src_region(closure, src_space_id, src_space_top,
+                                     end_addr);
+  } while (true);
+}
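The condition ((available() && claim()) || claimed()) && copy(), repeated in each completion branch above, decides at fill time whether the filling thread performs the copy-back immediately: either the heap region has just become free and this thread claims it, or another thread has already claimed it; in both cases only the winner of the copy() transition does the copy. If the heap region is still occupied, the region stays in the filled state and the copy-back is deferred to decrement_destination_counts() (hunk @@ -2742 above). A compact model of that decision, with simplified stand-ins for the claim bit and destination count:

#include <atomic>

enum ShadowState { UNUSED = 0, SHADOW = 1, FILLED = 2, COPIED = 3 };

struct RegionState {
  std::atomic<int>  shadow{FILLED};   // the filler has just completed fill()
  std::atomic<bool> claim_bit{false};
  std::atomic<int>  dest_count{1};    // 0 means available()

  bool available() const { return dest_count.load() == 0; }
  bool claimed() const   { return claim_bit.load(); }
  bool claim() { bool e = false;  return claim_bit.compare_exchange_strong(e, true); }
  bool copy()  { int  e = FILLED; return shadow.compare_exchange_strong(e, COPIED); }
};

// Returns true when the caller must copy the shadow content back right now;
// false means the copy-back is deferred (or another thread already won it).
bool should_copy_back_now(RegionState& r) {
  return ((r.available() && r.claim()) || r.claimed()) && r.copy();
}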
+
+void PSParallelCompact::copy_back(HeapWord *region_addr, HeapWord *shadow_addr) {
+  Copy::aligned_conjoint_words(shadow_addr, region_addr, _summary_data.RegionSize);
+}
+
+bool PSParallelCompact::steal_shadow_region(ParCompactionManager* cm, size_t& region_idx) {
+  size_t& record = cm->shadow_record();
+  ParallelCompactData& sd = _summary_data;
+  size_t old_new_top = sd.addr_to_region_idx(_space_info[old_space_id].new_top());
+  uint active_gc_threads =
+    ParallelScavengeHeap::heap()->gc_task_manager()->active_workers();
+
+  while (record < old_new_top) {
+    if (sd.region(record)->push()) {
+      region_idx = record;
+      return true;
+    }
+    record += active_gc_threads;
+  }
+
+  return false;
+}
+
+void PSParallelCompact::enqueue_shadow_region() {
+  const ParallelCompactData& sd = PSParallelCompact::summary_data();
+
+  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+    SpaceInfo* const space_info = _space_info + id;
+    MutableSpace* const space = space_info->space();
+
+    const size_t beg_region =
+      sd.addr_to_region_idx(sd.region_align_up(MAX2(space_info->new_top(), space->top())));
+    const size_t end_region =
+      sd.addr_to_region_idx(sd.region_align_down(space->end()));
+
+    for (size_t cur = beg_region + 1; cur < end_region; ++cur) {
+      ParCompactionManager::enqueue_shadow_region(cur);
+    }
+  }
+}
+
+void PSParallelCompact::initialize_steal_record(uint which) {
+  ParCompactionManager* cm =
+    ParCompactionManager::gc_thread_compaction_manager(which);
+  uint active_gc_threads =
+    ParallelScavengeHeap::heap()->gc_task_manager()->active_workers();
+
+  size_t& record = cm->shadow_record();
+  record = _summary_data.addr_to_region_idx(
+    _space_info[old_space_id].dense_prefix());
+
+  size_t expect = which % active_gc_threads;
+  size_t current = record % active_gc_threads;
+
+  if (expect > current) {
+    record += expect - current;
+  }
+  if (expect < current) {
+    record += expect - current + active_gc_threads;
+  }
+}
+
 void PSParallelCompact::fill_blocks(size_t region_idx)
 {
   // Fill in the block table elements for the specified region.  Each block
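initialize_steal_record() above gives every worker a private arithmetic progression over the candidate regions: worker `which` starts at the first index at or after the dense prefix that is congruent to `which` modulo the active thread count, and steal_shadow_region() then strides by that count, so no two workers ever test the same region. A self-contained version of the start-index computation (the function name is illustrative):

#include <cstddef>

size_t initial_steal_record(size_t dense_prefix_region,
                            unsigned which, unsigned active_gc_threads) {
  size_t record  = dense_prefix_region;
  size_t expect  = which % active_gc_threads;
  size_t current = record % active_gc_threads;
  if (expect > current) {
    record += expect - current;
  }
  if (expect < current) {
    // expect - current wraps around in unsigned arithmetic; adding
    // active_gc_threads brings it back to the intended positive delta.
    record += expect - current + active_gc_threads;
  }
  return record;   // record % active_gc_threads == which % active_gc_threads
}

// E.g. dense_prefix_region == 10 with 4 workers: workers 0..3 start at
// regions 12, 13, 10 and 11 respectively, then each steps by 4.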
@@ -3172,6 +3389,64 @@
   return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
 }
 
+ParMarkBitMapClosure::IterationStatus
+ShadowClosure::do_addr(HeapWord* addr, size_t words) {
+  HeapWord* shadow_destination = destination() + _offset;
+
+  assert(destination() != NULL, "sanity");
+  assert(bitmap()->obj_size(addr) == words, "bad size");
+
+  _source = addr;
+
+  if (words > words_remaining()) {
+    return ParMarkBitMap::would_overflow;
+  }
+
+  // The start_array must be updated even if the object is not moving.
+  if (_start_array != NULL) {
+    _start_array->allocate_block(destination());
+  }
+
+  DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
+  Copy::aligned_conjoint_words(source(), shadow_destination, words);
+
+  oop moved_oop = (oop) shadow_destination;
+  compaction_manager()->update_contents(moved_oop);
+  assert(oopDesc::is_oop_or_null(moved_oop), "Object should be whole at this point");
+
+  update_state(words);
+  assert(destination() == (HeapWord*)moved_oop - _offset + moved_oop->size(), "sanity");
+  return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
+}
+
+ParMarkBitMapClosure::IterationStatus ShadowClosure::copy_until_full() {
+  HeapWord* shadow_destination = destination() + _offset;
+
+  DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
+  Copy::aligned_conjoint_words(source(), shadow_destination, words_remaining());
+  update_state(words_remaining());
+
+  assert(is_full(), "sanity");
+  return ParMarkBitMap::full;
+}
+
+void ShadowClosure::copy_partial_obj() {
+  HeapWord* shadow_destination = destination() + _offset;
+
+  size_t words = words_remaining();
+
+  HeapWord* const range_end = MIN2(source() + words, bitmap()->region_end());
+  HeapWord* const end_addr = bitmap()->find_obj_end(source(), range_end);
+  if (end_addr < range_end) {
+    words = bitmap()->obj_size(source(), end_addr);
+  }
+
+  DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
+  Copy::aligned_conjoint_words(source(), shadow_destination, words);
+
+  update_state(words);
+}
+
 UpdateOnlyClosure::UpdateOnlyClosure(ParMarkBitMap* mbm,
                                      ParCompactionManager* cm,
                                      PSParallelCompact::SpaceId space_id) :
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
@@ -241,6 +241,9 @@
     // The first region containing data destined for this region.
     size_t source_region() const { return _source_region; }
 
+    // Reuse _source_region to store the corresponding shadow region index
+    size_t shadow_region() const { return _source_region; }
+
     // The object (if any) starting in this region and ending in a different
     // region that could not be updated during the main (parallel) compaction
     // phase.  This is different from _partial_obj_addr, which is an object that
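ShadowClosure changes exactly one thing relative to MoveAndUpdateClosure: the bytes land at destination() + _offset, inside the shadow region, while all bookkeeping — the start array, the destination cursor, deferred-object addresses — keeps using final heap-region coordinates, so interior oops are updated as if the object already sat at its final address. A minimal illustration of that address split (illustrative types, not the HotSpot closure):

#include <cstddef>
#include <cstring>

typedef unsigned long HeapWord_t;  // stand-in for HeapWord

struct ShadowCopy {
  HeapWord_t* dest;    // next free word, in final heap coordinates
  size_t      offset;  // shadow_addr - region_addr, in words

  void copy_object(const HeapWord_t* src, size_t words) {
    // The physical copy targets the shadow location...
    std::memcpy(dest + offset, src, words * sizeof(HeapWord_t));
    // ...while the logical cursor advances in heap coordinates, so all
    // derived addresses (start array, oop updates) are final addresses.
    dest += words;
  }
};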
@@ -309,6 +312,7 @@
     // These are not atomic.
     void set_destination(HeapWord* addr)       { _destination = addr; }
     void set_source_region(size_t region)      { _source_region = region; }
+    void set_shadow_region(size_t region)      { _source_region = region; }
     void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
     void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
     void set_partial_obj_size(size_t words)    {
@@ -328,6 +332,13 @@
     inline void decrement_destination_count();
     inline bool claim();
 
+    // Preempt the region to avoid double processing
+    inline bool push();
+    // Mark the region as filled and ready to be copied back
+    inline bool fill();
+    // Preempt the region to copy the shadow region content back
+    inline bool copy();
+
   private:
     // The type used to represent object sizes within a region.
     typedef uint region_sz_t;
@@ -348,6 +359,7 @@
     region_sz_t            _partial_obj_size;
     region_sz_t volatile   _dc_and_los;
     bool        volatile   _blocks_filled;
+    int         volatile   _shadow;
 
 #ifdef ASSERT
     size_t               _blocks_filled_count;   // Number of block table fills.
@@ -598,6 +610,18 @@
   return old == los;
 }
 
+inline bool ParallelCompactData::RegionData::push() {
+  return Atomic::cmpxchg(1, &_shadow, 0) == 0;
+}
+
+inline bool ParallelCompactData::RegionData::fill() {
+  return Atomic::cmpxchg(2, &_shadow, 1) == 1;
+}
+
+inline bool ParallelCompactData::RegionData::copy() {
+  return Atomic::cmpxchg(3, &_shadow, 2) == 2;
+}
+
 inline ParallelCompactData::RegionData*
 ParallelCompactData::region(size_t region_idx) const
 {
@@ -1197,6 +1221,21 @@
     fill_region(cm, region);
   }
 
+  static bool steal_shadow_region(ParCompactionManager* cm, size_t& region_idx);
+  static void fill_shadow_region(ParCompactionManager* cm, size_t region_idx);
+  static void fill_and_update_shadow_region(ParCompactionManager* cm, size_t region) {
+    fill_shadow_region(cm, region);
+  }
+
+  // Copy the content of a shadow region back to its corresponding heap region
+  static void copy_back(HeapWord* region_addr, HeapWord* shadow_addr);
+
+  // Initialize the steal record of a GC thread
+  static void initialize_steal_record(uint which);
+
+  // Reuse the empty heap regions as shadow regions, like to-space regions
+  static void enqueue_shadow_region();
+
   // Fill in the block table for the specified region.
   static void fill_blocks(size_t region_idx);
 
@@ -1258,12 +1297,12 @@
 
   // Copy enough words to fill this closure, starting at source().  Interior
   // oops and the start array are not updated.  Return full.
-  IterationStatus copy_until_full();
+  virtual IterationStatus copy_until_full();
 
   // Copy enough words to fill this closure or to the end of an object,
   // whichever is smaller, starting at source().  Interior oops and the start
   // array are not updated.
-  void copy_partial_obj();
+  virtual void copy_partial_obj();
 
  protected:
   // Update variables to indicate that word_count words were processed.
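The three cmpxchg helpers above encode a one-way state machine over _shadow: 0 -> 1 (push), 1 -> 2 (fill), 2 -> 3 (copy). Atomic::cmpxchg(new_value, dest, compare_value) returns the old value, so each helper succeeds for exactly one caller, which is what makes the unsynchronized checks elsewhere in the patch safe. The same trio expressed with std::atomic, with the intended transition order asserted (sketch names are illustrative):

#include <atomic>
#include <cassert>

struct ShadowStateMachine {
  std::atomic<int> state{0};   // mirrors RegionData::_shadow

  bool transition(int from, int to) {
    int expected = from;
    return state.compare_exchange_strong(expected, to);
  }
  bool push() { return transition(0, 1); }  // region taken for (shadow) filling
  bool fill() { return transition(1, 2); }  // shadow content is complete
  bool copy() { return transition(2, 3); }  // copy-back right acquired
};

int main() {
  ShadowStateMachine r;
  assert(r.push() && !r.push());  // each transition fires exactly once...
  assert(r.fill() && !r.fill());
  assert(r.copy() && !r.copy());  // ...and only in 0 -> 1 -> 2 -> 3 order
  return 0;
}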
@@ -1292,6 +1331,33 @@
   _destination += words;
 }
 
+class ShadowClosure: public MoveAndUpdateClosure {
+public:
+  inline ShadowClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
+                       ObjectStartArray* start_array,
+                       HeapWord* destination, size_t words, size_t offset);
+
+  virtual IterationStatus do_addr(HeapWord* addr, size_t size);
+
+  virtual IterationStatus copy_until_full();
+
+  virtual void copy_partial_obj();
+
+private:
+  size_t _offset;
+};
+
+inline
+ShadowClosure::ShadowClosure(ParMarkBitMap *bitmap,
+                             ParCompactionManager *cm,
+                             ObjectStartArray *start_array,
+                             HeapWord *destination,
+                             size_t words, size_t offset) :
+  MoveAndUpdateClosure(bitmap, cm, start_array, destination, words)
+{
+  _offset = offset;
+}
+
 class UpdateOnlyClosure: public ParMarkBitMapClosure {
  private:
   const PSParallelCompact::SpaceId _space_id;
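For reference, this is how fill_shadow_region() in the .cpp hunk earlier in this patch wires the class up; the only addition over a plain MoveAndUpdateClosure is the word offset from the heap region to its shadow region (the snippet is condensed from that function and relies on its local variables):

// Condensed from PSParallelCompact::fill_shadow_region() above.
size_t shadow_offset = pointer_delta(shadow_addr, dest_addr);
ShadowClosure closure(bitmap, cm, start_array, dest_addr, words, shadow_offset);
closure.set_source(first_src_addr(dest_addr, src_space_id, src_region_idx));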