From shade at redhat.com  Wed Jul  1 13:59:04 2020
From: shade at redhat.com (Aleksey Shipilev)
Date: Wed, 1 Jul 2020 15:59:04 +0200
Subject: RFR (XS/T) 8248634: Shenandoah: incorrect include in shenandoahInitLogger.cpp
Message-ID: <177e1e58-82b8-e4af-d12b-b8f61dd26a4c@redhat.com>

Bug:
  https://bugs.openjdk.java.net/browse/JDK-8248634

diff -r f42f4771b5d1 src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp Wed Jul 01 15:53:23 2020 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp Wed Jul 01 15:56:53 2020 +0200
@@ -24,5 +24,5 @@

 #include "precompiled.hpp"
-#include "gc/shenandoah/shenandoahHeap.hpp"
+#include "gc/shenandoah/shenandoahHeap.inline.hpp"
 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
 #include "gc/shenandoah/shenandoahInitLogger.hpp"

Testing: builds

--
Thanks,
-Aleksey

From shade at redhat.com  Wed Jul  1 13:59:20 2020
From: shade at redhat.com (Aleksey Shipilev)
Date: Wed, 1 Jul 2020 15:59:20 +0200
Subject: RFR (XS) 8248632: Shenandoah: build fails without both JVMTI and JFR
Message-ID: <66cb5669-c1a7-af6c-065d-5640483bb30d@redhat.com>

Bug:
  https://bugs.openjdk.java.net/browse/JDK-8248632

We get dangling ":" when both JFR and JVMTI are disabled. The fix is to put the colon into the
macro calls themselves, which would select it properly:

diff -r 7ab23692b432 src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp      Wed Jul 01 15:09:28 2020 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp      Wed Jul 01 15:51:30 2020 +0200
@@ -91,13 +91,15 @@
 class ShenandoahSerialWeakRoots {
 private:
   JVMTI_ONLY(ShenandoahJVMTIWeakRoot _jvmti_weak_roots;)
   JFR_ONLY(ShenandoahJFRWeakRoot _jfr_weak_roots;)
 public:
-  ShenandoahSerialWeakRoots(ShenandoahPhaseTimings::Phase phase) :
-  JVMTI_ONLY(_jvmti_weak_roots(phase))
-  JFR_ONLY(JVMTI_ONLY(COMMA)_jfr_weak_roots(phase)) {};
+  ShenandoahSerialWeakRoots(ShenandoahPhaseTimings::Phase phase)
+  JVMTI_ONLY(: _jvmti_weak_roots(phase))
+  JFR_ONLY(NOT_JVMTI(:) JVMTI_ONLY(COMMA) _jfr_weak_roots(phase))
+  {};
+
  void weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id);
  void weak_oops_do(OopClosure* cl, uint worker_id);
 };

Testing: builds

--
Thanks,
-Aleksey
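An aside on the macro trick in the 8248632 fix above: the point is that the ':' of the constructor
initializer list must disappear together with the members when neither feature is built in; with
the old form, the "neither" configuration expands to "ShenandoahSerialWeakRoots(...) : {};", which
does not compile. The following self-contained sketch compiles in all four JVMTI/JFR combinations
once the colon moves inside the macro arguments. The macro definitions and the class are
simplified, hypothetical stand-ins for illustration only (the real JVMTI_ONLY/NOT_JVMTI/JFR_ONLY/
COMMA macros live in HotSpot's utilities/macros.hpp and are keyed differently):

    // Hypothetical, simplified stand-ins for HotSpot's feature macros.
    #define COMMA ,
    #ifdef MY_INCLUDE_JVMTI
    #define JVMTI_ONLY(x) x
    #define NOT_JVMTI(x)
    #else
    #define JVMTI_ONLY(x)
    #define NOT_JVMTI(x) x
    #endif
    #ifdef MY_INCLUDE_JFR
    #define JFR_ONLY(x) x
    #else
    #define JFR_ONLY(x)
    #endif

    // Stand-in for ShenandoahSerialWeakRoots: members and their initializers
    // exist only when the corresponding feature is compiled in.
    struct SerialWeakRoots {
      JVMTI_ONLY(int _jvmti_weak_roots;)
      JFR_ONLY(int _jfr_weak_roots;)

      // Fixed form: the ':' is emitted by whichever feature macro fires first,
      // so it vanishes entirely when both features are disabled.
      SerialWeakRoots(int phase)
        JVMTI_ONLY(: _jvmti_weak_roots(phase))
        JFR_ONLY(NOT_JVMTI(:) JVMTI_ONLY(COMMA) _jfr_weak_roots(phase))
      {}
    };

    int main() {
      SerialWeakRoots roots(42);  // smoke test: constructs in any configuration
      (void) roots;
      return 0;
    }

Note that COMMA survives as a single macro argument because argument splitting happens before the
inner expansion; that is the same trick the patched HotSpot code relies on.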
From rkennke at redhat.com  Wed Jul  1 16:57:37 2020
From: rkennke at redhat.com (Roman Kennke)
Date: Wed, 1 Jul 2020 18:57:37 +0200
Subject: RFR (XS/T) 8248634: Shenandoah: incorrect include in shenandoahInitLogger.cpp
In-Reply-To: <177e1e58-82b8-e4af-d12b-b8f61dd26a4c@redhat.com>
References: <177e1e58-82b8-e4af-d12b-b8f61dd26a4c@redhat.com>
Message-ID:

Ok!

Thanks,
Roman

Aleksey Shipilev wrote on Wed, 1 July 2020, 18:38:

> Bug:
>   https://bugs.openjdk.java.net/browse/JDK-8248634
>
> diff -r f42f4771b5d1 src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp
> --- a/src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp Wed Jul 01 15:53:23 2020 +0200
> +++ b/src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp Wed Jul 01 15:56:53 2020 +0200
> @@ -24,5 +24,5 @@
>
>  #include "precompiled.hpp"
> -#include "gc/shenandoah/shenandoahHeap.hpp"
> +#include "gc/shenandoah/shenandoahHeap.inline.hpp"
>  #include "gc/shenandoah/shenandoahHeapRegion.hpp"
>  #include "gc/shenandoah/shenandoahInitLogger.hpp"
>
> Testing: builds
>
> --
> Thanks,
> -Aleksey
>

From rkennke at redhat.com  Wed Jul  1 17:01:46 2020
From: rkennke at redhat.com (Roman Kennke)
Date: Wed, 01 Jul 2020 19:01:46 +0200
Subject: RFR (XS) 8248632: Shenandoah: build fails without both JVMTI and JFR
In-Reply-To: <66cb5669-c1a7-af6c-065d-5640483bb30d@redhat.com>
References: <66cb5669-c1a7-af6c-065d-5640483bb30d@redhat.com>
Message-ID: <20c0000ffcf3778857f7dbd846637fe02481353a.camel@redhat.com>

Oh dear. Ok.

Thanks,
Roman

On Wed, 2020-07-01 at 15:59 +0200, Aleksey Shipilev wrote:
> Bug:
>   https://bugs.openjdk.java.net/browse/JDK-8248632
>
> We get dangling ":" when both JFR and JVMTI are disabled. The fix is
> to put the colon into the macro
> calls themselves, which would select it properly:
>
> diff -r 7ab23692b432 src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp
> --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp      Wed Jul 01 15:09:28 2020 +0200
> +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp      Wed Jul 01 15:51:30 2020 +0200
> @@ -91,13 +91,15 @@
>  class ShenandoahSerialWeakRoots {
>  private:
>    JVMTI_ONLY(ShenandoahJVMTIWeakRoot _jvmti_weak_roots;)
>    JFR_ONLY(ShenandoahJFRWeakRoot _jfr_weak_roots;)
>  public:
> -  ShenandoahSerialWeakRoots(ShenandoahPhaseTimings::Phase phase) :
> -  JVMTI_ONLY(_jvmti_weak_roots(phase))
> -  JFR_ONLY(JVMTI_ONLY(COMMA)_jfr_weak_roots(phase)) {};
> +  ShenandoahSerialWeakRoots(ShenandoahPhaseTimings::Phase phase)
> +  JVMTI_ONLY(: _jvmti_weak_roots(phase))
> +  JFR_ONLY(NOT_JVMTI(:) JVMTI_ONLY(COMMA) _jfr_weak_roots(phase))
> +  {};
> +
>    void weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id);
>    void weak_oops_do(OopClosure* cl, uint worker_id);
>  };
>
>
> Testing: builds
>

From zgu at redhat.com  Wed Jul  1 17:08:38 2020
From: zgu at redhat.com (Zhengyu Gu)
Date: Wed, 1 Jul 2020 13:08:38 -0400
Subject: RFR (XS/T) 8248634: Shenandoah: incorrect include in shenandoahInitLogger.cpp
In-Reply-To: <177e1e58-82b8-e4af-d12b-b8f61dd26a4c@redhat.com>
References: <177e1e58-82b8-e4af-d12b-b8f61dd26a4c@redhat.com>
Message-ID:

Looks good.
-Zhengyu On 7/1/20 9:59 AM, Aleksey Shipilev wrote: > Bug: > https://bugs.openjdk.java.net/browse/JDK-8248634 > > diff -r f42f4771b5d1 src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp > --- a/src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp Wed Jul 01 15:53:23 2020 +0200 > +++ b/src/hotspot/share/gc/shenandoah/shenandoahInitLogger.cpp Wed Jul 01 15:56:53 2020 +0200 > @@ -24,5 +24,5 @@ > > #include "precompiled.hpp" > -#include "gc/shenandoah/shenandoahHeap.hpp" > +#include "gc/shenandoah/shenandoahHeap.inline.hpp" > #include "gc/shenandoah/shenandoahHeapRegion.hpp" > #include "gc/shenandoah/shenandoahInitLogger.hpp" > > Testing: builds > From zgu at redhat.com Wed Jul 1 17:08:25 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 1 Jul 2020 13:08:25 -0400 Subject: RFR (XS) 8248632: Shenandoah: build fails without both JVMTI and JFR In-Reply-To: <66cb5669-c1a7-af6c-065d-5640483bb30d@redhat.com> References: <66cb5669-c1a7-af6c-065d-5640483bb30d@redhat.com> Message-ID: <4c313084-39f6-3832-bb41-d04804cce752@redhat.com> Okay. -Zhengyu On 7/1/20 9:59 AM, Aleksey Shipilev wrote: > Bug: > https://bugs.openjdk.java.net/browse/JDK-8248632 > > We get dangling ":" when both JFR and JVMTI are disabled. The fix is to put the colon into the macro > calls themselves, which would select it properly: > > diff -r 7ab23692b432 src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp > --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp Wed Jul 01 15:09:28 2020 +0200 > +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp Wed Jul 01 15:51:30 2020 +0200 > @@ -91,13 +91,15 @@ > class ShenandoahSerialWeakRoots { > private: > JVMTI_ONLY(ShenandoahJVMTIWeakRoot _jvmti_weak_roots;) > JFR_ONLY(ShenandoahJFRWeakRoot _jfr_weak_roots;) > public: > - ShenandoahSerialWeakRoots(ShenandoahPhaseTimings::Phase phase) : > - JVMTI_ONLY(_jvmti_weak_roots(phase)) > - JFR_ONLY(JVMTI_ONLY(COMMA)_jfr_weak_roots(phase)) {}; > + ShenandoahSerialWeakRoots(ShenandoahPhaseTimings::Phase phase) > + JVMTI_ONLY(: _jvmti_weak_roots(phase)) > + JFR_ONLY(NOT_JVMTI(:) JVMTI_ONLY(COMMA) _jfr_weak_roots(phase)) > + {}; > + > void weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id); > void weak_oops_do(OopClosure* cl, uint worker_id); > }; > > > Testing: builds > From shade at redhat.com Wed Jul 1 17:56:51 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 1 Jul 2020 19:56:51 +0200 Subject: RFR (S) 8248652: Shenandoah: SATB buffer handling may assume no forwarded objects Message-ID: <046b7175-361c-e823-06c5-90d053d4c47d@redhat.com> RFE: https://bugs.openjdk.java.net/browse/JDK-8248652 Since CM-with-UR is gone, SATB may assume no forwarded objects are ever exposed through it. The only way marking code can experience forwarded objects is due to Full GC marking. In that case, SATB should be inactive. Fix: https://cr.openjdk.java.net/~shade/8248652/webrev.01/ Testing: hotspot_gc_shenandoah -- Thanks, -Aleksey From rkennke at redhat.com Wed Jul 1 22:20:33 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 02 Jul 2020 00:20:33 +0200 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 2) In-Reply-To: References: Message-ID: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com> Hi Kelvin, I had something similar in mind with x86, but Aleksey stopped me :-) It may be worth noting that we need a maximum of 3 consequtive CASes to cover all cases, it is not necessary to make a loop: 1. 
Fast-path CAS#1 If that fails, it *may* be because of value-in-memory being a from- space-ref. Check that and do: 2. CAS#2 with expected = previous from CAS#1 If that fails, it may be because a competing thread just wrote a to- space-ref of our expected object (i.e. our *original* expected value), try again: 3. CAS#3 with expected = previous from CAS#2 (== original expected) at that point the CAS cannot fail because of false negative because of to-space-invariant Correct me if I am wrong here! Ah I see you optimized for smaller code there: + // It is extremely rare we reach this point. For this reason, the + // implementation opts for smaller rather than potentially faster + // code. Ultimately, smaller code for this rare case most likely + // delivers higher overall throughput by enabling improved icache + // performance. Good then. Some comments on the patch: // It is required that addr represent a +// memory location at which a non-null reference value is stored and that +// expected holds a non-null reference value. Is this true? I believe the memory location may hold NULL. The expected value may hold NULL too, in which case we can skip the barrier. The compiler optimizes the case where it *knows statically* that expected == NULL to not emit the barrier. We also should have run-time checks for these situations that skip to 'done' on first CAS-failure with NULL for the case when compiler cannot determine statically NULL, or alternatively don't use the _not_null() variants of encode/decode methods. +// weak: This relaxes the "strong" property so that CAS is allowed +// to fail even when the expected value is present in memory. +// This is useful for load-with-lock, store-conditional +// loops where certain failures require retries. If weak is +// enabled, it is ok to return failure rather than retrying. If we are certain that the meaning of 'weak' allows us to skip the barrier, we can change/remove our corresponding implementation of the weak paths to emit a plain CAS. Right? + // Try to CAS with given arguments using LL/SC pair. If successful, + // then we are done. LL/SC pair is not (generally) true anymore with this updated patch. Other than that, I like the impl. I'd actually like to see a similar implementation in x86, I believe it should be slightly more efficient. However, I'm not sure it's really worth - I don't think I've ever seen this code path very hot anywhere. We'd need to convince Aleksey though - this code has undergone a couple of changes already and it's basically always caused unforeseen troubles (weird register clashes, extremely rare corner cases like the above mentioned 3rd CAS, etc) Thank you! Roman On Tue, 2020-06-30 at 22:39 +0000, Nilsen, Kelvin wrote: > Thank you for feedback from previously distributed draft patch. This > new patch is similar to the patch distributed on June 24. However, > this version uses MacroAssembler::cmpxchng() instead of hard-coding > the use of ldxr/stxr instructions. 
> > See http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.02/ > > This patch addresses the problem described in > https://bugs.openjdk.java.net/browse/JDK-8232782 > > The implementation mimics the behavior of the recently revised x86 > implementation of cmpxchg_oop with slight refinements: > > X86 version: > Step 1: Try CAS > Step 2: if CAS fails, check if original memory holds equivalent from- > space pointer > Step 3: Use CAS to overwrite memory with equivalent to-space pointer > Step 4: Try CAS again > Step 5: Return boolean result to indicate success or failure > > AARCH64 version: > Step 1: Try CAS > Step 2: if CAS fails, check if original memory holds equivalent from- > space pointer > Step 3 (differs): Do not overwrite memory with equivalent to-space > pointer, Instead, run the original CAS request with from-space > pointer as the "expected" value. If this succeeds, we're done. If > this fails, go back to step 1 and try that again. > > Step 5: Return boolean result to indicate success or failure > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023-aws > aarch64). I have also run an "extreme" garbage collection workload > for 20 minutes without problem. > > Is this ok to merge? > > > From rkennke at redhat.com Wed Jul 1 22:55:27 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 02 Jul 2020 00:55:27 +0200 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 2) In-Reply-To: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com> References: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com> Message-ID: Ah now I remember why we want to do the CAS barrier even for weak-CAS: while it is technically true that calling code must be prepared to deal with occasional spurious failures, we probably don't want it to get stuck in retry-loop until GC gets to update the field - which may be a fairly long period of time. Not sure if we can draw any other benefit from 'weak' here? Another *possible* optimization (while we're at it) is to check if GC is actually active (HAS_FORWARDED, see LRB impl) and avoid the whole decoding of failure-witness business. This may be something to keep in mind should we ever find out that this code affects us outside of GC cycles. Roman On Thu, 2020-07-02 at 00:20 +0200, Roman Kennke wrote: > Hi Kelvin, > > I had something similar in mind with x86, but Aleksey stopped me :-) > It > may be worth noting that we need a maximum of 3 consequtive CASes to > cover all cases, it is not necessary to make a loop: > > 1. Fast-path CAS#1 > If that fails, it *may* be because of value-in-memory being a from- > space-ref. Check that and do: > 2. CAS#2 with expected = previous from CAS#1 > If that fails, it may be because a competing thread just wrote a to- > space-ref of our expected object (i.e. our *original* expected > value), > try again: > 3. CAS#3 with expected = previous from CAS#2 (== original expected) > at that point the CAS cannot fail because of false negative because > of > to-space-invariant > > Correct me if I am wrong here! > > Ah I see you optimized for smaller code there: > + // It is extremely rare we reach this point. For this reason, the > + // implementation opts for smaller rather than potentially faster > + // code. Ultimately, smaller code for this rare case most likely > + // delivers higher overall throughput by enabling improved icache > + // performance. > Good then. 
> > Some comments on the patch: > > // It is required that addr represent a > +// memory location at which a non-null reference value is stored and > that > +// expected holds a non-null reference value. > > Is this true? I believe the memory location may hold NULL. The > expected > value may hold NULL too, in which case we can skip the barrier. The > compiler optimizes the case where it *knows statically* that expected > == NULL to not emit the barrier. We also should have run-time checks > for these situations that skip to 'done' on first CAS-failure with > NULL > for the case when compiler cannot determine statically NULL, or > alternatively don't use the _not_null() variants of encode/decode > methods. > > +// weak: This relaxes the "strong" property so that CAS is > allowed > +// to fail even when the expected value is present in > memory. > +// This is useful for load-with-lock, store-conditional > +// loops where certain failures require retries. If weak > is > +// enabled, it is ok to return failure rather than > retrying. > > If we are certain that the meaning of 'weak' allows us to skip the > barrier, we can change/remove our corresponding implementation of the > weak paths to emit a plain CAS. Right? > > + // Try to CAS with given arguments using LL/SC pair. If > successful, > + // then we are done. > > LL/SC pair is not (generally) true anymore with this updated patch. > > Other than that, I like the impl. > > I'd actually like to see a similar implementation in x86, I believe > it > should be slightly more efficient. However, I'm not sure it's really > worth - I don't think I've ever seen this code path very hot > anywhere. > We'd need to convince Aleksey though - this code has undergone a > couple > of changes already and it's basically always caused unforeseen > troubles > (weird register clashes, extremely rare corner cases like the above > mentioned 3rd CAS, etc) > > Thank you! > Roman > > > On Tue, 2020-06-30 at 22:39 +0000, Nilsen, Kelvin wrote: > > Thank you for feedback from previously distributed draft > > patch. This > > new patch is similar to the patch distributed on June 24. However, > > this version uses MacroAssembler::cmpxchng() instead of hard-coding > > the use of ldxr/stxr instructions. > > > > See http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.02/ > > > > This patch addresses the problem described in > > https://bugs.openjdk.java.net/browse/JDK-8232782 > > > > The implementation mimics the behavior of the recently revised x86 > > implementation of cmpxchg_oop with slight refinements: > > > > X86 version: > > Step 1: Try CAS > > Step 2: if CAS fails, check if original memory holds equivalent > > from- > > space pointer > > Step 3: Use CAS to overwrite memory with equivalent to-space > > pointer > > Step 4: Try CAS again > > Step 5: Return boolean result to indicate success or failure > > > > AARCH64 version: > > Step 1: Try CAS > > Step 2: if CAS fails, check if original memory holds equivalent > > from- > > space pointer > > Step 3 (differs): Do not overwrite memory with equivalent to-space > > pointer, Instead, run the original CAS request with from-space > > pointer as the "expected" value. If this succeeds, we're done. If > > this fails, go back to step 1 and try that again. > > > > Step 5: Return boolean result to indicate success or failure > > > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023-aws > > aarch64). 
I have also run an "extreme" garbage collection workload
> > for 20 minutes without problem.
> >
> > Is this ok to merge?
> >
> >

From kdnilsen at amazon.com  Thu Jul  2 00:35:21 2020
From: kdnilsen at amazon.com (Nilsen, Kelvin)
Date: Thu, 2 Jul 2020 00:35:21 +0000
Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 2)
In-Reply-To:
References: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com>
Message-ID:

Thanks for the careful review. I'll make another round with it.

I wasn't entirely sure what the preconditions for this function were, so I documented my best
guess. Good thing you reviewed my comments carefully, because apparently I guessed wrong.

Regarding use of the weak Boolean argument, it was not clear to me why a caller would want to
specify weak, but I assumed that if they did request it, I should go ahead and honor it. Are you
suggesting I should ignore the value of the weak argument and always behave as if the caller had
requested not weak?

Also, will you check my understanding further? I believe this service (cmpxchg_oop) is used
primarily to resolve races between background GC threads and mutator threads. There is no need
for the JVM to resolve races between multiple mutator threads. That is a programmer
responsibility. The reason for cmpxchg_oop is to resolve the following two race conditions:

1. A mutator thread is overwriting a field that the GC may be overwriting in parallel. The
   mutator needs to use this protocol to overwrite pointers in order to prevent the following race:
     i.   GC fetches from-space pointer at addr
     ii.  GC computes equivalent to-space pointer
     iii. Mutator overwrites pointer at addr with new_val
     iv.  GC overwrites pointer at addr with the to-space replacement of the original from-space
          value, incorrectly clobbering the mutation!
   The GC thread uses cmpxchg_oop to abandon its overwrite attempt in case the value has changed
   since it began its effort.

2. A mutator thread fetches a pointer from memory and the LRB discovers that the pointer refers
   to from-space memory. After determining the to-space pointer that represents the new location
   of the object originally referenced by the fetched from-space pointer, the mutator attempts to
   heal the value held in memory by overwriting that memory with the to-space equivalent. The
   healing effort must use cmpxchg_oop to resolve the exact same race condition that might occur
   when a background GC thread is healing a from-space pointer residing in memory.

In both of the above cases, we know that GC is active and both expected and new_val are not equal
to null. We also know for both of these cases, by the way, that expected refers to from-space, so
maybe our implementation of cmpxchg_oop should really be starting with step 2!

Can you help me understand the other situations in which cmpxchg_oop will be called, under which
expected and new_val may equal null?

Thanks.

On 7/1/20, 3:56 PM, "Roman Kennke" wrote:

CAUTION: This email originated from outside of the organization. Do not click links or open
attachments unless you can confirm the sender and know the content is safe.
Another *possible* optimization (while we're at it) is to check if GC is actually active (HAS_FORWARDED, see LRB impl) and avoid the whole decoding of failure-witness business. This may be something to keep in mind should we ever find out that this code affects us outside of GC cycles. Roman On Thu, 2020-07-02 at 00:20 +0200, Roman Kennke wrote: > Hi Kelvin, > > I had something similar in mind with x86, but Aleksey stopped me :-) > It > may be worth noting that we need a maximum of 3 consequtive CASes to > cover all cases, it is not necessary to make a loop: > > 1. Fast-path CAS#1 > If that fails, it *may* be because of value-in-memory being a from- > space-ref. Check that and do: > 2. CAS#2 with expected = previous from CAS#1 > If that fails, it may be because a competing thread just wrote a to- > space-ref of our expected object (i.e. our *original* expected > value), > try again: > 3. CAS#3 with expected = previous from CAS#2 (== original expected) > at that point the CAS cannot fail because of false negative because > of > to-space-invariant > > Correct me if I am wrong here! > > Ah I see you optimized for smaller code there: > + // It is extremely rare we reach this point. For this reason, the > + // implementation opts for smaller rather than potentially faster > + // code. Ultimately, smaller code for this rare case most likely > + // delivers higher overall throughput by enabling improved icache > + // performance. > Good then. > > Some comments on the patch: > > // It is required that addr represent a > +// memory location at which a non-null reference value is stored and > that > +// expected holds a non-null reference value. > > Is this true? I believe the memory location may hold NULL. The > expected > value may hold NULL too, in which case we can skip the barrier. The > compiler optimizes the case where it *knows statically* that expected > == NULL to not emit the barrier. We also should have run-time checks > for these situations that skip to 'done' on first CAS-failure with > NULL > for the case when compiler cannot determine statically NULL, or > alternatively don't use the _not_null() variants of encode/decode > methods. > > +// weak: This relaxes the "strong" property so that CAS is > allowed > +// to fail even when the expected value is present in > memory. > +// This is useful for load-with-lock, store-conditional > +// loops where certain failures require retries. If weak > is > +// enabled, it is ok to return failure rather than > retrying. > > If we are certain that the meaning of 'weak' allows us to skip the > barrier, we can change/remove our corresponding implementation of the > weak paths to emit a plain CAS. Right? > > + // Try to CAS with given arguments using LL/SC pair. If > successful, > + // then we are done. > > LL/SC pair is not (generally) true anymore with this updated patch. > > Other than that, I like the impl. > > I'd actually like to see a similar implementation in x86, I believe > it > should be slightly more efficient. However, I'm not sure it's really > worth - I don't think I've ever seen this code path very hot > anywhere. > We'd need to convince Aleksey though - this code has undergone a > couple > of changes already and it's basically always caused unforeseen > troubles > (weird register clashes, extremely rare corner cases like the above > mentioned 3rd CAS, etc) > > Thank you! > Roman > > > On Tue, 2020-06-30 at 22:39 +0000, Nilsen, Kelvin wrote: > > Thank you for feedback from previously distributed draft > > patch. 
This > > new patch is similar to the patch distributed on June 24. However, > > this version uses MacroAssembler::cmpxchng() instead of hard-coding > > the use of ldxr/stxr instructions. > > > > See http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.02/ > > > > This patch addresses the problem described in > > https://bugs.openjdk.java.net/browse/JDK-8232782 > > > > The implementation mimics the behavior of the recently revised x86 > > implementation of cmpxchg_oop with slight refinements: > > > > X86 version: > > Step 1: Try CAS > > Step 2: if CAS fails, check if original memory holds equivalent > > from- > > space pointer > > Step 3: Use CAS to overwrite memory with equivalent to-space > > pointer > > Step 4: Try CAS again > > Step 5: Return boolean result to indicate success or failure > > > > AARCH64 version: > > Step 1: Try CAS > > Step 2: if CAS fails, check if original memory holds equivalent > > from- > > space pointer > > Step 3 (differs): Do not overwrite memory with equivalent to-space > > pointer, Instead, run the original CAS request with from-space > > pointer as the "expected" value. If this succeeds, we're done. If > > this fails, go back to step 1 and try that again. > > > > Step 5: Return boolean result to indicate success or failure > > > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023-aws > > aarch64). I have also run an "extreme" garbage collection workload > > for 20 minutes without problem. > > > > Is this ok to merge? > > > > > > From rkennke at redhat.com Thu Jul 2 08:49:45 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 02 Jul 2020 10:49:45 +0200 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 2) In-Reply-To: References: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com> Message-ID: Hi Kelvin, > Thanks for the careful review. I'll make another round with it. > > I wasn?t entirely sure what the preconditions for this function were > so I documented my best guess. Good thing you reviewed my comments > carefully because apparently I guessed wrong. > > Regarding use of the weak Boolean argument, it was not clear to me > why a caller would want to specify weak, but I assumed if they did > request it, I should go ahead and honor it. Are you suggesting I > should ignore the value of the weak argument and always behave as if > the caller had requested not weak? That is a good question. Let me answer the next one first, maybe it becomes clearer then. > Also, wil you check my understanding further? I believe this > service (cmpxchg_oop) is used primarily to resolve races between > background GC threads and mutator threads. There is no need to need > for the JVM to resolve races between multiple mutator threads. That > is a programmer responsibility. The reason for cmpxchg_oop is to > resolve the following two race conditions: > > 1. A mutator thread is overwriting a field that the GC may be > overwriting in parallel. The mutator needs to use this protocol to > overwrite pointers in order to prevent the following race: > i. GC fetches from-space pointer at addr > ii. GC computes equivalent to-space pointer > iii. Mutator overwrites pointer at addr with new_val > iv. GC overwrites pointer at addr with to-space replacement of > original from-space value, incorrectly clobbering the mutation! > The GC thread uses cmpxchg_oop to abandon its overwrite attempt > in case the value has changed since it began its effort. 
> That is not quite right. That race is solved by the GC thread using a simple CAS. See here: https://hg.openjdk.java.net/jdk/jdk/file/d886e752a7b0/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp#l180 > 2. A mutator thread fetches a pointer from memory and the LRB > discovers that the pointer refers to from-space memory. After > determining the to-space pointer that represents the new location of > the object originally referenced by the fetched from-space pointer, > the mutator attempts to heal the value held in memory by overwriting > that memory with the to-space equivalent. The healing effort must > use cmpxchg_oop to resolve the exact same race condition that might > occur when a background GC thread is healing a from-space pointer > residing in memory. > That race is also solved by a simple CAS: https://hg.openjdk.java.net/jdk/jdk/file/d886e752a7b0/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp#l73 > In both of the above cases, we know that GC is active and both > expected and new_val are not equal to null. We also know for both of > these cases, by the way, that expected refers to from-space, so maybe > our implementation of cmpxchg_oop should really be starting with step > 2! > > Can you help me understand the other situations in which cmpxchg_oop > will be called, under which expected and new_val may equal null? The assembler cmpxchg_oop() routine is only used by the C1 and C2 compilers to implement the intrinsics for Unsafe.compareAndSwapReference() and the whole family of related methods. There is also a weakCompareAndSwapReference(), which is where the weak variant comes into play. The problem that it is trying to solve is the false negative that arises when the value in memory is a from-space reference, but the expected value is a to-space reference. In this case, a simple CAS would fail, even though it shouldn't. An additional problem is that, when we retry the CAS with the from-space ref, another thread (mutator or GC) may update the field to now point to the to-space ref of the same object, in which case the 2nd CAS would fail too, and we need to retry a 3rd time. That 3rd case is very rare. It is entirely possible to see NULL refs there, both in-memory and also for the expected-value. The compiler can optimize the case when it can statically determine that the expected-value is NULL, and generate a simple CAS then, but NULLs can still happen at runtime, and we need to handle this, either by explicit short-cut or by dealing with it when resolving the object (that is what we currently do). Now, about the weak property. I am really not sure what to do. Technically we'd be allowed to produce false negatives with weak-CAS, but it is not intended to consistently produce false negatives repeatedly. It basically caters for the platforms that can use LL/SC pair which may fail sporadically and would only ever be used in a loop. If we'd do weak-CAS as simple CAS without our protocol, that would be legal, and should work, but it would send mutator threads in fairly long retry-loops until another thread heals the reference. OTOH, our protocol already does the retry-loop and turns any weak-CAS into a strong-CAS. So we can just as well ignore (or remove) the weak argument. Does it clarify the situation for you? Roman > Thanks. > > > > ?On 7/1/20, 3:56 PM, "Roman Kennke" wrote: > > CAUTION: This email originated from outside of the organization. > Do not click links or open attachments unless you can confirm the > sender and know the content is safe. 
> > > > Ah now I remember why we want to do the CAS barrier even for > weak-CAS: > while it is technically true that calling code must be prepared > to deal > with occasional spurious failures, we probably don't want it to > get > stuck in retry-loop until GC gets to update the field - which may > be a > fairly long period of time. Not sure if we can draw any other > benefit > from 'weak' here? > > Another *possible* optimization (while we're at it) is to check > if GC > is actually active (HAS_FORWARDED, see LRB impl) and avoid the > whole > decoding of failure-witness business. This may be something to > keep in > mind should we ever find out that this code affects us outside of > GC > cycles. > > Roman > > On Thu, 2020-07-02 at 00:20 +0200, Roman Kennke wrote: > > Hi Kelvin, > > > > I had something similar in mind with x86, but Aleksey stopped > me :-) > > It > > may be worth noting that we need a maximum of 3 consequtive > CASes to > > cover all cases, it is not necessary to make a loop: > > > > 1. Fast-path CAS#1 > > If that fails, it *may* be because of value-in-memory being a > from- > > space-ref. Check that and do: > > 2. CAS#2 with expected = previous from CAS#1 > > If that fails, it may be because a competing thread just wrote > a to- > > space-ref of our expected object (i.e. our *original* expected > > value), > > try again: > > 3. CAS#3 with expected = previous from CAS#2 (== original > expected) > > at that point the CAS cannot fail because of false negative > because > > of > > to-space-invariant > > > > Correct me if I am wrong here! > > > > Ah I see you optimized for smaller code there: > > + // It is extremely rare we reach this point. For this > reason, the > > + // implementation opts for smaller rather than potentially > faster > > + // code. Ultimately, smaller code for this rare case most > likely > > + // delivers higher overall throughput by enabling improved > icache > > + // performance. > > Good then. > > > > Some comments on the patch: > > > > // It is required that addr represent a > > +// memory location at which a non-null reference value is > stored and > > that > > +// expected holds a non-null reference value. > > > > Is this true? I believe the memory location may hold NULL. The > > expected > > value may hold NULL too, in which case we can skip the barrier. > The > > compiler optimizes the case where it *knows statically* that > expected > > == NULL to not emit the barrier. We also should have run-time > checks > > for these situations that skip to 'done' on first CAS-failure > with > > NULL > > for the case when compiler cannot determine statically NULL, or > > alternatively don't use the _not_null() variants of > encode/decode > > methods. > > > > +// weak: This relaxes the "strong" property so that CAS is > > allowed > > +// to fail even when the expected value is present > in > > memory. > > +// This is useful for load-with-lock, store- > conditional > > +// loops where certain failures require retries. If > weak > > is > > +// enabled, it is ok to return failure rather than > > retrying. > > > > If we are certain that the meaning of 'weak' allows us to skip > the > > barrier, we can change/remove our corresponding implementation > of the > > weak paths to emit a plain CAS. Right? > > > > + // Try to CAS with given arguments using LL/SC pair. If > > successful, > > + // then we are done. > > > > LL/SC pair is not (generally) true anymore with this updated > patch. > > > > Other than that, I like the impl. 
> > > > I'd actually like to see a similar implementation in x86, I > believe > > it > > should be slightly more efficient. However, I'm not sure it's > really > > worth - I don't think I've ever seen this code path very hot > > anywhere. > > We'd need to convince Aleksey though - this code has undergone > a > > couple > > of changes already and it's basically always caused unforeseen > > troubles > > (weird register clashes, extremely rare corner cases like the > above > > mentioned 3rd CAS, etc) > > > > Thank you! > > Roman > > > > > > On Tue, 2020-06-30 at 22:39 +0000, Nilsen, Kelvin wrote: > > > Thank you for feedback from previously distributed draft > > > patch. This > > > new patch is similar to the patch distributed on June > 24. However, > > > this version uses MacroAssembler::cmpxchng() instead of hard- > coding > > > the use of ldxr/stxr instructions. > > > > > > See > http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.02/ > > > > > > This patch addresses the problem described in > > > https://bugs.openjdk.java.net/browse/JDK-8232782 > > > > > > The implementation mimics the behavior of the recently > revised x86 > > > implementation of cmpxchg_oop with slight refinements: > > > > > > X86 version: > > > Step 1: Try CAS > > > Step 2: if CAS fails, check if original memory holds > equivalent > > > from- > > > space pointer > > > Step 3: Use CAS to overwrite memory with equivalent to-space > > > pointer > > > Step 4: Try CAS again > > > Step 5: Return boolean result to indicate success or failure > > > > > > AARCH64 version: > > > Step 1: Try CAS > > > Step 2: if CAS fails, check if original memory holds > equivalent > > > from- > > > space pointer > > > Step 3 (differs): Do not overwrite memory with equivalent to- > space > > > pointer, Instead, run the original CAS request with from- > space > > > pointer as the "expected" value. If this succeeds, we're > done. If > > > this fails, go back to step 1 and try that again. > > > > > > Step 5: Return boolean result to indicate success or failure > > > > > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > > > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023- > aws > > > aarch64). I have also run an "extreme" garbage collection > workload > > > for 20 minutes without problem. > > > > > > Is this ok to merge? 
> > > > > > > > > > > From gnu.andrew at redhat.com Thu Jul 2 12:19:39 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Thu, 2 Jul 2020 13:19:39 +0100 Subject: [RFR] [8u] 8u262-b08 Upstream Sync Message-ID: <0c90e0dd-21ce-3743-fa26-df11c7affdd3@redhat.com> Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/ Merge changesets: http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/corba/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/jaxp/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/jaxws/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/jdk/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/hotspot/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/langtools/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/nashorn/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/root/merge.changeset Changes in aarch64-shenandoah-jdk8u262-b08: - S8220293: Deadlock in JFR string pool - S8225068: Remove DocuSign root certificate that is expiring in May 2020 - S8225069: Remove Comodo root certificate that is expiring in May 2020 Main issues of note: None, clean merge. diffstat for root b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for corba b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jaxp b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jaxws b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for langtools b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for nashorn b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jdk a/make/data/cacerts/addtrustclass1ca | 31 --------------------- a/make/data/cacerts/keynectisrootca | 30 -------------------- b/.hgtags | 1 b/test/sun/security/lib/cacerts/VerifyCACerts.java | 14 ++------- 4 files changed, 4 insertions(+), 72 deletions(-) diffstat for hotspot b/.hgtags | 1 b/src/share/vm/jfr/recorder/checkpoint/jfrCheckpointManager.cpp | 4 b/src/share/vm/jfr/recorder/storage/jfrBuffer.cpp | 15 - b/src/share/vm/jfr/recorder/storage/jfrBuffer.hpp | 5 b/src/share/vm/jfr/recorder/storage/jfrMemorySpace.inline.hpp | 22 +- b/src/share/vm/jfr/recorder/storage/jfrStorage.cpp | 9 b/src/share/vm/jfr/recorder/storage/jfrStorageUtils.hpp | 10 - b/src/share/vm/jfr/recorder/storage/jfrStorageUtils.inline.hpp | 23 ++ b/src/share/vm/jfr/recorder/stringpool/jfrStringPool.cpp | 91 ++++------ b/src/share/vm/jfr/recorder/stringpool/jfrStringPoolBuffer.cpp | 8 10 files changed, 103 insertions(+), 85 deletions(-) Successfully built on x86, x86_64, s390, s390x, ppc, ppc64, ppc64le & aarch64. Ok to push? Thanks, -- Andrew :) Senior Free Java Software Engineer Red Hat, Inc. (http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From kdnilsen at amazon.com Thu Jul 2 15:05:14 2020 From: kdnilsen at amazon.com (Nilsen, Kelvin) Date: Thu, 2 Jul 2020 15:05:14 +0000 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 2) In-Reply-To: References: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com> Message-ID: Thank you very much. This is very helpful. I see several options regarding how to handle the weak argument: 1. As you suggest, remove the argument from this function and fix up all the call points. The x86 version does not have it for example. 2. Allow the argument, but ignore its value. Always behave as if !weak. 3. 
Modify my draft patch so that I distinguish between the cause of cmpxchg failure if weak. This would involve conditional branching. If EQ flag is on, indicate success, even if weak. But if EQ flag is off, I need to check the witness value. If weak and the witness value equals expected, then cmpxchg failed due to loss of lock so we terminate early weak failure. But if the witness value does not equal expected, then this failure requires that I proceed to the "next step". 4. A fourth option generalizes on the third. The "problem" with the third option is that I introduce new code outside cmpxchg to rediscover what cmpxchg already knows. If I were to in-line the behavior of cmpxchg, I could adjust the branch destinations so that I don't have to rediscover the lost information based on new condition tests. 5. Or, maybe there already exists a subsequent peephole optimization pass that achieves the effect of solution 4 with code generated according to solution 3. I'm still very new to HotSpot architecture so am not familiar with how all the pieces fit together. Do you have any further suggestions regarding which option is preferred? ?On 7/2/20, 1:50 AM, "Roman Kennke" wrote: Hi Kelvin, > Thanks for the careful review. I'll make another round with it. > > I wasn?t entirely sure what the preconditions for this function were > so I documented my best guess. Good thing you reviewed my comments > carefully because apparently I guessed wrong. > > Regarding use of the weak Boolean argument, it was not clear to me > why a caller would want to specify weak, but I assumed if they did > request it, I should go ahead and honor it. Are you suggesting I > should ignore the value of the weak argument and always behave as if > the caller had requested not weak? That is a good question. Let me answer the next one first, maybe it becomes clearer then. > Also, wil you check my understanding further? I believe this > service (cmpxchg_oop) is used primarily to resolve races between > background GC threads and mutator threads. There is no need to need > for the JVM to resolve races between multiple mutator threads. That > is a programmer responsibility. The reason for cmpxchg_oop is to > resolve the following two race conditions: > > 1. A mutator thread is overwriting a field that the GC may be > overwriting in parallel. The mutator needs to use this protocol to > overwrite pointers in order to prevent the following race: > i. GC fetches from-space pointer at addr > ii. GC computes equivalent to-space pointer > iii. Mutator overwrites pointer at addr with new_val > iv. GC overwrites pointer at addr with to-space replacement of > original from-space value, incorrectly clobbering the mutation! > The GC thread uses cmpxchg_oop to abandon its overwrite attempt > in case the value has changed since it began its effort. > That is not quite right. That race is solved by the GC thread using a simple CAS. See here: https://hg.openjdk.java.net/jdk/jdk/file/d886e752a7b0/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp#l180 > 2. A mutator thread fetches a pointer from memory and the LRB > discovers that the pointer refers to from-space memory. After > determining the to-space pointer that represents the new location of > the object originally referenced by the fetched from-space pointer, > the mutator attempts to heal the value held in memory by overwriting > that memory with the to-space equivalent. 
The healing effort must > use cmpxchg_oop to resolve the exact same race condition that might > occur when a background GC thread is healing a from-space pointer > residing in memory. > That race is also solved by a simple CAS: https://hg.openjdk.java.net/jdk/jdk/file/d886e752a7b0/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp#l73 > In both of the above cases, we know that GC is active and both > expected and new_val are not equal to null. We also know for both of > these cases, by the way, that expected refers to from-space, so maybe > our implementation of cmpxchg_oop should really be starting with step > 2! > > Can you help me understand the other situations in which cmpxchg_oop > will be called, under which expected and new_val may equal null? The assembler cmpxchg_oop() routine is only used by the C1 and C2 compilers to implement the intrinsics for Unsafe.compareAndSwapReference() and the whole family of related methods. There is also a weakCompareAndSwapReference(), which is where the weak variant comes into play. The problem that it is trying to solve is the false negative that arises when the value in memory is a from-space reference, but the expected value is a to-space reference. In this case, a simple CAS would fail, even though it shouldn't. An additional problem is that, when we retry the CAS with the from-space ref, another thread (mutator or GC) may update the field to now point to the to-space ref of the same object, in which case the 2nd CAS would fail too, and we need to retry a 3rd time. That 3rd case is very rare. It is entirely possible to see NULL refs there, both in-memory and also for the expected-value. The compiler can optimize the case when it can statically determine that the expected-value is NULL, and generate a simple CAS then, but NULLs can still happen at runtime, and we need to handle this, either by explicit short-cut or by dealing with it when resolving the object (that is what we currently do). Now, about the weak property. I am really not sure what to do. Technically we'd be allowed to produce false negatives with weak-CAS, but it is not intended to consistently produce false negatives repeatedly. It basically caters for the platforms that can use LL/SC pair which may fail sporadically and would only ever be used in a loop. If we'd do weak-CAS as simple CAS without our protocol, that would be legal, and should work, but it would send mutator threads in fairly long retry-loops until another thread heals the reference. OTOH, our protocol already does the retry-loop and turns any weak-CAS into a strong-CAS. So we can just as well ignore (or remove) the weak argument. Does it clarify the situation for you? Roman > Thanks. > > > > On 7/1/20, 3:56 PM, "Roman Kennke" wrote: > > CAUTION: This email originated from outside of the organization. > Do not click links or open attachments unless you can confirm the > sender and know the content is safe. > > > > Ah now I remember why we want to do the CAS barrier even for > weak-CAS: > while it is technically true that calling code must be prepared > to deal > with occasional spurious failures, we probably don't want it to > get > stuck in retry-loop until GC gets to update the field - which may > be a > fairly long period of time. Not sure if we can draw any other > benefit > from 'weak' here? > > Another *possible* optimization (while we're at it) is to check > if GC > is actually active (HAS_FORWARDED, see LRB impl) and avoid the > whole > decoding of failure-witness business. 
This may be something to > keep in > mind should we ever find out that this code affects us outside of > GC > cycles. > > Roman > > On Thu, 2020-07-02 at 00:20 +0200, Roman Kennke wrote: > > Hi Kelvin, > > > > I had something similar in mind with x86, but Aleksey stopped > me :-) > > It > > may be worth noting that we need a maximum of 3 consequtive > CASes to > > cover all cases, it is not necessary to make a loop: > > > > 1. Fast-path CAS#1 > > If that fails, it *may* be because of value-in-memory being a > from- > > space-ref. Check that and do: > > 2. CAS#2 with expected = previous from CAS#1 > > If that fails, it may be because a competing thread just wrote > a to- > > space-ref of our expected object (i.e. our *original* expected > > value), > > try again: > > 3. CAS#3 with expected = previous from CAS#2 (== original > expected) > > at that point the CAS cannot fail because of false negative > because > > of > > to-space-invariant > > > > Correct me if I am wrong here! > > > > Ah I see you optimized for smaller code there: > > + // It is extremely rare we reach this point. For this > reason, the > > + // implementation opts for smaller rather than potentially > faster > > + // code. Ultimately, smaller code for this rare case most > likely > > + // delivers higher overall throughput by enabling improved > icache > > + // performance. > > Good then. > > > > Some comments on the patch: > > > > // It is required that addr represent a > > +// memory location at which a non-null reference value is > stored and > > that > > +// expected holds a non-null reference value. > > > > Is this true? I believe the memory location may hold NULL. The > > expected > > value may hold NULL too, in which case we can skip the barrier. > The > > compiler optimizes the case where it *knows statically* that > expected > > == NULL to not emit the barrier. We also should have run-time > checks > > for these situations that skip to 'done' on first CAS-failure > with > > NULL > > for the case when compiler cannot determine statically NULL, or > > alternatively don't use the _not_null() variants of > encode/decode > > methods. > > > > +// weak: This relaxes the "strong" property so that CAS is > > allowed > > +// to fail even when the expected value is present > in > > memory. > > +// This is useful for load-with-lock, store- > conditional > > +// loops where certain failures require retries. If > weak > > is > > +// enabled, it is ok to return failure rather than > > retrying. > > > > If we are certain that the meaning of 'weak' allows us to skip > the > > barrier, we can change/remove our corresponding implementation > of the > > weak paths to emit a plain CAS. Right? > > > > + // Try to CAS with given arguments using LL/SC pair. If > > successful, > > + // then we are done. > > > > LL/SC pair is not (generally) true anymore with this updated > patch. > > > > Other than that, I like the impl. > > > > I'd actually like to see a similar implementation in x86, I > believe > > it > > should be slightly more efficient. However, I'm not sure it's > really > > worth - I don't think I've ever seen this code path very hot > > anywhere. > > We'd need to convince Aleksey though - this code has undergone > a > > couple > > of changes already and it's basically always caused unforeseen > > troubles > > (weird register clashes, extremely rare corner cases like the > above > > mentioned 3rd CAS, etc) > > > > Thank you! 
> > Roman > > > > > > On Tue, 2020-06-30 at 22:39 +0000, Nilsen, Kelvin wrote: > > > Thank you for feedback from previously distributed draft > > > patch. This > > > new patch is similar to the patch distributed on June > 24. However, > > > this version uses MacroAssembler::cmpxchng() instead of hard- > coding > > > the use of ldxr/stxr instructions. > > > > > > See > http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.02/ > > > > > > This patch addresses the problem described in > > > https://bugs.openjdk.java.net/browse/JDK-8232782 > > > > > > The implementation mimics the behavior of the recently > revised x86 > > > implementation of cmpxchg_oop with slight refinements: > > > > > > X86 version: > > > Step 1: Try CAS > > > Step 2: if CAS fails, check if original memory holds > equivalent > > > from- > > > space pointer > > > Step 3: Use CAS to overwrite memory with equivalent to-space > > > pointer > > > Step 4: Try CAS again > > > Step 5: Return boolean result to indicate success or failure > > > > > > AARCH64 version: > > > Step 1: Try CAS > > > Step 2: if CAS fails, check if original memory holds > equivalent > > > from- > > > space pointer > > > Step 3 (differs): Do not overwrite memory with equivalent to- > space > > > pointer, Instead, run the original CAS request with from- > space > > > pointer as the "expected" value. If this succeeds, we're > done. If > > > this fails, go back to step 1 and try that again. > > > > > > Step 5: Return boolean result to indicate success or failure > > > > > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > > > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023- > aws > > > aarch64). I have also run an "extreme" garbage collection > workload > > > for 20 minutes without problem. > > > > > > Is this ok to merge? > > > > > > > > > > > From rkennke at redhat.com Thu Jul 2 15:34:31 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 02 Jul 2020 17:34:31 +0200 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 2) In-Reply-To: References: <848c86ff456895f2fe4cbebeda7d0926e4386a84.camel@redhat.com> Message-ID: <9fe3fbe5cf193e3c9320550e91ab469dd392f36f.camel@redhat.com> Hi Kelvin, > 1. As you suggest, remove the argument from this function and fix up > all the call points. The x86 version does not have it for example. Yeah, x86 doesn't have it because there's no weak CAS in x86 hardware. > 2. Allow the argument, but ignore its value. Always behave as if > !weak. Both caller and the implementation are 'ours', we can just as well get rid of the argument. > 3. Modify my draft patch so that I distinguish between the cause of > cmpxchg failure if weak. This would involve conditional branching. Eww, I doubt that this brings any benefit. > 4. A fourth option generalizes on the third. The "problem" with the > third option is that I introduce new code outside cmpxchg to > rediscover what cmpxchg already knows. If I were to in-line the > behavior of cmpxchg, I could adjust the branch destinations so that I > don't have to rediscover the lost information based on new condition > tests. > 5. Or, maybe there already exists a subsequent peephole optimization > pass that achieves the effect of solution 4 with code generated > according to solution 3. I'm still very new to HotSpot architecture > so am not familiar with how all the pieces fit together. > I think 4 + 5 would require that we have any control over the caller of weakCompareAndSwap(), which we haven't. 
This is Java code that is compiled to IR. I'd say we have no easy way to get any value out of weak, let's just ignore it, treat it as strong CAS, and remove the argument altogether. There is one additional wrinkle afaict: MacroAssembler::cmpxchg() has a retry loop itself, for the case when it sees !weak and !UseLSE, maybe we can make a tiny optimization there to avoid that loop and handle it ourselves? Not sure it would be worth the troubles, though. Roman > Do you have any further suggestions regarding which option is > preferred? > > ?On 7/2/20, 1:50 AM, "Roman Kennke" wrote: > > Hi Kelvin, > > > Thanks for the careful review. I'll make another round with > it. > > > > I wasn?t entirely sure what the preconditions for this function > were > > so I documented my best guess. Good thing you reviewed my > comments > > carefully because apparently I guessed wrong. > > > > Regarding use of the weak Boolean argument, it was not clear to > me > > why a caller would want to specify weak, but I assumed if they > did > > request it, I should go ahead and honor it. Are you suggesting > I > > should ignore the value of the weak argument and always behave > as if > > the caller had requested not weak? > > That is a good question. Let me answer the next one first, maybe > it > becomes clearer then. > > > Also, wil you check my understanding further? I believe this > > service (cmpxchg_oop) is used primarily to resolve races > between > > background GC threads and mutator threads. There is no need to > need > > for the JVM to resolve races between multiple mutator > threads. That > > is a programmer responsibility. The reason for cmpxchg_oop is > to > > resolve the following two race conditions: > > > > 1. A mutator thread is overwriting a field that the GC may be > > overwriting in parallel. The mutator needs to use this protocol > to > > overwrite pointers in order to prevent the following race: > > i. GC fetches from-space pointer at addr > > ii. GC computes equivalent to-space pointer > > iii. Mutator overwrites pointer at addr with new_val > > iv. GC overwrites pointer at addr with to-space > replacement of > > original from-space value, incorrectly clobbering the mutation! > > The GC thread uses cmpxchg_oop to abandon its overwrite > attempt > > in case the value has changed since it began its effort. > > > > That is not quite right. That race is solved by the GC thread > using a > simple CAS. See here: > > > https://hg.openjdk.java.net/jdk/jdk/file/d886e752a7b0/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp#l180 > > > 2. A mutator thread fetches a pointer from memory and the LRB > > discovers that the pointer refers to from-space memory. After > > determining the to-space pointer that represents the new > location of > > the object originally referenced by the fetched from-space > pointer, > > the mutator attempts to heal the value held in memory by > overwriting > > that memory with the to-space equivalent. The healing effort > must > > use cmpxchg_oop to resolve the exact same race condition that > might > > occur when a background GC thread is healing a from-space > pointer > > residing in memory. > > > > That race is also solved by a simple CAS: > > https://hg.openjdk.java.net/jdk/jdk/file/d886e752a7b0/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp#l73 > > > In both of the above cases, we know that GC is active and both > > expected and new_val are not equal to null. 
We also know for > both of > > these cases, by the way, that expected refers to from-space, so > maybe > > our implementation of cmpxchg_oop should really be starting > with step > > 2! > > > > Can you help me understand the other situations in which > cmpxchg_oop > > will be called, under which expected and new_val may equal > null? > > The assembler cmpxchg_oop() routine is only used by the C1 and C2 > compilers to implement the intrinsics for > Unsafe.compareAndSwapReference() and the whole family of related > methods. There is also a weakCompareAndSwapReference(), which is > where > the weak variant comes into play. The problem that it is trying > to > solve is the false negative that arises when the value in memory > is a > from-space reference, but the expected value is a to-space > reference. > In this case, a simple CAS would fail, even though it shouldn't. > An > additional problem is that, when we retry the CAS with the from- > space > ref, another thread (mutator or GC) may update the field to now > point > to the to-space ref of the same object, in which case the 2nd CAS > would > fail too, and we need to retry a 3rd time. That 3rd case is very > rare. > > It is entirely possible to see NULL refs there, both in-memory > and also > for the expected-value. The compiler can optimize the case when > it can > statically determine that the expected-value is NULL, and > generate a > simple CAS then, but NULLs can still happen at runtime, and we > need to > handle this, either by explicit short-cut or by dealing with it > when > resolving the object (that is what we currently do). > > Now, about the weak property. I am really not sure what to do. > Technically we'd be allowed to produce false negatives with weak- > CAS, > but it is not intended to consistently produce false negatives > repeatedly. It basically caters for the platforms that can use > LL/SC > pair which may fail sporadically and would only ever be used in a > loop. > If we'd do weak-CAS as simple CAS without our protocol, that > would be > legal, and should work, but it would send mutator threads in > fairly > long retry-loops until another thread heals the reference. OTOH, > our > protocol already does the retry-loop and turns any weak-CAS into > a > strong-CAS. So we can just as well ignore (or remove) the weak > argument. > > Does it clarify the situation for you? > Roman > > > > Thanks. > > > > > > > > On 7/1/20, 3:56 PM, "Roman Kennke" wrote: > > > > CAUTION: This email originated from outside of the > organization. > > Do not click links or open attachments unless you can confirm > the > > sender and know the content is safe. > > > > > > > > Ah now I remember why we want to do the CAS barrier even > for > > weak-CAS: > > while it is technically true that calling code must be > prepared > > to deal > > with occasional spurious failures, we probably don't want > it to > > get > > stuck in retry-loop until GC gets to update the field - > which may > > be a > > fairly long period of time. Not sure if we can draw any > other > > benefit > > from 'weak' here? > > > > Another *possible* optimization (while we're at it) is to > check > > if GC > > is actually active (HAS_FORWARDED, see LRB impl) and avoid > the > > whole > > decoding of failure-witness business. This may be something > to > > keep in > > mind should we ever find out that this code affects us > outside of > > GC > > cycles. 
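To make that gating idea concrete, here is a C++-level sketch of the shape
being discussed; the real thing is emitted assembly, and
heap_has_forwarded_objects() / cas_oop_midpath() are hypothetical stand-ins
for the HAS_FORWARDED gc-state check and the failure-witness handling
mentioned above:

#include <atomic>

using oop = void*;                  // stand-in only, not HotSpot's oop type
bool heap_has_forwarded_objects();  // assumed: tests the HAS_FORWARDED bit
bool cas_oop_midpath(std::atomic<oop>* addr, oop expected,
                     oop new_val, oop witness);   // the fix-up path

// Fast path first; the from-space fix-up is only entered while the
// collector can actually have forwarded objects. Outside of a GC cycle a
// plain CAS already gives the exact answer, for weak and strong users alike.
bool cas_oop(std::atomic<oop>* addr, oop expected, oop new_val) {
  oop witness = expected;
  if (addr->compare_exchange_strong(witness, new_val)) {
    return true;                    // common case: no barrier work at all
  }
  if (!heap_has_forwarded_objects()) {
    return false;                   // cannot be a false negative
  }
  return cas_oop_midpath(addr, expected, new_val, witness);
}

A bounded sketch of the mid path itself follows the next message.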
> > > > Roman > > > > On Thu, 2020-07-02 at 00:20 +0200, Roman Kennke wrote: > > > Hi Kelvin, > > > > > > I had something similar in mind with x86, but Aleksey > stopped > > me :-) > > > It > > > may be worth noting that we need a maximum of 3 > consequtive > > CASes to > > > cover all cases, it is not necessary to make a loop: > > > > > > 1. Fast-path CAS#1 > > > If that fails, it *may* be because of value-in-memory > being a > > from- > > > space-ref. Check that and do: > > > 2. CAS#2 with expected = previous from CAS#1 > > > If that fails, it may be because a competing thread just > wrote > > a to- > > > space-ref of our expected object (i.e. our *original* > expected > > > value), > > > try again: > > > 3. CAS#3 with expected = previous from CAS#2 (== original > > expected) > > > at that point the CAS cannot fail because of false > negative > > because > > > of > > > to-space-invariant > > > > > > Correct me if I am wrong here! > > > > > > Ah I see you optimized for smaller code there: > > > + // It is extremely rare we reach this point. For this > > reason, the > > > + // implementation opts for smaller rather than > potentially > > faster > > > + // code. Ultimately, smaller code for this rare case > most > > likely > > > + // delivers higher overall throughput by enabling > improved > > icache > > > + // performance. > > > Good then. > > > > > > Some comments on the patch: > > > > > > // It is required that addr represent a > > > +// memory location at which a non-null reference value > is > > stored and > > > that > > > +// expected holds a non-null reference value. > > > > > > Is this true? I believe the memory location may hold > NULL. The > > > expected > > > value may hold NULL too, in which case we can skip the > barrier. > > The > > > compiler optimizes the case where it *knows statically* > that > > expected > > > == NULL to not emit the barrier. We also should have run- > time > > checks > > > for these situations that skip to 'done' on first CAS- > failure > > with > > > NULL > > > for the case when compiler cannot determine statically > NULL, or > > > alternatively don't use the _not_null() variants of > > encode/decode > > > methods. > > > > > > +// weak: This relaxes the "strong" property so that > CAS is > > > allowed > > > +// to fail even when the expected value is > present > > in > > > memory. > > > +// This is useful for load-with-lock, store- > > conditional > > > +// loops where certain failures require > retries. If > > weak > > > is > > > +// enabled, it is ok to return failure rather > than > > > retrying. > > > > > > If we are certain that the meaning of 'weak' allows us to > skip > > the > > > barrier, we can change/remove our corresponding > implementation > > of the > > > weak paths to emit a plain CAS. Right? > > > > > > + // Try to CAS with given arguments using LL/SC > pair. If > > > successful, > > > + // then we are done. > > > > > > LL/SC pair is not (generally) true anymore with this > updated > > patch. > > > > > > Other than that, I like the impl. > > > > > > I'd actually like to see a similar implementation in x86, > I > > believe > > > it > > > should be slightly more efficient. However, I'm not sure > it's > > really > > > worth - I don't think I've ever seen this code path very > hot > > > anywhere. 
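A pseudocode rendering of the three-attempt sequence enumerated above, to
spell the argument out; oop is just a pointer here, and fwd() is a
hypothetical helper standing in for the forwarding-pointer lookup:

#include <atomic>

using oop = void*;     // stand-in only
oop fwd(oop obj);      // assumed: returns the to-space copy of obj, or obj

bool cas_oop_bounded(std::atomic<oop>* addr, oop expected, oop new_val) {
  // CAS #1: the common, uncontended case.
  oop w1 = expected;
  if (addr->compare_exchange_strong(w1, new_val)) return true;

  // The failure witness may be the from-space copy of the object we
  // expected, i.e. a false negative rather than a real mismatch.
  if (fwd(w1) != expected) return false;   // genuinely different object

  // CAS #2: retry, expecting the from-space value just observed.
  oop w2 = w1;
  if (addr->compare_exchange_strong(w2, new_val)) return true;

  // A competing thread may have just healed the slot to the to-space
  // reference of the very object we originally expected.
  if (w2 != expected) return false;

  // CAS #3: with the to-space invariant holding, no further false
  // negative is possible, so this attempt is decisive either way.
  oop w3 = expected;
  return addr->compare_exchange_strong(w3, new_val);
}

Three attempts bound the work, so no loop is needed, which also matches the
smaller-code-over-speed trade-off noted in the patch comment.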
> > > We'd need to convince Aleksey though - this code has > undergone > > a > > > couple > > > of changes already and it's basically always caused > unforeseen > > > troubles > > > (weird register clashes, extremely rare corner cases like > the > > above > > > mentioned 3rd CAS, etc) > > > > > > Thank you! > > > Roman > > > > > > > > > On Tue, 2020-06-30 at 22:39 +0000, Nilsen, Kelvin wrote: > > > > Thank you for feedback from previously distributed > draft > > > > patch. This > > > > new patch is similar to the patch distributed on June > > 24. However, > > > > this version uses MacroAssembler::cmpxchng() instead of > hard- > > coding > > > > the use of ldxr/stxr instructions. > > > > > > > > See > > http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.02/ > > > > > > > > This patch addresses the problem described in > > > > https://bugs.openjdk.java.net/browse/JDK-8232782 > > > > > > > > The implementation mimics the behavior of the recently > > revised x86 > > > > implementation of cmpxchg_oop with slight refinements: > > > > > > > > X86 version: > > > > Step 1: Try CAS > > > > Step 2: if CAS fails, check if original memory holds > > equivalent > > > > from- > > > > space pointer > > > > Step 3: Use CAS to overwrite memory with equivalent to- > space > > > > pointer > > > > Step 4: Try CAS again > > > > Step 5: Return boolean result to indicate success or > failure > > > > > > > > AARCH64 version: > > > > Step 1: Try CAS > > > > Step 2: if CAS fails, check if original memory holds > > equivalent > > > > from- > > > > space pointer > > > > Step 3 (differs): Do not overwrite memory with > equivalent to- > > space > > > > pointer, Instead, run the original CAS request with > from- > > space > > > > pointer as the "expected" value. If this succeeds, > we're > > done. If > > > > this fails, go back to step 1 and try that again. > > > > > > > > Step 5: Return boolean result to indicate success or > failure > > > > > > > > This patch satisfies tier1, tier2, and > hotspot_gc_shenandoah > > > > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux > 5.3.0-1023- > > aws > > > > aarch64). I have also run an "extreme" garbage > collection > > workload > > > > for 20 minutes without problem. > > > > > > > > Is this ok to merge? > > > > > > > > > > > > > > > > > > From rkennke at redhat.com Thu Jul 2 21:03:49 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Thu, 02 Jul 2020 23:03:49 +0200 Subject: RFR: JDK-8248725: Shenandoah: Claim verifier thread roots for parallel processing Message-ID: <72d487f1cd592e36d0adca9827af1a93c5aa76d8.camel@redhat.com> We see occasional crashes during CI testing: # Internal Error (/home/jenkins/workspace/nightly/jdk- jdk/src/hotspot/cpu/x86/frame_x86.cpp:684), pid=25982, tid=26009 # assert(_last_Java_pc == NULL) failed: already walkable This seems to be caused by calls to Threads::possibly_parallel_oops_do() with par=false even though it's called by multiple workers. This leads to threads being visited by more than one worker, and thus trip this assert. Bug: https://bugs.openjdk.java.net/browse/JDK-8248725 Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8248725/ Testing: multiple runs of hotspot_gc_shenandoah (which exposed the bug before) Is the right way to push this to: http://hg.openjdk.java.net/jdk/jdk15/ ? 
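For readers unfamiliar with the claiming machinery: the point of par=true is
that every worker must claim a thread before touching it, so each thread is
visited exactly once per pass. A self-contained toy model of that protocol
(not the HotSpot implementation, which lives in Threads/Thread):

#include <atomic>
#include <cstddef>
#include <cstdint>

// Toy model only. Each thread remembers the token of the last pass that
// visited it; a worker may only process a thread it managed to claim.
struct ToyThread {
  std::atomic<uint64_t> claimed_token{0};

  bool try_claim(uint64_t current_token) {
    uint64_t prev = claimed_token.load();
    return prev != current_token &&
           claimed_token.compare_exchange_strong(prev, current_token);
  }
};

template <typename Visitor>
void toy_possibly_parallel_threads_do(ToyThread* threads, size_t n,
                                      uint64_t current_token, bool is_par,
                                      Visitor visit) {
  for (size_t i = 0; i < n; i++) {
    // is_par == false skips the claim and is only safe for a single
    // visitor; several workers passing false all visit every thread,
    // which is exactly the double-visit that trips the assert above.
    if (!is_par || threads[i].try_claim(current_token)) {
      visit(threads[i]);
    }
  }
}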
Roman From shade at redhat.com Fri Jul 3 04:34:28 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 3 Jul 2020 06:34:28 +0200 Subject: RFR: JDK-8248725: Shenandoah: Claim verifier thread roots for parallel processing In-Reply-To: <72d487f1cd592e36d0adca9827af1a93c5aa76d8.camel@redhat.com> References: <72d487f1cd592e36d0adca9827af1a93c5aa76d8.camel@redhat.com> Message-ID: On 7/2/20 11:03 PM, rkennke at redhat.com wrote: > Bug: > https://bugs.openjdk.java.net/browse/JDK-8248725 > Webrev: > http://cr.openjdk.java.net/~rkennke/JDK-8248725/ Looks fine. > Is the right way to push this to: http://hg.openjdk.java.net/jdk/jdk15/ > ? Yes, I think so. -- Thanks, -Aleksey From shade at redhat.com Fri Jul 3 04:37:31 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 3 Jul 2020 06:37:31 +0200 Subject: [RFR] [8u] 8u262-b08 Upstream Sync In-Reply-To: <0c90e0dd-21ce-3743-fa26-df11c7affdd3@redhat.com> References: <0c90e0dd-21ce-3743-fa26-df11c7affdd3@redhat.com> Message-ID: <6f05ad79-dd17-1db6-c8f2-f11f04016724@redhat.com> On 7/2/20 2:19 PM, Andrew Hughes wrote: > Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/ > > Merge changesets: > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/corba/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/jaxp/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/jaxws/merge.changeset Look trivially good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/jdk/merge.changeset Looks good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/hotspot/merge.changeset Looks good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/langtools/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/nashorn/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b08/root/merge.changeset Looks trivially good. > Ok to push? Yes. -- Thanks, -Aleksey From zgu at redhat.com Fri Jul 3 10:48:27 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Fri, 3 Jul 2020 06:48:27 -0400 Subject: RFR: JDK-8248725: Shenandoah: Claim verifier thread roots for parallel processing In-Reply-To: <72d487f1cd592e36d0adca9827af1a93c5aa76d8.camel@redhat.com> References: <72d487f1cd592e36d0adca9827af1a93c5aa76d8.camel@redhat.com> Message-ID: <8a5aa0d7-96f8-fcad-533e-099f27403532@redhat.com> On 7/2/20 5:03 PM, rkennke at redhat.com wrote: > We see occasional crashes during CI testing: > > # Internal Error (/home/jenkins/workspace/nightly/jdk- > jdk/src/hotspot/cpu/x86/frame_x86.cpp:684), pid=25982, tid=26009 > # assert(_last_Java_pc == NULL) failed: already walkable > > This seems to be caused by calls to > Threads::possibly_parallel_oops_do() with par=false even though it's > called by multiple workers. This leads to threads being visited by more > than one worker, and thus trip this assert. You need to change thread claim token for parallel iteration in root verifier constructor. -Zhengyu > > Bug: > https://bugs.openjdk.java.net/browse/JDK-8248725 > Webrev: > http://cr.openjdk.java.net/~rkennke/JDK-8248725/ > > Testing: multiple runs of hotspot_gc_shenandoah (which exposed the bug > before) > > Is the right way to push this to: http://hg.openjdk.java.net/jdk/jdk15/ > ? 
> > Roman > From rkennke at redhat.com Fri Jul 3 12:11:50 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Fri, 03 Jul 2020 14:11:50 +0200 Subject: RFR: JDK-8248725: Shenandoah: Claim verifier thread roots for parallel processing In-Reply-To: <8a5aa0d7-96f8-fcad-533e-099f27403532@redhat.com> References: <72d487f1cd592e36d0adca9827af1a93c5aa76d8.camel@redhat.com> <8a5aa0d7-96f8-fcad-533e-099f27403532@redhat.com> Message-ID: <9b78c5e192955eaa374c8a325598592d638da037.camel@redhat.com> On Fri, 2020-07-03 at 06:48 -0400, Zhengyu Gu wrote: > > On 7/2/20 5:03 PM, rkennke at redhat.com wrote: > > We see occasional crashes during CI testing: > > > > # Internal Error (/home/jenkins/workspace/nightly/jdk- > > jdk/src/hotspot/cpu/x86/frame_x86.cpp:684), pid=25982, tid=26009 > > # assert(_last_Java_pc == NULL) failed: already walkable > > > > This seems to be caused by calls to > > Threads::possibly_parallel_oops_do() with par=false even though > > it's > > called by multiple workers. This leads to threads being visited by > > more > > than one worker, and thus trip this assert. > > You need to change thread claim token for parallel iteration in root > verifier constructor. Oh. I filed https://bugs.openjdk.java.net/browse/JDK-8248799 and I'm currently testing a fix. Thanks for pointing out! Roman > > -Zhengyu > > > Bug: > > https://bugs.openjdk.java.net/browse/JDK-8248725 > > Webrev: > > http://cr.openjdk.java.net/~rkennke/JDK-8248725/ > > > > Testing: multiple runs of hotspot_gc_shenandoah (which exposed the > > bug > > before) > > > > Is the right way to push this to: > > http://hg.openjdk.java.net/jdk/jdk15/ > > ? > > > > Roman > > From rkennke at redhat.com Fri Jul 3 12:34:06 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Fri, 03 Jul 2020 14:34:06 +0200 Subject: RFR(XS): 8248799: Shenandoah: Claim threads token in constructor of ShenandoahRootVerifier Message-ID: In JDK-8248725 I changed thread iteration in ShenandoahRootVerifier from serial to parallel, but we also need to change the thread-claim- token in order for it to work. Bug: https://bugs.openjdk.java.net/browse/JDK-8248799 Testing: hotspot_gc_shenandoah Fix: diff -r 505a6165d85a -r 8c64644e2619 src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp Fr i Jul 03 11:09:59 2020 +0200 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp Fri Jul 03 14:31:11 2020 +0200 @@ -46,6 +46,7 @@ STATIC_ASSERT((static_cast(ShenandoahRootVerifier::AllRoots) + 1) > static_cast(ShenandoahRootVerifier::AllRoots)); ShenandoahRootVerifier::ShenandoahRootVerifier(RootTypes types) : _types(types) { + Threads::change_thread_claim_token(); } void ShenandoahRootVerifier::excludes(RootTypes types) { From zgu at redhat.com Fri Jul 3 15:04:58 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Fri, 3 Jul 2020 11:04:58 -0400 Subject: RFR(XS): 8248799: Shenandoah: Claim threads token in constructor of ShenandoahRootVerifier In-Reply-To: References: Message-ID: <8A2E2F5D-F747-4685-8FEA-45B785F652FF@redhat.com> Looks good. -Zhengyu Sent from my iPad > On Jul 3, 2020, at 8:35 AM, rkennke at redhat.com wrote: > > ?In JDK-8248725 I changed thread iteration in ShenandoahRootVerifier > from serial to parallel, but we also need to change the thread-claim- > token in order for it to work. 
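To illustrate why the constructor needs to bump the token (a toy continuation
of the claiming model, with made-up names; the real call is
Threads::change_thread_claim_token(), as in the diff quoted below): claims
are only meaningful relative to the current pass, so every new parallel
iteration has to start from a fresh token.

#include <atomic>
#include <cstdint>

// Toy model only; mirrors the role of Threads::change_thread_claim_token().
std::atomic<uint64_t> g_claim_token{1};

uint64_t toy_change_thread_claim_token() {
  // A fresh token makes every thread claimable again for the new pass.
  return ++g_claim_token;
}

struct ToyRootVerifier {
  uint64_t _token;
  ToyRootVerifier() : _token(toy_change_thread_claim_token()) {
    // Without a fresh token, every thread still looks claimed from the
    // previous pass and would simply be skipped by all workers.
  }
};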
> > Bug: https://bugs.openjdk.java.net/browse/JDK-8248799 > Testing: hotspot_gc_shenandoah > > Fix: > > diff -r 505a6165d85a -r 8c64644e2619 > src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp > --- > a/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp Fr > i Jul 03 11:09:59 2020 +0200 > +++ > b/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp Fri > Jul 03 14:31:11 2020 +0200 > @@ -46,6 +46,7 @@ > STATIC_ASSERT((static_cast(ShenandoahRootVerifier::AllRoots) + > 1) > static_cast(ShenandoahRootVerifier::AllRoots)); > > ShenandoahRootVerifier::ShenandoahRootVerifier(RootTypes types) : > _types(types) { > + Threads::change_thread_claim_token(); > } > > void ShenandoahRootVerifier::excludes(RootTypes types) { > From gnu.andrew at redhat.com Fri Jul 3 17:46:45 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Fri, 3 Jul 2020 18:46:45 +0100 Subject: [RFR] [8u] 8u262-b09 Upstream Sync Message-ID: <0ba77aca-9c56-f253-8bda-e651f6e794af@redhat.com> Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/ Merge changesets: http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/corba/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jaxp/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jaxws/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jdk/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/hotspot/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/langtools/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/nashorn/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/root/merge.changeset Changes in aarch64-shenandoah-jdk8u262-b09: - S8248399: Build installs jfr binary when JFR is disabled Main issues of note: None, clean merge. diffstat for root b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for corba b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jaxp b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jaxws b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for langtools b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for nashorn b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jdk b/.hgtags | 1 + b/make/CompileLaunchers.gmk | 2 ++ 2 files changed, 3 insertions(+) diffstat for hotspot b/.hgtags | 1 + 1 file changed, 1 insertion(+) Successfully built on x86, x86_64, s390, s390x, ppc, ppc64, ppc64le & aarch64. Ok to push? Thanks, -- Andrew :) Senior Free Java Software Engineer Red Hat, Inc. (http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From rkennke at redhat.com Fri Jul 3 22:06:00 2020 From: rkennke at redhat.com (Roman Kennke) Date: Sat, 04 Jul 2020 00:06:00 +0200 Subject: [RFR] [8u] 8u262-b09 Upstream Sync In-Reply-To: <0ba77aca-9c56-f253-8bda-e651f6e794af@redhat.com> References: <0ba77aca-9c56-f253-8bda-e651f6e794af@redhat.com> Message-ID: Hi Andrew, > Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/ > > Merge changesets: > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/corba/merge.changeset Trivially ok. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jaxp/merge.changeset Trivially ok. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jaxws/merge.changeset Trivially ok. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jdk/merge.changeset Ok. 
> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/hotspot/merge.changeset Trivially ok. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/langtools/merge.changeset Trivially ok. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/nashorn/merge.changeset Trivially ok. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/root/merge.changeset Trivially ok. Go! Thanks, Roman From rkennke at redhat.com Mon Jul 6 15:40:05 2020 From: rkennke at redhat.com (Roman Kennke) Date: Mon, 06 Jul 2020 17:40:05 +0200 Subject: RFR: Refine ShenandoahEvacLockGranularity default and bounds check Message-ID: <9331aaf8bc70e588e195f717a4f8caf09a82df72.camel@redhat.com> I propose to raise the default of ShenandoahEvacLockGranularity to 6, this appears to be a sweet spot in almost all of the workloads that I've thrown at it. This patch also checks bounds of the option to avoid locks that span more than one region. Includes testcase. http://cr.openjdk.java.net/~rkennke/evac-lock-args/webrev.00/ Testing: hotspot_gc_shenandoah I intend to push this to shenandoah/jdk (if HG server lets me - it seems to be overdosed at the moment), then give it a few runs in CI, and if all is well, propose it for jdk/jdk. Roman From shade at redhat.com Mon Jul 6 17:56:00 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Mon, 6 Jul 2020 19:56:00 +0200 Subject: RFR: Refine ShenandoahEvacLockGranularity default and bounds check In-Reply-To: <9331aaf8bc70e588e195f717a4f8caf09a82df72.camel@redhat.com> References: <9331aaf8bc70e588e195f717a4f8caf09a82df72.camel@redhat.com> Message-ID: On 7/6/20 5:40 PM, Roman Kennke wrote: > http://cr.openjdk.java.net/~rkennke/evac-lock-args/webrev.00/ Couple of nits: *) Would you like to test -XX:ShenandoahEvacLockGranularity=0 as well, as long as you are testing the option boundaries? *) There is no need to declare "private static void test()", you might as well do it in "main()". *) Argument indent is a bit wrong, should be another space before "proper" at L618: 617 byte_size_in_proper_unit(locking_card_size), proper_unit_for_byte_size(locking_card_size), 618 byte_size_in_proper_unit(RegionSizeBytes), proper_unit_for_byte_size(RegionSizeBytes)); *) I think this message is too broad: 616 err_msg message("1 << (ShenandoahEvacLockGranularity + LogHeapWordSize) (" SIZE_FORMAT "%s) should be lower than region size (" SIZE_FORMAT "%s).", Suggestion: err_msg message("Evac locking card size (" SIZE_FORMAT "%s) should be lower than region size (" SIZE_FORMAT "%s).", No need for another review. -- Thanks, -Aleksey From rkennke at redhat.com Mon Jul 6 19:51:35 2020 From: rkennke at redhat.com (Roman Kennke) Date: Mon, 06 Jul 2020 21:51:35 +0200 Subject: RFR(sh/jdk11): Refactor/isolate critical pinning Message-ID: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> I'm running an effort to bring our upstream exposure vs jdk11u to (near) zero. Critical pinning support touches a few places in x86 assembly which are rather hairy, and should look much better when isolated such that builds without Shenandoah more obviously generate the same code as jdk11u upstream currently does. It moves the new method definitions to Shenandoah-files. It trades cleaner sharde code at the expense of slightly increased mess in Shenandoah files (I copied some helper methods there, seems harmless to me). The new inline code parts are now guarded with #if INCLUDE_SHENANDOAHGC and if (UseShenandoahGC) so are obviously guaranteed to not leak out into non-Shenandoah code. Webrev: http://cr.openjdk. 
java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.00/ Testing: hotspot_gc_shenandoah (x86_32, x86_64, builds with and without Shenandoah) Ok? Roman From kdnilsen at amazon.com Mon Jul 6 23:45:48 2020 From: kdnilsen at amazon.com (Nilsen, Kelvin) Date: Mon, 6 Jul 2020 23:45:48 +0000 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 3) Message-ID: Thank you for feedback from previously distributed draft patch. This new version of the patch differs from the June 30 version in the following regards: 1. I have removed the weak argument from the function, its prototype, and all invocation points. 2. I no longer assume that expected and the value held in memory are not NULL. See http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.04/ This patch addresses the problem described in https://bugs.openjdk.java.net/browse/JDK-8232782 The implementation mimics the behavior of the recently revised x86 implementation of cmpxchg_oop with slight refinements: X86 version: Step 1: Try CAS Step 2: if CAS fails, check if original memory holds equivalent from-space pointer Step 3: Use CAS to overwrite memory with equivalent to-space pointer Step 4: Try CAS again Step 5: Return boolean result to indicate success or failure AARCH64 version: Step 1: Try CAS Step 2: if CAS fails, check if original memory holds equivalent from-space pointer Step 3 (differs): Do not overwrite memory with equivalent to-space pointer, Instead, run the original CAS request with from-space pointer as the "expected" value. If this succeeds, we're done. If this fails, go back to step 1 and try that again. Step 5: Return boolean result to indicate success or failure This patch satisfies tier1, tier2, and hotspot_gc_shenandoah regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023-aws aarch64). I have also run an "extreme" garbage collection workload for 20 minutes without problem. Is this ok to merge? From rkennke at redhat.com Tue Jul 7 08:36:51 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 10:36:51 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate Shenandoah-specific aarch64 changes Message-ID: <24f36980da8360dd24b082f1ac3ff9ced905576a.camel@redhat.com> This isolates the remaining shared-code changes in aarch64 in shenandoah/jdk11. The intention is that when building without Shenandoah, it'd compile the exact same code as current upstream jdk11u, and that it is obvious from the vs-upstream-patch that it does. http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-aarch64/webrev.00/ Testing: hotspot_gc_shenandoah on aarch64, build without shenandoah. Good? Roman From rkennke at redhat.com Tue Jul 7 09:13:40 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 11:13:40 +0200 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 3) In-Reply-To: References: Message-ID: <8ba72d28425a2ef2b4efef6d036145ac30f68517.camel@redhat.com> Hi Kelvin, The patch looks good now! Thanks for doing this! I'll sponsor it. One note for future changes: for some reason, your webrev doesn't seem to carry the changeset metadata (your username, the commit message, etc). It'd be easier to sponsor if you could prepare that too. (Also, I've just checked https://openjdk.java.net/census#kdnilsen and I'm wondering why you're Shenandoah author, but not JDK author? I'll check this too) Thanks, Roman On Mon, 2020-07-06 at 23:45 +0000, Nilsen, Kelvin wrote: > Thank you for feedback from previously distributed draft patch. 
This > new version of the patch differs from the June 30 version in the > following regards: > > 1. I have removed the weak argument from the function, its prototype, > and all invocation points. > 2. I no longer assume that expected and the value held in memory are > not NULL. > > See http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.04/ > > This patch addresses the problem described in > https://bugs.openjdk.java.net/browse/JDK-8232782 > > The implementation mimics the behavior of the recently revised x86 > implementation of cmpxchg_oop with slight refinements: > > X86 version: > Step 1: Try CAS > Step 2: if CAS fails, check if original memory holds equivalent from- > space pointer > Step 3: Use CAS to overwrite memory with equivalent to-space pointer > Step 4: Try CAS again > Step 5: Return boolean result to indicate success or failure > > AARCH64 version: > Step 1: Try CAS > Step 2: if CAS fails, check if original memory holds equivalent from- > space pointer > Step 3 (differs): Do not overwrite memory with equivalent to-space > pointer, Instead, run the original CAS request with from-space > pointer as the "expected" value. If this succeeds, we're done. If > this fails, go back to step 1 and try that again. > > Step 5: Return boolean result to indicate success or failure > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023-aws > aarch64). I have also run an "extreme" garbage collection workload > for 20 minutes without problem. > > Is this ok to merge? > > > From rkennke at redhat.com Tue Jul 7 09:42:41 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 11:42:41 +0200 Subject: RFR: 8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64) (version 3) In-Reply-To: References: Message-ID: <732e456fe0fe7d3db1e6e68d4b35b781829e3bba.camel@redhat.com> Pushed: https://hg.openjdk.java.net/jdk/jdk/rev/a0f6d9504107 One more suggestion: the jcheck script on the hg server complained about a couple of trailing whitespace and tabs. 1. Configure your editor to not use tabs (some can also eliminate trailing WS) 2. Install jcheck and run it locally before making a webrev: https://openjdk.java.net/projects/code-tools/jcheck/ Thanks again for working on this issue!! Roman On Mon, 2020-07-06 at 23:45 +0000, Nilsen, Kelvin wrote: > Thank you for feedback from previously distributed draft patch. This > new version of the patch differs from the June 30 version in the > following regards: > > 1. I have removed the weak argument from the function, its prototype, > and all invocation points. > 2. I no longer assume that expected and the value held in memory are > not NULL. 
> > See http://cr.openjdk.java.net/~kdnilsen/JDK-8232782/webrev.04/ > > This patch addresses the problem described in > https://bugs.openjdk.java.net/browse/JDK-8232782 > > The implementation mimics the behavior of the recently revised x86 > implementation of cmpxchg_oop with slight refinements: > > X86 version: > Step 1: Try CAS > Step 2: if CAS fails, check if original memory holds equivalent from- > space pointer > Step 3: Use CAS to overwrite memory with equivalent to-space pointer > Step 4: Try CAS again > Step 5: Return boolean result to indicate success or failure > > AARCH64 version: > Step 1: Try CAS > Step 2: if CAS fails, check if original memory holds equivalent from- > space pointer > Step 3 (differs): Do not overwrite memory with equivalent to-space > pointer, Instead, run the original CAS request with from-space > pointer as the "expected" value. If this succeeds, we're done. If > this fails, go back to step 1 and try that again. > > Step 5: Return boolean result to indicate success or failure > > This patch satisfies tier1, tier2, and hotspot_gc_shenandoah > regression tests on Ubuntu 18.04.4 LTS (GNU/Linux 5.3.0-1023-aws > aarch64). I have also run an "extreme" garbage collection workload > for 20 minutes without problem. > > Is this ok to merge? > > > From shade at redhat.com Tue Jul 7 12:03:06 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 7 Jul 2020 14:03:06 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate Shenandoah-specific aarch64 changes In-Reply-To: <24f36980da8360dd24b082f1ac3ff9ced905576a.camel@redhat.com> References: <24f36980da8360dd24b082f1ac3ff9ced905576a.camel@redhat.com> Message-ID: On 7/7/20 10:36 AM, Roman Kennke wrote: > This isolates the remaining shared-code changes in aarch64 in > shenandoah/jdk11. The intention is that when building without > Shenandoah, it'd compile the exact same code as current upstream > jdk11u, and that it is obvious from the vs-upstream-patch that it does. > > http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-aarch64/webrev.00/ This: 876 #if INCLUDE_SHENANDOAHGC 877 if (UseShenandoahGC) { 878 __ load_mirror(r10, rmethod); 879 __ stp(r10, zr, Address(sp, 4 * wordSize)); 880 } else { 881 #endif 882 __ load_mirror(rscratch1, rmethod); 883 __ stp(rscratch1, zr, Address(sp, 4 * wordSize)); 884 SHENANDOAHGC_ONLY(}) ...is probably just: #if INCLUDE_SHENANDOAHGC if (UseShenandoahGC) { __ load_mirror(r10, rmethod); __ stp(r10, zr, Address(sp, 4 * wordSize)); } else #else { __ load_mirror(rscratch1, rmethod); __ stp(rscratch1, zr, Address(sp, 4 * wordSize)); } #endif Otherwise looks good. -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 7 12:24:36 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 14:24:36 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate Shenandoah-specific aarch64 changes In-Reply-To: References: <24f36980da8360dd24b082f1ac3ff9ced905576a.camel@redhat.com> Message-ID: On Tue, 2020-07-07 at 14:03 +0200, Aleksey Shipilev wrote: > On 7/7/20 10:36 AM, Roman Kennke wrote: > > This isolates the remaining shared-code changes in aarch64 in > > shenandoah/jdk11. The intention is that when building without > > Shenandoah, it'd compile the exact same code as current upstream > > jdk11u, and that it is obvious from the vs-upstream-patch that it > > does. 
> > > > http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-aarch64/webrev.00/ > > This: > > 876 #if INCLUDE_SHENANDOAHGC > 877 if (UseShenandoahGC) { > 878 __ load_mirror(r10, rmethod); > 879 __ stp(r10, zr, Address(sp, 4 * wordSize)); > 880 } else { > 881 #endif > 882 __ load_mirror(rscratch1, rmethod); > 883 __ stp(rscratch1, zr, Address(sp, 4 * wordSize)); > 884 SHENANDOAHGC_ONLY(}) > > ...is probably just: > > #if INCLUDE_SHENANDOAHGC > if (UseShenandoahGC) { > __ load_mirror(r10, rmethod); > __ stp(r10, zr, Address(sp, 4 * wordSize)); > } else > #else > { > __ load_mirror(rscratch1, rmethod); > __ stp(rscratch1, zr, Address(sp, 4 * wordSize)); > } > #endif > > > Otherwise looks good. > Thanks! I pushed it with the suggested change. Roman From rkennke at redhat.com Tue Jul 7 13:00:10 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Tue, 07 Jul 2020 15:00:10 +0200 Subject: RFR(sh/jdk): Fix Windows build Message-ID: CI found a little type conversion hiccup in my last change to evac- locking. Also, one line is a duplicate. Testing: hotspot_gc_shenandoah (sorry, no Windows build-machine at hand) diff -r 1bb1b0d2acce src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp Mon Jul 06 22:43:04 2020 +0200 +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp Tue Jul 07 14:58:30 2020 +0200 @@ -610,9 +610,8 @@ MaxTLABSizeBytes = MaxTLABSizeWords * HeapWordSize; assert (MaxTLABSizeBytes > MinTLABSize, "should be larger"); - size_t locking_card_size = 1 << (ShenandoahEvacLockGranularity + LogHeapWordSize); + size_t locking_card_size = ((size_t)1) << (ShenandoahEvacLockGranularity + LogHeapWordSize); if (locking_card_size > RegionSizeBytes) { - size_t locking_card_size = 1 << (ShenandoahEvacLockGranularity + LogHeapWordSize); err_msg message("Evac locking card size (" SIZE_FORMAT "%s) should be lower than region size (" SIZE_FORMAT "%s).", byte_size_in_proper_unit(locking_card_size), proper_unit_for_byte_size(locking_card_size), byte_size_in_proper_unit(RegionSizeBytes), prope r_unit_for_byte_size(RegionSizeBytes)); [ From shade at redhat.com Tue Jul 7 13:06:23 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 7 Jul 2020 15:06:23 +0200 Subject: RFR(sh/jdk): Fix Windows build In-Reply-To: References: Message-ID: <4fa06172-773a-9f29-beac-351fa3e42fb7@redhat.com> On 7/7/20 3:00 PM, rkennke at redhat.com wrote: > CI found a little type conversion hiccup in my last change to evac- > locking. Also, one line is a duplicate. 
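One aside on the #if INCLUDE_SHENANDOAHGC arrangement for load_mirror quoted
earlier in the aarch64-isolation thread: as transcribed there, the #else
would drop the rscratch1 block from a Shenandoah-enabled build and leave the
else dangling. The form presumably intended (the actual pushed changeset may
differ) is:

#if INCLUDE_SHENANDOAHGC
    if (UseShenandoahGC) {
      __ load_mirror(r10, rmethod);
      __ stp(r10, zr, Address(sp, 4 * wordSize));
    } else
#endif
    {
      __ load_mirror(rscratch1, rmethod);
      __ stp(rscratch1, zr, Address(sp, 4 * wordSize));
    }

With the feature enabled this preprocesses to a plain if/else; with
-shenandoahgc only the upstream rscratch1 block remains, textually identical
to jdk11u.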
> > Testing: hotspot_gc_shenandoah (sorry, no Windows build-machine at > hand) > > diff -r 1bb1b0d2acce > src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp > --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp Mon Jul > 06 22:43:04 2020 +0200 > +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp Tue Jul > 07 14:58:30 2020 +0200 > @@ -610,9 +610,8 @@ > MaxTLABSizeBytes = MaxTLABSizeWords * HeapWordSize; > assert (MaxTLABSizeBytes > MinTLABSize, "should be larger"); > > - size_t locking_card_size = 1 << (ShenandoahEvacLockGranularity + > LogHeapWordSize); > + size_t locking_card_size = ((size_t)1) << > (ShenandoahEvacLockGranularity + LogHeapWordSize); > if (locking_card_size > RegionSizeBytes) { > - size_t locking_card_size = 1 << (ShenandoahEvacLockGranularity + > LogHeapWordSize); > err_msg message("Evac locking card size (" SIZE_FORMAT "%s) should > be lower than region size (" SIZE_FORMAT "%s).", > byte_size_in_proper_unit(locking_card_size), > proper_unit_for_byte_size(locking_card_size), > byte_size_in_proper_unit(RegionSizeBytes), prope > r_unit_for_byte_size(RegionSizeBytes)); > [ Looks fine. -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 7 13:06:39 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 15:06:39 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes Message-ID: Here comes a batch of isolating remaining simple Shenandoah-induced shared-code changes. The idea is that when building without Shenandoah GC should compile exactly as it would with current jdk11u. Webrev: http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-shared/webrev.00/ Testing: hotspot_gc_shenandoah, with/without Shenandoah, 32/64 bit. Ok? From rkennke at redhat.com Tue Jul 7 13:10:09 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 15:10:09 +0200 Subject: RFR(sh/jdk11): Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory Message-ID: <65784ae241a6ebd5193850225668082c543744e8.camel@redhat.com> This moves markBitMap.* and parallelCleaning.* into shenandoah subdirectory, so that they don't get compiled when Shenandoah is excluded from build. They are only used by Shenandoah anyway (in sh/jdk11). http://cr.openjdk.java.net/~rkennke/isolate-markbitmap-parcleaning/webrev.00/ Testing: hotspot_gc_shenandoah Ok? From rkennke at redhat.com Tue Jul 7 13:14:06 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 15:14:06 +0200 Subject: RFR: Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() Message-ID: This reverts the changes that we did to BarrierSetC2::enqueue_useful_gc_barrier() and implements the equivalent with the interfaces that exist in jdk11u. In ShenandoahBarrierSetC2::enqueue_useful_gc_barrier() we establish that we only have users that are SATB-pre-barrier-calls (CallLeafNode). PhaseIGVN::add_users_to_worklist() would add the call *and* its outgoing projections to the worklist. This is what I'm trying to do here, so that we don't have to touch any shared code for that. http://cr.openjdk.java.net/~rkennke/shjdk11-c2-enqueue-barrier/webrev.00/ Testing: hotspot_gc_shenandoah all good Ok? 
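A rough sketch of the shape described above, assuming the jdk11u-era
worklist-based hook; the loop mirrors the stated intent (push the pre-barrier
call and its projections, as PhaseIGVN::add_users_to_worklist() would do for
a call), not necessarily the exact webrev, and the review exchange further
down refines it:

#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
#include "opto/node.hpp"

// Illustrative only. The users of 'node' are the SATB pre-barrier CallLeaf
// nodes; push each of them and, mimicking add_users_to_worklist(), their
// outgoing projections as well, so IGVN revisits the whole call shape.
void ShenandoahBarrierSetC2::enqueue_useful_gc_barrier(Unique_Node_List &worklist,
                                                       Node* node) const {
  for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; i++) {
    Node* use = node->fast_out(i);
    worklist.push(use);                       // the pre-barrier call itself
    if (use->is_CallLeaf()) {
      for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
        worklist.push(use->fast_out(j));      // its projections
      }
    }
  }
}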
Roman From rkennke at redhat.com Tue Jul 7 13:07:30 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Tue, 07 Jul 2020 13:07:30 +0000 Subject: hg: shenandoah/jdk: Shenandoah: Fix Windows build Message-ID: <202007071307.067D7Um8002798@aojmv0008.oracle.com> Changeset: a5cf4ea526d9 Author: rkennke Date: 2020-07-07 15:07 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk/rev/a5cf4ea526d9 Shenandoah: Fix Windows build ! src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp From shade at redhat.com Tue Jul 7 14:15:20 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 7 Jul 2020 16:15:20 +0200 Subject: RFR(sh/jdk11): Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory In-Reply-To: <65784ae241a6ebd5193850225668082c543744e8.camel@redhat.com> References: <65784ae241a6ebd5193850225668082c543744e8.camel@redhat.com> Message-ID: <0f1c6388-95b0-36c8-e5db-427fc666b9eb@redhat.com> On 7/7/20 3:10 PM, Roman Kennke wrote: > This moves markBitMap.* and parallelCleaning.* into shenandoah > subdirectory, so that they don't get compiled when Shenandoah is > excluded from build. They are only used by Shenandoah anyway (in > sh/jdk11). > > http://cr.openjdk.java.net/~rkennke/isolate-markbitmap-parcleaning/webrev.00/ *) In other JDKs, we have: ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.inline.hpp ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.hpp ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.cpp It is probably better to match the names? *) markBitMap is something introduced by JDK-8180193 here: https://hg.openjdk.java.net/jdk/jdk/rev/31b159f30fb2 It might not be worth a separate backport to 11u, though. Otherwise fine. -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 7 14:29:42 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 16:29:42 +0200 Subject: RFR(sh/jdk11): Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory In-Reply-To: <0f1c6388-95b0-36c8-e5db-427fc666b9eb@redhat.com> References: <65784ae241a6ebd5193850225668082c543744e8.camel@redhat.com> <0f1c6388-95b0-36c8-e5db-427fc666b9eb@redhat.com> Message-ID: > On 7/7/20 3:10 PM, Roman Kennke wrote: > > This moves markBitMap.* and parallelCleaning.* into shenandoah > > subdirectory, so that they don't get compiled when Shenandoah is > > excluded from build. They are only used by Shenandoah anyway (in > > sh/jdk11). > > > > http://cr.openjdk.java.net/~rkennke/isolate-markbitmap-parcleaning/webrev.00/ > > *) In other JDKs, we have: > ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.inline. > hpp > ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.hpp > ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.cpp > > It is probably better to match the names? > We *also* have them in sh/jdk11. No I'd rather keep them named as they are, also makes backporting a little easier (if we ever need to backport from gc/shared variants of later JDKs). > *) markBitMap is something introduced by JDK-8180193 here: > https://hg.openjdk.java.net/jdk/jdk/rev/31b159f30fb2 > > It might not be worth a separate backport to 11u, though. Hmm, no, I don't think so. Can I push webrev.00 ? 
Thanks, Roman From shade at redhat.com Tue Jul 7 14:32:29 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 7 Jul 2020 16:32:29 +0200 Subject: RFR(sh/jdk11): Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory In-Reply-To: References: <65784ae241a6ebd5193850225668082c543744e8.camel@redhat.com> <0f1c6388-95b0-36c8-e5db-427fc666b9eb@redhat.com> Message-ID: <6b2fa96a-7d3e-b25f-534b-801f1651b4fd@redhat.com> On 7/7/20 4:29 PM, Roman Kennke wrote: >> *) In other JDKs, we have: >> ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.inline. >> hpp >> ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.hpp >> ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.cpp >> >> It is probably better to match the names? > > We *also* have them in sh/jdk11. No I'd rather keep them named as they > are, also makes backporting a little easier (if we ever need to > backport from gc/shared variants of later JDKs). Oh... Eh... It looks to me those need to be merged together at some point. > Can I push webrev.00 ? All right, fine. (Although I think sh/jdk11 is still closed for 11u release) -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 7 14:40:19 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 16:40:19 +0200 Subject: RFR(sh/jdk11): Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory In-Reply-To: <6b2fa96a-7d3e-b25f-534b-801f1651b4fd@redhat.com> References: <65784ae241a6ebd5193850225668082c543744e8.camel@redhat.com> <0f1c6388-95b0-36c8-e5db-427fc666b9eb@redhat.com> <6b2fa96a-7d3e-b25f-534b-801f1651b4fd@redhat.com> Message-ID: > On 7/7/20 4:29 PM, Roman Kennke wrote: > > > *) In other JDKs, we have: > > > ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.inl > > > ine. > > > hpp > > > ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.hpp > > > ./src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.cpp > > > > > > It is probably better to match the names? > > > > We *also* have them in sh/jdk11. No I'd rather keep them named as > > they > > are, also makes backporting a little easier (if we ever need to > > backport from gc/shared variants of later JDKs). > > Oh... Eh... It looks to me those need to be merged together at some > point. > I don't know. Seems to make more sense to leave them as they are in sh/jdk11, they are still separate in later JDKs (and really shared with G1). > > Can I push webrev.00 ? > > All right, fine. (Although I think sh/jdk11 is still closed for 11u > release) Ok, I will clarify with Andrew Hughes before pushing. (I already pushed the aarch64-cleanup so if that's a problem, we need to be careful with tags or back-out that change). 
Thanks, Roman From rkennke at redhat.com Tue Jul 7 15:02:22 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Tue, 07 Jul 2020 15:02:22 +0000 Subject: hg: shenandoah/jdk11: Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory Message-ID: <202007071502.067F2NWv002096@aojmv0008.oracle.com> Changeset: 44b318933487 Author: rkennke Date: 2020-07-07 17:02 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk11/rev/44b318933487 Shenandoah: Move markBitMap.* and parallelCleaning.* to shenandoah subdirectory Reviewed-by: shade - src/hotspot/share/gc/shared/markBitMap.cpp - src/hotspot/share/gc/shared/markBitMap.hpp - src/hotspot/share/gc/shared/markBitMap.inline.hpp - src/hotspot/share/gc/shared/parallelCleaning.cpp - src/hotspot/share/gc/shared/parallelCleaning.hpp + src/hotspot/share/gc/shenandoah/markBitMap.cpp + src/hotspot/share/gc/shenandoah/markBitMap.hpp + src/hotspot/share/gc/shenandoah/markBitMap.inline.hpp + src/hotspot/share/gc/shenandoah/parallelCleaning.cpp + src/hotspot/share/gc/shenandoah/parallelCleaning.hpp ! src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp ! src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp ! src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp ! src/hotspot/share/gc/shenandoah/shenandoahMarkingContext.cpp ! src/hotspot/share/gc/shenandoah/shenandoahMarkingContext.hpp ! src/hotspot/share/gc/shenandoah/shenandoahParallelCleaning.hpp ! src/hotspot/share/gc/shenandoah/shenandoahVerifier.hpp From gnu.andrew at redhat.com Tue Jul 7 16:22:20 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Tue, 7 Jul 2020 17:22:20 +0100 Subject: [RFR] [8u] 8u262-b09 Upstream Sync In-Reply-To: References: <0ba77aca-9c56-f253-8bda-e651f6e794af@redhat.com> Message-ID: On 03/07/2020 23:06, Roman Kennke wrote: > Hi Andrew, > > >> Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/ >> >> Merge changesets: >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/corba/merge.changeset > > Trivially ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jaxp/merge.changeset > > Trivially ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jaxws/merge.changeset > > Trivially ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/jdk/merge.changeset > > Ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/hotspot/merge.changeset > > Trivially ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/langtools/merge.changeset > > Trivially ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/nashorn/merge.changeset > > Trivially ok. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b09/root/merge.changeset > > Trivially ok. > > Go! > > Thanks, > Roman > > Thanks. I've pushed the changes. I tagged the result of merging with your Shenandoah backports as: aarch64-shenandoah-jdk8u262-b09-shenandoah-merge-2020-07-03 Thanks, -- Andrew :) Senior Free Java Software Engineer Red Hat, Inc. 
(http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From zgu at redhat.com Tue Jul 7 18:49:07 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Tue, 7 Jul 2020 14:49:07 -0400 Subject: RFR(sh/jdk11): Refactor/isolate critical pinning In-Reply-To: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> References: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> Message-ID: <45d924ee-75a3-3c64-f972-7c757acc2367@redhat.com> Hi Roman, sharedRuntime_x86_32.cpp/sharedRuntime_x86_64.cpp 1846 if (is_critical_native SHENANDOAHGC_ONLY(&& !Universe::heap()->supports_object_pinning())) { why they are different (32 vs. 64)? This refactor is awkward, since Epsilon also support object pinning ... -Zhengyu On 7/6/20 3:51 PM, Roman Kennke wrote: > I'm running an effort to bring our upstream exposure vs jdk11u to > (near) zero. > > Critical pinning support touches a few places in x86 assembly which are > rather hairy, and should look much better when isolated such that > builds without Shenandoah more obviously generate the same code as > jdk11u upstream currently does. > > It moves the new method definitions to Shenandoah-files. It trades > cleaner sharde code at the expense of slightly increased mess in > Shenandoah files (I copied some helper methods there, seems harmless to > me). The new inline code parts are now guarded with #if > INCLUDE_SHENANDOAHGC and if (UseShenandoahGC) so are obviously > guaranteed to not leak out into non-Shenandoah code. > > Webrev: > http://cr.openjdk. > java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.00/ > > Testing: hotspot_gc_shenandoah (x86_32, x86_64, builds with and without > Shenandoah) > > Ok? > > Roman > From rkennke at redhat.com Tue Jul 7 19:02:49 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 21:02:49 +0200 Subject: RFR(sh/jdk11): Refactor/isolate critical pinning In-Reply-To: <45d924ee-75a3-3c64-f972-7c757acc2367@redhat.com> References: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> <45d924ee-75a3-3c64-f972-7c757acc2367@redhat.com> Message-ID: Hi Zhengyu, Thanks for reviewing. > sharedRuntime_x86_32.cpp/sharedRuntime_x86_64.cpp > > 1846 if (is_critical_native SHENANDOAHGC_ONLY(&& > !Universe::heap()->supports_object_pinning())) { > > why they are different (32 vs. 64)? Oops that is a mistake. It should just check !UseShenandoahGC on 32bit too. Good catch. > This refactor is awkward, since Epsilon also support object pinning Well, it's exactly the point. Epsilon does *not* support critical- native object pinning in jdk11u, and we don't want to silently change behaviour of anything by introducing Shenandoah, and it must be very obviously isolated. The status-quo for Epsilon in jdk11u is preserved by this patch. If we consider it important, we can improve it later. http://cr.openjdk.java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.01/ Ok now? Thanks, Roman > ... > > -Zhengyu > > > > > > On 7/6/20 3:51 PM, Roman Kennke wrote: > > I'm running an effort to bring our upstream exposure vs jdk11u to > > (near) zero. > > > > Critical pinning support touches a few places in x86 assembly which > > are > > rather hairy, and should look much better when isolated such that > > builds without Shenandoah more obviously generate the same code as > > jdk11u upstream currently does. > > > > It moves the new method definitions to Shenandoah-files. 
It trades > > cleaner sharde code at the expense of slightly increased mess in > > Shenandoah files (I copied some helper methods there, seems > > harmless to > > me). The new inline code parts are now guarded with #if > > INCLUDE_SHENANDOAHGC and if (UseShenandoahGC) so are obviously > > guaranteed to not leak out into non-Shenandoah code. > > > > Webrev: > > http://cr.openjdk. > > java.net/~rkennke/shjdk11-refactor-isolate-critical- > > pinning/webrev.00/ > > > > Testing: hotspot_gc_shenandoah (x86_32, x86_64, builds with and > > without > > Shenandoah) > > > > Ok? > > > > Roman > > From zgu at redhat.com Tue Jul 7 19:09:30 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Tue, 7 Jul 2020 15:09:30 -0400 Subject: RFR(sh/jdk11): Refactor/isolate critical pinning In-Reply-To: References: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> <45d924ee-75a3-3c64-f972-7c757acc2367@redhat.com> Message-ID: <10504b34-fa14-f56d-790e-2acf39f60b95@redhat.com> > > Well, it's exactly the point. Epsilon does *not* support critical- > native object pinning in jdk11u, and we don't want to silently change > behaviour of anything by introducing Shenandoah, and it must be very > obviously isolated. The status-quo for Epsilon in jdk11u is preserved > by this patch. If we consider it important, we can improve it later. Actually, it does: http://hg.openjdk.java.net/jdk-updates/jdk11u-dev/file/c87a19aed831/src/hotspot/share/gc/epsilon/epsilonHeap.hpp#l125 Although, Epsilon pins nothing, but logically seems wrong. -Zhengyu > > > http://cr.openjdk.java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.01/ > > Ok now? > > Thanks, > Roman > > >> ... >> >> -Zhengyu >> >> >> >> >> >> On 7/6/20 3:51 PM, Roman Kennke wrote: >>> I'm running an effort to bring our upstream exposure vs jdk11u to >>> (near) zero. >>> >>> Critical pinning support touches a few places in x86 assembly which >>> are >>> rather hairy, and should look much better when isolated such that >>> builds without Shenandoah more obviously generate the same code as >>> jdk11u upstream currently does. >>> >>> It moves the new method definitions to Shenandoah-files. It trades >>> cleaner sharde code at the expense of slightly increased mess in >>> Shenandoah files (I copied some helper methods there, seems >>> harmless to >>> me). The new inline code parts are now guarded with #if >>> INCLUDE_SHENANDOAHGC and if (UseShenandoahGC) so are obviously >>> guaranteed to not leak out into non-Shenandoah code. >>> >>> Webrev: >>> http://cr.openjdk. >>> java.net/~rkennke/shjdk11-refactor-isolate-critical- >>> pinning/webrev.00/ >>> >>> Testing: hotspot_gc_shenandoah (x86_32, x86_64, builds with and >>> without >>> Shenandoah) >>> >>> Ok? >>> >>> Roman >>> > From rkennke at redhat.com Tue Jul 7 19:27:13 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 07 Jul 2020 21:27:13 +0200 Subject: RFR(sh/jdk11): Refactor/isolate critical pinning In-Reply-To: <10504b34-fa14-f56d-790e-2acf39f60b95@redhat.com> References: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> <45d924ee-75a3-3c64-f972-7c757acc2367@redhat.com> <10504b34-fa14-f56d-790e-2acf39f60b95@redhat.com> Message-ID: > > Well, it's exactly the point. Epsilon does *not* support critical- > > native object pinning in jdk11u, and we don't want to silently > > change > > behaviour of anything by introducing Shenandoah, and it must be > > very > > obviously isolated. The status-quo for Epsilon in jdk11u is > > preserved > > by this patch. 
If we consider it important, we can improve it > > later. > > Actually, it does: > > http://hg.openjdk.java.net/jdk-updates/jdk11u-dev/file/c87a19aed831/src/hotspot/share/gc/epsilon/epsilonHeap.hpp#l125 > > Although, Epsilon pins nothing, but logically seems wrong. Well yes, as I said, that is the status quo, and *we are not going to change that* at least not silently with the big Shenandoah upstreaming? http://cr.openjdk.java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.01/ Ok to push that? Roman > -Zhengyu > > > > > http://cr.openjdk.java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.01/ > > > > Ok now? > > > > Thanks, > > Roman > > > > > > > ... > > > > > > -Zhengyu > > > > > > > > > > > > > > > > > > On 7/6/20 3:51 PM, Roman Kennke wrote: > > > > I'm running an effort to bring our upstream exposure vs jdk11u > > > > to > > > > (near) zero. > > > > > > > > Critical pinning support touches a few places in x86 assembly > > > > which > > > > are > > > > rather hairy, and should look much better when isolated such > > > > that > > > > builds without Shenandoah more obviously generate the same code > > > > as > > > > jdk11u upstream currently does. > > > > > > > > It moves the new method definitions to Shenandoah-files. It > > > > trades > > > > cleaner sharde code at the expense of slightly increased mess > > > > in > > > > Shenandoah files (I copied some helper methods there, seems > > > > harmless to > > > > me). The new inline code parts are now guarded with #if > > > > INCLUDE_SHENANDOAHGC and if (UseShenandoahGC) so are obviously > > > > guaranteed to not leak out into non-Shenandoah code. > > > > > > > > Webrev: > > > > http://cr.openjdk. > > > > java.net/~rkennke/shjdk11-refactor-isolate-critical- > > > > pinning/webrev.00/ > > > > > > > > Testing: hotspot_gc_shenandoah (x86_32, x86_64, builds with and > > > > without > > > > Shenandoah) > > > > > > > > Ok? > > > > > > > > Roman > > > > From zgu at redhat.com Tue Jul 7 19:37:01 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Tue, 7 Jul 2020 15:37:01 -0400 Subject: RFR(sh/jdk11): Refactor/isolate critical pinning In-Reply-To: References: <230f5c7d89445563ab2c6924b01b4e659618a2be.camel@redhat.com> <45d924ee-75a3-3c64-f972-7c757acc2367@redhat.com> <10504b34-fa14-f56d-790e-2acf39f60b95@redhat.com> Message-ID: okay. -Zhengyu On 7/7/20 3:27 PM, Roman Kennke wrote: > >>> Well, it's exactly the point. Epsilon does *not* support critical- >>> native object pinning in jdk11u, and we don't want to silently >>> change >>> behaviour of anything by introducing Shenandoah, and it must be >>> very >>> obviously isolated. The status-quo for Epsilon in jdk11u is >>> preserved >>> by this patch. If we consider it important, we can improve it >>> later. >> >> Actually, it does: >> >> http://hg.openjdk.java.net/jdk-updates/jdk11u-dev/file/c87a19aed831/src/hotspot/share/gc/epsilon/epsilonHeap.hpp#l125 >> >> Although, Epsilon pins nothing, but logically seems wrong. > > Well yes, as I said, that is the status quo, and *we are not going to > change that* at least not silently with the big Shenandoah upstreaming? > > http://cr.openjdk.java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.01/ > > Ok to push that? > > Roman > > >> -Zhengyu >> >>> >>> http://cr.openjdk.java.net/~rkennke/shjdk11-refactor-isolate-critical-pinning/webrev.01/ >>> >>> Ok now? >>> >>> Thanks, >>> Roman >>> >>> >>>> ... 
>>>> >>>> -Zhengyu >>>> >>>> >>>> >>>> >>>> >>>> On 7/6/20 3:51 PM, Roman Kennke wrote: >>>>> I'm running an effort to bring our upstream exposure vs jdk11u >>>>> to >>>>> (near) zero. >>>>> >>>>> Critical pinning support touches a few places in x86 assembly >>>>> which >>>>> are >>>>> rather hairy, and should look much better when isolated such >>>>> that >>>>> builds without Shenandoah more obviously generate the same code >>>>> as >>>>> jdk11u upstream currently does. >>>>> >>>>> It moves the new method definitions to Shenandoah-files. It >>>>> trades >>>>> cleaner sharde code at the expense of slightly increased mess >>>>> in >>>>> Shenandoah files (I copied some helper methods there, seems >>>>> harmless to >>>>> me). The new inline code parts are now guarded with #if >>>>> INCLUDE_SHENANDOAHGC and if (UseShenandoahGC) so are obviously >>>>> guaranteed to not leak out into non-Shenandoah code. >>>>> >>>>> Webrev: >>>>> http://cr.openjdk. >>>>> java.net/~rkennke/shjdk11-refactor-isolate-critical- >>>>> pinning/webrev.00/ >>>>> >>>>> Testing: hotspot_gc_shenandoah (x86_32, x86_64, builds with and >>>>> without >>>>> Shenandoah) >>>>> >>>>> Ok? >>>>> >>>>> Roman >>>>> > From rwestrel at redhat.com Wed Jul 8 07:12:44 2020 From: rwestrel at redhat.com (Roland Westrelin) Date: Wed, 08 Jul 2020 09:12:44 +0200 Subject: RFR: Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() In-Reply-To: References: Message-ID: <87lfjupjir.fsf@redhat.com> > http://cr.openjdk.java.net/~rkennke/shjdk11-c2-enqueue-barrier/webrev.00/ Why enqueue the uses of the uses as well? 924 for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; i++) { 925 Node* use = node->fast_out(i); // Get use 926 add_users_to_worklist0(worklist, use); 927 } Roland. From rwestrel at redhat.com Wed Jul 8 07:18:32 2020 From: rwestrel at redhat.com (Roland Westrelin) Date: Wed, 08 Jul 2020 09:18:32 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: References: Message-ID: <87imeypj93.fsf@redhat.com> > http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-shared/webrev.00/ The code that's guarded by #if in ifnode.cpp is dead code that was removed in subsequent jdk versions. It feels silly to make that change when that line of code should really go away. Anyway C2 changes look good. Roland. From rkennke at redhat.com Wed Jul 8 08:05:54 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 8 Jul 2020 10:05:54 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: <87imeypj93.fsf@redhat.com> References: <87imeypj93.fsf@redhat.com> Message-ID: Roland Westrelin schrieb am Mi., 8. Juli 2020, 09:18: > > > http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-shared/webrev.00/ > > The code that's guarded by #if in ifnode.cpp is dead code that was > removed in subsequent jdk versions. It feels silly to make that change > when that line of code should really go away. Hmm, but it does exist in jdk11u and the intention here was to make clear that we don't possibly change existing behaviour by introducing Shenandoah to upstream jdk11u. E.g. compile to the exact same code when building with -shenandoahgc. Anyway C2 changes look > good. > Ok, thanks for the review of those parts! Roman > Roland. 
> > From rkennke at redhat.com Wed Jul 8 08:07:37 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 8 Jul 2020 10:07:37 +0200 Subject: RFR: Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() In-Reply-To: <87lfjupjir.fsf@redhat.com> References: <87lfjupjir.fsf@redhat.com> Message-ID: Roland Westrelin schrieb am Mi., 8. Juli 2020, 09:12: > > > > http://cr.openjdk.java.net/~rkennke/shjdk11-c2-enqueue-barrier/webrev.00/ > > Why enqueue the uses of the uses as well? > > 924 for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; > i++) { > 925 Node* use = node->fast_out(i); // Get use > 926 add_users_to_worklist0(worklist, use); > 927 } > As far as I can tell, that's what the previous call to PhaseIGVN::add_users_to_worklist() did for CallLeafNodes there, I tried to mimic that. Do you agree? Thanks, Roman > > Roland. > > From rwestrel at redhat.com Wed Jul 8 08:14:02 2020 From: rwestrel at redhat.com (Roland Westrelin) Date: Wed, 08 Jul 2020 10:14:02 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: References: <87imeypj93.fsf@redhat.com> Message-ID: <87fta2pgol.fsf@redhat.com> > Hmm, but it does exist in jdk11u and the intention here was to make clear > that we don't possibly change existing behaviour by introducing Shenandoah > to upstream jdk11u. E.g. compile to the exact same code when building with > -shenandoahgc. I understand but it feels silly that the easiest forward is to make a useless change. Anyway, I don't object to it. Roland. From rkennke at redhat.com Wed Jul 8 08:22:58 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 08 Jul 2020 10:22:58 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: <87fta2pgol.fsf@redhat.com> References: <87imeypj93.fsf@redhat.com> <87fta2pgol.fsf@redhat.com> Message-ID: <8cce5fa33d7c584893bcae5e4196eb1798147b7b.camel@redhat.com> On Wed, 2020-07-08 at 10:14 +0200, Roland Westrelin wrote: > > Hmm, but it does exist in jdk11u and the intention here was to make > > clear > > that we don't possibly change existing behaviour by introducing > > Shenandoah > > to upstream jdk11u. E.g. compile to the exact same code when > > building with > > -shenandoahgc. > > I understand but it feels silly that the easiest forward is to make a > useless change. Anyway, I don't object to it. Yeah. That is unfortunately the condition on which we have a chance to take it upstream. And it doesn't look quite as silly when seen in the context of the actual upstream diff: https://cr.openjdk.java.net/~rkennke/shenandoah-jdk11u-upstream/webrev.05-shared/src/hotspot/share/opto/ifnode.cpp.udiff.html I believe if we wanted to remove that line altogether in jdk11u, it should be discussed separately from Shenandoah upstreaming. Thanks, Roman From rwestrel at redhat.com Wed Jul 8 08:25:06 2020 From: rwestrel at redhat.com (Roland Westrelin) Date: Wed, 08 Jul 2020 10:25:06 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: References: <87imeypj93.fsf@redhat.com> Message-ID: <87blkqpg65.fsf@redhat.com> > Hmm, but it does exist in jdk11u and the intention here was to make clear > that we don't possibly change existing behaviour by introducing Shenandoah > to upstream jdk11u. E.g. compile to the exact same code when building with > -shenandoahgc. What we need is to have IGVN process the call so enqueueing the call is all we need. Not need to enqueue the uses of the call. Roland. 
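(Aside: Roland's worklist argument is easy to see in isolation. The sketch
below is not HotSpot code -- Node, simplify() and run_to_fixpoint() are
made-up stand-ins rather than the C2/PhaseIterGVN API -- it only illustrates
why enqueueing just the rewritten call is enough: a fixpoint pass pushes the
users of any node it actually changes, so pre-seeding those uses adds
nothing. It also assumes simplify() eventually stops reporting changes.)

#include <queue>
#include <vector>

struct Node {
  std::vector<Node*> users;   // nodes that consume this node's output
};

// Seed only the node that was just rewritten (the "call"). Whenever
// simplify() changes a node, that node's users are pushed onto the
// worklist, so they are reached transitively without being pre-seeded.
void run_to_fixpoint(Node* changed, bool (*simplify)(Node*)) {
  std::queue<Node*> worklist;
  worklist.push(changed);
  while (!worklist.empty()) {
    Node* n = worklist.front();
    worklist.pop();
    if (simplify(n)) {           // true iff simplify() changed n
      for (Node* u : n->users) {
        worklist.push(u);        // revisit users of anything that changed
      }
    }
  }
}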
From rkennke at redhat.com Wed Jul 8 08:58:48 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 08 Jul 2020 10:58:48 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: <87blkqpg65.fsf@redhat.com> References: <87imeypj93.fsf@redhat.com> <87blkqpg65.fsf@redhat.com> Message-ID: On Wed, 2020-07-08 at 10:25 +0200, Roland Westrelin wrote: > > Hmm, but it does exist in jdk11u and the intention here was to make > > clear > > that we don't possibly change existing behaviour by introducing > > Shenandoah > > to upstream jdk11u. E.g. compile to the exact same code when > > building with > > -shenandoahgc. > > What we need is to have IGVN process the call so enqueueing the call > is > all we need. Not need to enqueue the uses of the call. > Alright, so that is sufficient then? http://cr.openjdk.java.net/~rkennke/shjdk11-c2-enqueue-barrier/webrev.01/ Roman From rwestrel at redhat.com Wed Jul 8 09:57:24 2020 From: rwestrel at redhat.com (Roland Westrelin) Date: Wed, 08 Jul 2020 11:57:24 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: <8cce5fa33d7c584893bcae5e4196eb1798147b7b.camel@redhat.com> References: <87imeypj93.fsf@redhat.com> <87fta2pgol.fsf@redhat.com> <8cce5fa33d7c584893bcae5e4196eb1798147b7b.camel@redhat.com> Message-ID: <878sfupbwb.fsf@redhat.com> > https://cr.openjdk.java.net/~rkennke/shenandoah-jdk11u-upstream/webrev.05-shared/src/hotspot/share/opto/ifnode.cpp.udiff.html Actually we can put: (req() == 3 && dom->in(2) != in(2)) || // Not same input 2? back in and have no diff with upstream. It's dead code so it doesn't matter if it's there or not. Roland. From rwestrel at redhat.com Wed Jul 8 10:01:18 2020 From: rwestrel at redhat.com (Roland Westrelin) Date: Wed, 08 Jul 2020 12:01:18 +0200 Subject: RFR: Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() In-Reply-To: References: <87lfjupjir.fsf@redhat.com> Message-ID: <874kqipbpt.fsf@redhat.com> (I commented on the wrong RFR before. Back to the right one) > Alright, so that is sufficient then? > > http://cr.openjdk.java.net/~rkennke/shjdk11-c2-enqueue-barrier/webrev.01/ Yes, I think it's good. If this is somehow wrong, testing should catch it. Roland. From rkennke at redhat.com Wed Jul 8 10:19:43 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 08 Jul 2020 12:19:43 +0200 Subject: RFR: Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() In-Reply-To: <874kqipbpt.fsf@redhat.com> References: <87lfjupjir.fsf@redhat.com> <874kqipbpt.fsf@redhat.com> Message-ID: <2307763536e0adb378301b514d8ec3748e445335.camel@redhat.com> On Wed, 2020-07-08 at 12:01 +0200, Roland Westrelin wrote: > (I commented on the wrong RFR before. Back to the right one) > > > Alright, so that is sufficient then? > > > > http://cr.openjdk.java.net/~rkennke/shjdk11-c2-enqueue-barrier/webrev.01/ > > Yes, I think it's good. If this is somehow wrong, testing should > catch it. Alright, I've run hotspot_gc_shenandoah, and I'll also run CTW tests before pushing it. 
Thanks, Roman From rkennke at redhat.com Wed Jul 8 10:24:49 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 08 Jul 2020 12:24:49 +0200 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: <878sfupbwb.fsf@redhat.com> References: <87imeypj93.fsf@redhat.com> <87fta2pgol.fsf@redhat.com> <8cce5fa33d7c584893bcae5e4196eb1798147b7b.camel@redhat.com> <878sfupbwb.fsf@redhat.com> Message-ID: On Wed, 2020-07-08 at 11:57 +0200, Roland Westrelin wrote: > > https://cr.openjdk.java.net/~rkennke/shenandoah-jdk11u-upstream/webrev.05-shared/src/hotspot/share/opto/ifnode.cpp.udiff.html > > Actually we can put: > > (req() == 3 && dom->in(2) != in(2)) || // Not same input 2? > > back in and have no diff with upstream. It's dead code so it doesn't > matter if it's there or not. Ok, so I did: http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-shared/webrev.01/ Thanks for reviewing! I still need a review of the rest of the patch. Roman From rkennke at redhat.com Wed Jul 8 10:55:02 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Wed, 08 Jul 2020 10:55:02 +0000 Subject: hg: shenandoah/jdk11: Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() Message-ID: <202007081055.068At3RM027015@aojmv0008.oracle.com> Changeset: 804dc32ba262 Author: rkennke Date: 2020-07-08 12:54 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk11/rev/804dc32ba262 Shenandoah: Isolate changes around BarrierSetC2::enqueue_useful_gc_barrier() Reviewed-by: roland ! src/hotspot/share/gc/shared/c2/barrierSetC2.hpp ! src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp ! src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp ! src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp ! src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp ! src/hotspot/share/opto/node.cpp ! src/hotspot/share/opto/phaseX.cpp From zgu at redhat.com Wed Jul 8 14:32:31 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 8 Jul 2020 10:32:31 -0400 Subject: RFR(sh/jdk11): Shenandoah: Isolate shared-code changes In-Reply-To: References: Message-ID: <2cecfa5c-72bd-3212-1adc-8479cd662fd4@redhat.com> Looks ok to me. Thanks, -Zhengyu On 7/7/20 9:06 AM, Roman Kennke wrote: > Here comes a batch of isolating remaining simple Shenandoah-induced > shared-code changes. The idea is that when building without Shenandoah > GC should compile exactly as it would with current jdk11u. > > Webrev: > http://cr.openjdk.java.net/~rkennke/shjdk11-isolate-shared/webrev.00/ > > Testing: hotspot_gc_shenandoah, with/without Shenandoah, 32/64 bit. > > Ok? > From shade at redhat.com Fri Jul 10 06:41:43 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 10 Jul 2020 08:41:43 +0200 Subject: RFR (S) 8248652: Shenandoah: SATB buffer handling may assume no forwarded objects In-Reply-To: <046b7175-361c-e823-06c5-90d053d4c47d@redhat.com> References: <046b7175-361c-e823-06c5-90d053d4c47d@redhat.com> Message-ID: On 7/1/20 7:56 PM, Aleksey Shipilev wrote: > RFE: > https://bugs.openjdk.java.net/browse/JDK-8248652 > > Since CM-with-UR is gone, SATB may assume no forwarded objects are ever exposed through it. The only > way marking code can experience forwarded objects is due to Full GC marking. In that case, SATB > should be inactive. > > Fix: > https://cr.openjdk.java.net/~shade/8248652/webrev.01/ > > Testing: hotspot_gc_shenandoah Ping? 
:) -- Thanks, -Aleksey From rkennke at redhat.com Fri Jul 10 08:31:26 2020 From: rkennke at redhat.com (Roman Kennke) Date: Fri, 10 Jul 2020 10:31:26 +0200 Subject: RFR (S) 8248652: Shenandoah: SATB buffer handling may assume no forwarded objects In-Reply-To: References: <046b7175-361c-e823-06c5-90d053d4c47d@redhat.com> Message-ID: <5d27e6809fc16574fbd7a6ae536b23ea91a55acf.camel@redhat.com> On Fri, 2020-07-10 at 08:41 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > On 7/1/20 7:56 PM, Aleksey Shipilev wrote: > > RFE: > > https://bugs.openjdk.java.net/browse/JDK-8248652 > > > > Since CM-with-UR is gone, SATB may assume no forwarded objects are > > ever exposed through it. The only > > way marking code can experience forwarded objects is due to Full GC > > marking. In that case, SATB > > should be inactive. > > > > Fix: > > https://cr.openjdk.java.net/~shade/8248652/webrev.01/ > > > > Testing: hotspot_gc_shenandoah > > Ping? :) Sorry, that one slipped through. The patch looks good! Thank you! Roman From zgu at redhat.com Fri Jul 10 18:04:16 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Fri, 10 Jul 2020 14:04:16 -0400 Subject: RFR 8247670: Shenandoah: deadlock during class unloading OOME In-Reply-To: References: Message-ID: <271ad565-c60d-5741-eb91-ebd52213a6ca@redhat.com> The deadlock is caused by JDK-8245288. After further investigation, JDK-8245288 change does not seem to provide improvement that reported in original bug. Let's just backout JDK-8245288. Webrev: http://cr.openjdk.java.net/~zgu/JDK-8247670/webrev.01/ Test: hotspot_gc_shenandoah Thanks, -Zhengyu On 6/16/20 3:48 PM, Zhengyu Gu wrote: > The deadlock is caused by one thread holding per-nmethod lock, then > encountering evac-oom. At the same time, another thread entering > evac-oom scope, then acquiring the same per-nmethod lock. > > The first thread expects the second thread to see evac-oom and exit the > scope, but the second thread is blocked on acquiring per-nmethod lock. > > The solution is to introduce an abortable locker on per-nmethod lock. If > the second thread can not acquire the lock, but see evac-oom, it simply > aborts, so it can exit evac-oom scope. > > The solution does come with penalties: > > If the second thread is a Java thread (via nmethod entry barrier), the > nmethod will be deopt. > > If the second thread is worker, it causes current code root processing > to abort, then restart. > > > Bug: https://bugs.openjdk.java.net/browse/JDK-8247670 > Webrev: http://cr.openjdk.java.net/~zgu/JDK-8247670/webrev.00/ > > Test: > ? hotspot_gc_shenandoah (x86_64 and aarch64) > > Thanks, > > -Zhengyu From rkennke at redhat.com Fri Jul 10 18:14:08 2020 From: rkennke at redhat.com (Roman Kennke) Date: Fri, 10 Jul 2020 20:14:08 +0200 Subject: RFR 8247670: Shenandoah: deadlock during class unloading OOME In-Reply-To: <271ad565-c60d-5741-eb91-ebd52213a6ca@redhat.com> References: <271ad565-c60d-5741-eb91-ebd52213a6ca@redhat.com> Message-ID: Ok, let's do that. Thanks, Roman On Fri, 2020-07-10 at 14:04 -0400, Zhengyu Gu wrote: > The deadlock is caused by JDK-8245288. After further investigation, > JDK-8245288 change does not seem to provide improvement that reported > in > original bug. Let's just backout JDK-8245288. 
> > Webrev: http://cr.openjdk.java.net/~zgu/JDK-8247670/webrev.01/ > > Test: > hotspot_gc_shenandoah > > Thanks, > > -Zhengyu > > > On 6/16/20 3:48 PM, Zhengyu Gu wrote: > > The deadlock is caused by one thread holding per-nmethod lock, > > then > > encountering evac-oom. At the same time, another thread entering > > evac-oom scope, then acquiring the same per-nmethod lock. > > > > The first thread expects the second thread to see evac-oom and exit > > the > > scope, but the second thread is blocked on acquiring per-nmethod > > lock. > > > > The solution is to introduce an abortable locker on per-nmethod > > lock. If > > the second thread can not acquire the lock, but see evac-oom, it > > simply > > aborts, so it can exit evac-oom scope. > > > > The solution does come with penalties: > > > > If the second thread is a Java thread (via nmethod entry barrier), > > the > > nmethod will be deopt. > > > > If the second thread is worker, it causes current code root > > processing > > to abort, then restart. > > > > > > Bug: https://bugs.openjdk.java.net/browse/JDK-8247670 > > Webrev: http://cr.openjdk.java.net/~zgu/JDK-8247670/webrev.00/ > > > > Test: > > hotspot_gc_shenandoah (x86_64 and aarch64) > > > > Thanks, > > > > -Zhengyu From zgu at redhat.com Fri Jul 10 18:17:39 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Fri, 10 Jul 2020 14:17:39 -0400 Subject: RFR 8247670: Shenandoah: deadlock during class unloading OOME In-Reply-To: References: <271ad565-c60d-5741-eb91-ebd52213a6ca@redhat.com> Message-ID: <24d46c08-8f11-6ace-a216-f14eeefe18bd@redhat.com> Thanks and pushed. -Zhengyu On 7/10/20 2:14 PM, Roman Kennke wrote: > > Ok, let's do that. > > Thanks, > Roman > > > On Fri, 2020-07-10 at 14:04 -0400, Zhengyu Gu wrote: >> The deadlock is caused by JDK-8245288. After further investigation, >> JDK-8245288 change does not seem to provide improvement that reported >> in >> original bug. Let's just backout JDK-8245288. >> >> Webrev: http://cr.openjdk.java.net/~zgu/JDK-8247670/webrev.01/ >> >> Test: >> hotspot_gc_shenandoah >> >> Thanks, >> >> -Zhengyu >> >> >> On 6/16/20 3:48 PM, Zhengyu Gu wrote: >>> The deadlock is caused by one thread holding per-nmethod lock, >>> then >>> encountering evac-oom. At the same time, another thread entering >>> evac-oom scope, then acquiring the same per-nmethod lock. >>> >>> The first thread expects the second thread to see evac-oom and exit >>> the >>> scope, but the second thread is blocked on acquiring per-nmethod >>> lock. >>> >>> The solution is to introduce an abortable locker on per-nmethod >>> lock. If >>> the second thread can not acquire the lock, but see evac-oom, it >>> simply >>> aborts, so it can exit evac-oom scope. >>> >>> The solution does come with penalties: >>> >>> If the second thread is a Java thread (via nmethod entry barrier), >>> the >>> nmethod will be deopt. >>> >>> If the second thread is worker, it causes current code root >>> processing >>> to abort, then restart. 
>>> >>> >>> Bug: https://bugs.openjdk.java.net/browse/JDK-8247670 >>> Webrev: http://cr.openjdk.java.net/~zgu/JDK-8247670/webrev.00/ >>> >>> Test: >>> hotspot_gc_shenandoah (x86_64 and aarch64) >>> >>> Thanks, >>> >>> -Zhengyu > From shade at redhat.com Mon Jul 13 09:09:46 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Mon, 13 Jul 2020 11:09:46 +0200 Subject: RFR (S) 8249230: Shenandoah: assertion failure with -XX:-ResizeTLAB Message-ID: <2d76df7b-d88f-061b-5aad-c2dfad47c94c@redhat.com> Bug: https://bugs.openjdk.java.net/browse/JDK-8249230 This is caused by recent refactoring, JDK-8247845. Fix: https://cr.openjdk.java.net/~shade/8249230/webrev.01/ Testing: hotspot_gc_shenandoah; new test; tier1 with Shenandoah -- Thanks, -Aleksey From rkennke at redhat.com Mon Jul 13 09:37:55 2020 From: rkennke at redhat.com (Roman Kennke) Date: Mon, 13 Jul 2020 11:37:55 +0200 Subject: RFR (S) 8249230: Shenandoah: assertion failure with -XX:-ResizeTLAB In-Reply-To: <2d76df7b-d88f-061b-5aad-c2dfad47c94c@redhat.com> References: <2d76df7b-d88f-061b-5aad-c2dfad47c94c@redhat.com> Message-ID: Very good! Thank you! Roman Aleksey Shipilev schrieb am Mo., 13. Juli 2020, 11:10: > Bug: > https://bugs.openjdk.java.net/browse/JDK-8249230 > > This is caused by recent refactoring, JDK-8247845. Fix: > https://cr.openjdk.java.net/~shade/8249230/webrev.01/ > > Testing: hotspot_gc_shenandoah; new test; tier1 with Shenandoah > > -- > Thanks, > -Aleksey > > From shade at redhat.com Mon Jul 13 09:56:03 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Mon, 13 Jul 2020 11:56:03 +0200 Subject: RFR (S) 8249230: Shenandoah: assertion failure with -XX:-ResizeTLAB In-Reply-To: References: <2d76df7b-d88f-061b-5aad-c2dfad47c94c@redhat.com> Message-ID: <65732704-e3e2-1cee-a367-97019e814992@redhat.com> On 7/13/20 11:37 AM, Roman Kennke wrote: > Very good! Thank you! No problem, I should have done that in the original change. Pushed. 
-- Thanks, -Aleksey From gnu.andrew at redhat.com Tue Jul 14 21:43:30 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Tue, 14 Jul 2020 22:43:30 +0100 Subject: [RFR] [8u] 8u262-b10 Upstream Sync Message-ID: <6dab6f27-f9cd-fa1c-8b72-c7c1a069c45f@redhat.com> Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/ Merge changesets: http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/corba/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jaxp/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jaxws/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jdk/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/hotspot/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/langtools/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/nashorn/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/root/merge.changeset Changes in aarch64-shenandoah-jdk8u262-b10: - JDK-8230613: Better ASCII conversions - JDK-8231800: Better listing of arrays - JDK-8232014: Expand DTD support - JDK-8233255: Better Swing Buttons - JDK-8234032: Improve basic calendar services - JDK-8234042: Better factory production of certificates - JDK-8234418: Better parsing with CertificateFactory - JDK-8234836: Improve serialization handling - JDK-8236191: Enhance OID processing - JDK-8237117: Better ForkJoinPool behavior - JDK-8237592: Enhance certificate verification - JDK-8238002: Better matrix operations - JDK-8238804: Enhance key handling process - JDK-8238843: Enhanced font handing - JDK-8238920: Better Buffer support - JDK-8238925: Enhance WAV file playback - JDK-8240119: Less Affine Transformations - JDK-8240482: Improved WAV file playback - JDK-8241379: Update JCEKS support - JDK-8241522: Manifest improved jar headers redux - JDK-8242136: Better XML namespace handling - JDK-8248715: New JavaTimeSupplementary localisation for 'in' installed in wrong package Main issues of note: None, clean merge (no HotSpot changes). 
diffstat for root b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for corba b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jaxp b/.hgtags | 1 b/src/com/sun/org/apache/xerces/internal/impl/XMLDTDScannerImpl.java | 17 ++++------ b/src/com/sun/org/apache/xerces/internal/impl/XMLScanner.java | 9 ++--- b/src/com/sun/org/apache/xerces/internal/impl/dtd/DTDGrammar.java | 12 ++++--- b/src/com/sun/org/apache/xerces/internal/impl/xs/XMLSchemaValidator.java | 5 +- 5 files changed, 25 insertions(+), 19 deletions(-) diffstat for jaxws b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for langtools b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for nashorn b/.hgtags | 1 + 1 file changed, 1 insertion(+) diffstat for jdk b/.hgtags | 1 b/make/lib/Awt2dLibraries.gmk | 7 b/src/macosx/classes/apple/security/KeychainStore.java | 5 b/src/macosx/native/com/sun/media/sound/PLATFORM_API_MacOSX_PCM.cpp | 8 b/src/share/classes/com/sun/crypto/provider/JceKeyStore.java | 33 ++- b/src/share/classes/com/sun/media/sound/DirectAudioDevice.java | 28 ++- b/src/share/classes/com/sun/media/sound/Toolkit.java | 15 + b/src/share/classes/java/io/ObjectInputStream.java | 22 ++ b/src/share/classes/java/nio/Buffer.java | 30 ++- b/src/share/classes/java/security/MessageDigest.java | 24 ++ b/src/share/classes/java/security/PKCS12Attribute.java | 5 b/src/share/classes/java/util/ArrayList.java | 14 - b/src/share/classes/java/util/PriorityQueue.java | 3 b/src/share/classes/java/util/Vector.java | 12 - b/src/share/classes/java/util/concurrent/CopyOnWriteArrayList.java | 12 - b/src/share/classes/java/util/concurrent/ForkJoinWorkerThread.java | 13 - b/src/share/classes/java/util/concurrent/PriorityBlockingQueue.java | 3 b/src/share/classes/sun/font/TrueTypeFont.java | 4 b/src/share/classes/sun/net/idn/Punycode.java | 2 b/src/share/classes/sun/security/pkcs/ContentInfo.java | 8 b/src/share/classes/sun/security/pkcs/SignerInfo.java | 3 b/src/share/classes/sun/security/pkcs12/MacData.java | 8 b/src/share/classes/sun/security/pkcs12/PKCS12KeyStore.java | 24 ++ b/src/share/classes/sun/security/provider/certpath/OCSPResponse.java | 4 b/src/share/classes/sun/security/util/DerInputBuffer.java | 85 ++++++---- b/src/share/classes/sun/security/util/HostnameChecker.java | 10 - b/src/share/classes/sun/security/util/ObjectIdentifier.java | 63 ++++++- b/src/share/classes/sun/text/resources/in/JavaTimeSupplementary_in.java | 2 b/src/share/instrument/EncodingSupport.c | 6 b/src/share/instrument/InvocationAdapter.c | 8 b/src/share/native/sun/awt/medialib/mlib_ImageScanPoly.c | 16 + b/src/share/native/sun/java2d/cmm/lcms/cmsps2.c | 2 b/src/solaris/native/sun/awt/gtk2_interface.c | 69 ++++---- b/src/solaris/native/sun/awt/gtk3_interface.c | 26 +-- b/src/solaris/native/sun/awt/gtk3_interface.h | 44 +++++ b/src/solaris/native/sun/awt/swing_GTKEngine.c | 10 - b/test/java/io/Serializable/serialFilter/SerialFilterTest.java | 46 +++++ 37 files changed, 498 insertions(+), 177 deletions(-) diffstat for hotspot b/.hgtags | 1 + 1 file changed, 1 insertion(+) Successfully built on x86, x86_64, s390, s390x, ppc, ppc64, ppc64le & aarch64. Ok to push? Thanks, -- Andrew :) Senior Free Java Software Engineer OpenJDK Package Owner Red Hat, Inc. 
(http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From gnu.andrew at redhat.com Tue Jul 14 23:34:59 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Wed, 15 Jul 2020 00:34:59 +0100 Subject: RFR: [11u] shenandoah-jdk-11.0.8+9 & shenandoah-jdk-11.0.8+10 Message-ID: <290fcf7c-bb90-b5cc-5c53-3a84cae6e5e8@redhat.com> Webrev: https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/ Merge changeset for b09: https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/11.0.8+9.merge Merge changeset for b10: https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/11.0.8+10.merge Changes in shenandoah-jdk-11.0.8+9: - JDK-8230613: Better ASCII conversions - JDK-8231800: Better listing of arrays - JDK-8232014: Expand DTD support - JDK-8233234: Better Zip Naming - JDK-8233239: Enhance TIFF support - JDK-8233255: Better Swing Buttons - JDK-8234032: Improve basic calendar services - JDK-8234042: Better factory production of certificates - JDK-8234418: Better parsing with CertificateFactory - JDK-8234836: Improve serialization handling - JDK-8236191: Enhance OID processing - JDK-8236867: Enhance Graal interface handling - JDK-8237117: Better ForkJoinPool behavior - JDK-8237592: Enhance certificate verification - JDK-8238002: Better matrix operations - JDK-8238013: Enhance String writing - JDK-8238804: Enhance key handling process - JDK-8238843: Enhanced font handing - JDK-8238920: Better Buffer support - JDK-8238925: Enhance WAV file playback - JDK-8240119: Less Affine Transformations - JDK-8240482: Improved WAV file playback - JDK-8241379: Update JCEKS support - JDK-8241522: Manifest improved jar headers redux - JDK-8242136: Better XML namespace handling Changes in shenandoah-jdk-11.0.8+10: - JDK-8248505: Unexpected NoSuchAlgorithmException when using secure random impl from BCFIPS provider Successfully built on x86, x86_64, s390, s390x, ppc, ppc64, ppc64le & aarch64. (with Shenandoah only built on x86_64 & aarch64 through configure logic) Ok to push? Thanks, -- Andrew :) Senior Free Java Software Engineer OpenJDK Package Owner Red Hat, Inc. (http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From shade at redhat.com Wed Jul 15 05:39:12 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 15 Jul 2020 07:39:12 +0200 Subject: [RFR] [8u] 8u262-b10 Upstream Sync In-Reply-To: <6dab6f27-f9cd-fa1c-8b72-c7c1a069c45f@redhat.com> References: <6dab6f27-f9cd-fa1c-8b72-c7c1a069c45f@redhat.com> Message-ID: On 7/14/20 11:43 PM, Andrew Hughes wrote: > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/corba/merge.changeset Looks trivially good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jaxp/merge.changeset Looks good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jaxws/merge.changeset Looks trivially good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jdk/merge.changeset Looks good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/hotspot/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/langtools/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/nashorn/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/root/merge.changeset Look trivially good. > Ok to push? Yes. 
-- Thanks, -Aleksey From shade at redhat.com Wed Jul 15 05:40:43 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 15 Jul 2020 07:40:43 +0200 Subject: RFR: [11u] shenandoah-jdk-11.0.8+9 & shenandoah-jdk-11.0.8+10 In-Reply-To: <290fcf7c-bb90-b5cc-5c53-3a84cae6e5e8@redhat.com> References: <290fcf7c-bb90-b5cc-5c53-3a84cae6e5e8@redhat.com> Message-ID: <087aa70b-aa1d-b915-9bd8-a0e46293d9a0@redhat.com> On 7/15/20 1:34 AM, Andrew Hughes wrote: > Webrev: https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/ > Merge changeset for b09: > https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/11.0.8+9.merge > Merge changeset for b10: > https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/11.0.8+10.merge Looks good. > Ok to push? Yes. -- Thanks, -Aleksey From shade at redhat.com Wed Jul 15 08:51:55 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 15 Jul 2020 10:51:55 +0200 Subject: [11] RFR: Fix Windows build failure due to nesting macro Message-ID: It seems MSVC dislikes the nested #if within the assert macro. Current Windows builds fail in sh/jdk11 with: c:/buildbot/worker/build-shenandoah-jdk11-windows/build/src/hotspot/share/opto/escape.cpp(2436) : error C2121: '#' : invalid character : possibly the result of a macro expansion c:/buildbot/worker/build-shenandoah-jdk11-windows/build/src/hotspot/share/opto/escape.cpp(2436) : error C2143: syntax error : missing ')' before 'if' c:/buildbot/worker/build-shenandoah-jdk11-windows/build/src/hotspot/share/opto/escape.cpp(2436) : error C2059: syntax error : ')' c:/buildbot/worker/build-shenandoah-jdk11-windows/build/src/hotspot/share/opto/escape.cpp(2436) : error C2143: syntax error : missing ';' before '{' Fix: --- a/src/hotspot/share/opto/escape.cpp Wed Jul 08 17:15:01 2020 +0200 +++ b/src/hotspot/share/opto/escape.cpp Wed Jul 15 10:48:43 2020 +0200 @@ -2428,13 +2428,11 @@ int opcode = uncast_base->Opcode(); assert(opcode == Op_ConP || opcode == Op_ThreadLocal || opcode == Op_CastX2P || uncast_base->is_DecodeNarrowPtr() || (uncast_base->is_Mem() && (uncast_base->bottom_type()->isa_rawptr() != NULL)) || (uncast_base->is_Proj() && uncast_base->in(0)->is_Allocate()) -#if INCLUDE_SHENANDOAHGC - || uncast_base->Opcode() == Op_ShenandoahLoadReferenceBarrier -#endif + SHENANDOAHGC_ONLY(|| (uncast_base->Opcode() == Op_ShenandoahLoadReferenceBarrier)) , "sanity"); } } return base; } I am going to push this after Andrew lands his CPU merges. Testing: {Windows, Linux} builds -- Thanks, -Aleksey From rkennke at redhat.com Wed Jul 15 14:36:21 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 15 Jul 2020 16:36:21 +0200 Subject: [11] RFR: Fix Windows build failure due to nesting macro In-Reply-To: References: Message-ID: <997ca3f2154f5b78d5c2653c7ff6cf7634f0d774.camel@redhat.com> Ok, looks good. I also did the same in my latest upstreaming proposal. Thanks, Roman On Wed, 2020-07-15 at 10:51 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > It seems MSVC dislikes the nested #if within the assert macro. 
> Current Windows builds fail in > sh/jdk11 with: > > c:/buildbot/worker/build-shenandoah-jdk11- > windows/build/src/hotspot/share/opto/escape.cpp(2436) : > error C2121: '#' : invalid character : possibly the result of a macro > expansion > c:/buildbot/worker/build-shenandoah-jdk11- > windows/build/src/hotspot/share/opto/escape.cpp(2436) : > error C2143: syntax error : missing ')' before 'if' > c:/buildbot/worker/build-shenandoah-jdk11- > windows/build/src/hotspot/share/opto/escape.cpp(2436) : > error C2059: syntax error : ')' > c:/buildbot/worker/build-shenandoah-jdk11- > windows/build/src/hotspot/share/opto/escape.cpp(2436) : > error C2143: syntax error : missing ';' before '{' > > Fix: > > --- a/src/hotspot/share/opto/escape.cpp Wed Jul 08 17:15:01 2020 > +0200 > +++ b/src/hotspot/share/opto/escape.cpp Wed Jul 15 10:48:43 2020 > +0200 > @@ -2428,13 +2428,11 @@ > int opcode = uncast_base->Opcode(); > assert(opcode == Op_ConP || opcode == Op_ThreadLocal || > opcode == Op_CastX2P || uncast_base- > >is_DecodeNarrowPtr() || > (uncast_base->is_Mem() && (uncast_base->bottom_type()- > >isa_rawptr() != NULL)) || > (uncast_base->is_Proj() && uncast_base->in(0)- > >is_Allocate()) > -#if INCLUDE_SHENANDOAHGC > - || uncast_base->Opcode() == > Op_ShenandoahLoadReferenceBarrier > -#endif > + SHENANDOAHGC_ONLY(|| (uncast_base->Opcode() == > Op_ShenandoahLoadReferenceBarrier)) > , "sanity"); > } > } > return base; > } > > > I am going to push this after Andrew lands his CPU merges. > > Testing: {Windows, Linux} builds > From rkennke at redhat.com Wed Jul 15 17:20:21 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Wed, 15 Jul 2020 19:20:21 +0200 Subject: RFR: 8249543: Force DirectBufferAllocTest to run with -ExplicitGCInvokesConcurrent Message-ID: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> DirectBufferAllocTest is unreliable when running with +ExplicitGCInvokesConcurrent, because allocating DBB spreads System.gc() calls all over concurrent GC cycles. It becomes more reliable when running with -ExplicitGCInvokesConcurrent. (Shenandoah defaults to +ExplicitGCInvokesConcurrent, other GCs don't as far as I know.) Bug: https://bugs.openjdk.java.net/browse/JDK-8249543 Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249543/webrev.00/ Ok? Thanks, Roman From Alan.Bateman at oracle.com Wed Jul 15 18:43:57 2020 From: Alan.Bateman at oracle.com (Alan Bateman) Date: Wed, 15 Jul 2020 19:43:57 +0100 Subject: RFR: 8249543: Force DirectBufferAllocTest to run with -ExplicitGCInvokesConcurrent In-Reply-To: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> References: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> Message-ID: <82bb8596-0f7b-6266-45d3-80f042ed4db0@oracle.com> On 15/07/2020 18:20, rkennke at redhat.com wrote: > DirectBufferAllocTest is unreliable when running with > +ExplicitGCInvokesConcurrent, because allocating DBB spreads > System.gc() calls all over concurrent GC cycles. It becomes more > reliable when running with -ExplicitGCInvokesConcurrent. (Shenandoah > defaults to +ExplicitGCInvokesConcurrent, other GCs don't as far as I > know.) > > Bug: > https://bugs.openjdk.java.net/browse/JDK-8249543 > Webrev: > http://cr.openjdk.java.net/~rkennke/JDK-8249543/webrev.00/ > > Ok? > I guess this is okay but if -ExplicitGCInvokesConcurrent is the default then doesn't it break RMI DGC?? Are you sure this is the only test that fails? 
-Alan From rkennke at redhat.com Wed Jul 15 19:47:21 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 15 Jul 2020 21:47:21 +0200 Subject: RFR: 8249543: Force DirectBufferAllocTest to run with -ExplicitGCInvokesConcurrent In-Reply-To: <82bb8596-0f7b-6266-45d3-80f042ed4db0@oracle.com> References: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> <82bb8596-0f7b-6266-45d3-80f042ed4db0@oracle.com> Message-ID: <8c6d42af3f36953d8b680c7c07c112e7ff75574b.camel@redhat.com> Hi Alan, > On 15/07/2020 18:20, rkennke at redhat.com wrote: > > DirectBufferAllocTest is unreliable when running with > > +ExplicitGCInvokesConcurrent, because allocating DBB spreads > > System.gc() calls all over concurrent GC cycles. It becomes more > > reliable when running with -ExplicitGCInvokesConcurrent. > > (Shenandoah > > defaults to +ExplicitGCInvokesConcurrent, other GCs don't as far as > > I > > know.) > > > > Bug: > > https://bugs.openjdk.java.net/browse/JDK-8249543 > > Webrev: > > http://cr.openjdk.java.net/~rkennke/JDK-8249543/webrev.00/ > > > > Ok? > > > I guess this is okay but if -ExplicitGCInvokesConcurrent is the > default > then doesn't it break RMI DGC? Why would it? Can you explain? (-ExplicitGCInvokesConcurrent is the default for all GCs but Shenandoah, and has been that way forever. Do you mean +ExplicitGCInvokesConcurrent?) Here's some context from our perspective: Normally, when System.gc() is called, it invokes a STW garbage collection. For most GCs that has been that way forever. This is what -ExplicitGCInvokesConcurrent implies. In Shenandoah, we opted to do +ExplicitGCInvokesConcurrent instead. This means that when System.gc() is called, a *concurrent* collection cycle is started, and the calling thread will wait for that to complete (and other threads will keep on running - unless they also call System.gc() ). It breaks this test because all test threads are hammering the GC with System.gc(), the first one will trigger the start of a concurrent GC, and the other ones will line up while concurrent GC is running. This is normally ok. However, the test (or even DirectByteBuffer allocation routine in Bits.java) is also over-assuming that when System.gc() returns (and Cleaner thread did its thing), it could now allocate native memory. However, when lots of test threads are competing for this, the last one could already been outrun by the first ones that are rescheduled already. The additional concurrency introduced by concurrent GC, plus a bunch of wrinkles in our implementation (e.g. the cleaner can run concurrently with ongoing GC, and not after the GC as it would do with STW GC) makes this test spuriously fail with Shenandoah. Forcing it to -ExplicitGCInvokesConcurrent makes it more reliable. But as far as I can tell, the test is intrinsically unreliable, but I'm also not sure how it could be made better (or the DBB allocator even). > Are you sure this is the only test that > fails? So far, yes. Can you point me to specific tests that you would expect to fail? Roman From rkennke at redhat.com Wed Jul 15 21:40:03 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Wed, 15 Jul 2020 23:40:03 +0200 Subject: RFR: 8249560: Shenandoah: Fix racy GC request handling Message-ID: <62c8518e702fd11d7be34cc591f3c48bb9ad9ea7.camel@redhat.com> See discussion in bug: https://bugs.openjdk.java.net/browse/JDK-8249560 Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249560/webrev.00/ Testing: hotspot_gc_shenandoah, tier1&tier2 tests with Shenandoah Ok? 
Thanks, Roman From mathiske at amazon.com Thu Jul 16 00:20:23 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Thu, 16 Jul 2020 00:20:23 +0000 Subject: First cut at a card table for Shenandoah Message-ID: Just having edited some card table operations into Shenandoah GC, I have uploaded a webrev for perusal: http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/ The general idea of this patch is to mark some cards on the side as if we needed a generational remembered set barrier, without changing how Shenandoah works otherwise. Then we should be able to measure how much mutator overhead this sort of extra barrier might cause. My expected result was "similar to CMS or Parallel compared to Epsilon", but that's not what I am seeing in first attempts to run SPECjvm2008. Some of those benchmarks go way south, 5x and more. So I guess I made a mistake somewhere, but here it is anyway, so you can see the my approach, which is reparenting barrier classes to shared card table barrier classes and then hoping that everything falls into place. ( The array copying barrier for C1/C2 is switched off for now. For now, this patch is based on 11.0.7. I will rebase this to the latest soon. From Alan.Bateman at oracle.com Thu Jul 16 07:09:10 2020 From: Alan.Bateman at oracle.com (Alan Bateman) Date: Thu, 16 Jul 2020 08:09:10 +0100 Subject: RFR: 8249543: Force DirectBufferAllocTest to run with -ExplicitGCInvokesConcurrent In-Reply-To: <8c6d42af3f36953d8b680c7c07c112e7ff75574b.camel@redhat.com> References: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> <82bb8596-0f7b-6266-45d3-80f042ed4db0@oracle.com> <8c6d42af3f36953d8b680c7c07c112e7ff75574b.camel@redhat.com> Message-ID: <95f93bec-6dde-d532-ca49-2739be463dd5@oracle.com> On 15/07/2020 20:47, Roman Kennke wrote: > : > Why would it? Can you explain? (-ExplicitGCInvokesConcurrent is the > default for all GCs but Shenandoah, and has been that way forever. Do > you mean +ExplicitGCInvokesConcurrent?) > Just surprised that more tests aren't impacted. RMI DGC wouldn't work with a STW collector if explicit GC were disabled. I haven't heard of deployment using it with a concurrent GC but maybe it's okay. I'm just surprised that the RMI tests in the jdk repo are robust enough to pass, I would have guessed they might need attention (the test group is jdk_rmi but it sounds like you might be running those already). -Alan From rkennke at redhat.com Thu Jul 16 08:16:47 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 16 Jul 2020 10:16:47 +0200 Subject: First cut at a card table for Shenandoah In-Reply-To: References: Message-ID: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> Hi Bernd, could the performance drop come from the fact that we have no generations yet, and thus the barrier would track *all* stores (all the time) rather than only old->young stores? I'll give the patch a spin soon! Thank you! Roman On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote: > Just having edited some card table operations into Shenandoah GC, > I have uploaded a webrev for perusal: > http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/ > > The general idea of this patch is to mark some cards on the side as > if we needed a generational remembered set barrier, without changing > how Shenandoah works otherwise. Then we should be able to measure how > much mutator overhead this sort of extra barrier might cause. 
My > expected result was "similar to CMS or Parallel compared to Epsilon", > but that's not what I am seeing in first attempts to run SPECjvm2008. > Some of those benchmarks go way south, 5x and more. So I guess I made > a mistake somewhere, but here it is anyway, so you can see the my > approach, which is reparenting barrier classes to shared card table > barrier classes and then hoping that everything falls into place. ( > The array copying barrier for C1/C2 is switched off for now. > > For now, this patch is based on 11.0.7. I will rebase this to the > latest soon. > > From shade at redhat.com Thu Jul 16 08:17:22 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Thu, 16 Jul 2020 10:17:22 +0200 Subject: RFR: 8249560: Shenandoah: Fix racy GC request handling In-Reply-To: <62c8518e702fd11d7be34cc591f3c48bb9ad9ea7.camel@redhat.com> References: <62c8518e702fd11d7be34cc591f3c48bb9ad9ea7.camel@redhat.com> Message-ID: <28e3cd9f-fc34-99e0-2e16-3c905ae1c9a7@redhat.com> On 7/15/20 11:40 PM, rkennke at redhat.com wrote: > See discussion in bug: > > https://bugs.openjdk.java.net/browse/JDK-8249560 > > Webrev: > http://cr.openjdk.java.net/~rkennke/JDK-8249560/webrev.00/ OK, looks good. -- Thanks, -Aleksey From rkennke at redhat.com Thu Jul 16 09:19:21 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 16 Jul 2020 11:19:21 +0200 Subject: RFR: 8249543: Force DirectBufferAllocTest to run with -ExplicitGCInvokesConcurrent In-Reply-To: <95f93bec-6dde-d532-ca49-2739be463dd5@oracle.com> References: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> <82bb8596-0f7b-6266-45d3-80f042ed4db0@oracle.com> <8c6d42af3f36953d8b680c7c07c112e7ff75574b.camel@redhat.com> <95f93bec-6dde-d532-ca49-2739be463dd5@oracle.com> Message-ID: On Thu, 2020-07-16 at 08:09 +0100, Alan Bateman wrote: > On 15/07/2020 20:47, Roman Kennke wrote: > > : > > Why would it? Can you explain? (-ExplicitGCInvokesConcurrent is the > > default for all GCs but Shenandoah, and has been that way forever. > > Do > > you mean +ExplicitGCInvokesConcurrent?) > > > Just surprised that more tests aren't impacted. RMI DGC wouldn't > work > with a STW collector if explicit GC were disabled. Yeah, but +ExplicitGCInvokesConcurrent doesn't disable System.gc(), it only turns it into a concurrent cycle with the calling thread waiting for it to comlete. That is semantically very close to what STW System.gc() does. DirectBufferAllocTest is only problematic because it is not reliable as it is, and the added concurrency makes it worse, as far as I can tell. > I haven't heard of > deployment using it with a concurrent GC but maybe it's okay. I'm > just > surprised that the RMI tests in the jdk repo are robust enough to > pass, > I would have guessed they might need attention (the test group is > jdk_rmi but it sounds like you might be running those already). I've just run it again with my setup and it all passes. You ok with the patch? 
Thanks, Roman From Alan.Bateman at oracle.com Thu Jul 16 10:57:34 2020 From: Alan.Bateman at oracle.com (Alan Bateman) Date: Thu, 16 Jul 2020 11:57:34 +0100 Subject: RFR: 8249543: Force DirectBufferAllocTest to run with -ExplicitGCInvokesConcurrent In-Reply-To: References: <751a1935bfcbc92d3c29acaeb8846a85a0ce1664.camel@redhat.com> <82bb8596-0f7b-6266-45d3-80f042ed4db0@oracle.com> <8c6d42af3f36953d8b680c7c07c112e7ff75574b.camel@redhat.com> <95f93bec-6dde-d532-ca49-2739be463dd5@oracle.com> Message-ID: <3b6fb18f-9ac5-e982-8c19-1c55da5ea560@oracle.com> On 16/07/2020 10:19, Roman Kennke wrote: > : > I've just run it again with my setup and it all passes. > > You ok with the patch? > Yes, I think it's okay to push. -Alan From gnu.andrew at redhat.com Thu Jul 16 14:52:13 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Thu, 16 Jul 2020 15:52:13 +0100 Subject: [RFR] [8u] 8u262-b10 Upstream Sync In-Reply-To: References: <6dab6f27-f9cd-fa1c-8b72-c7c1a069c45f@redhat.com> Message-ID: On 15/07/2020 06:39, Aleksey Shipilev wrote: > On 7/14/20 11:43 PM, Andrew Hughes wrote: >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/corba/merge.changeset > > Looks trivially good. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jaxp/merge.changeset > > Looks good. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jaxws/merge.changeset > > Looks trivially good. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/jdk/merge.changeset > > Looks good. > >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/hotspot/merge.changeset >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/langtools/merge.changeset >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/nashorn/merge.changeset >> http://cr.openjdk.java.net/~andrew/shenandoah-8/u262-b10/root/merge.changeset > > Look trivially good. > >> Ok to push? > > Yes. > Thanks, pushed. Sorry for the delay. -- Andrew :) Senior Free Java Software Engineer OpenJDK Package Owner Red Hat, Inc. (http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From gnu.andrew at redhat.com Thu Jul 16 15:43:14 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Thu, 16 Jul 2020 16:43:14 +0100 Subject: RFR: [11u] shenandoah-jdk-11.0.8+9 & shenandoah-jdk-11.0.8+10 In-Reply-To: <087aa70b-aa1d-b915-9bd8-a0e46293d9a0@redhat.com> References: <290fcf7c-bb90-b5cc-5c53-3a84cae6e5e8@redhat.com> <087aa70b-aa1d-b915-9bd8-a0e46293d9a0@redhat.com> Message-ID: <1c3280e0-033c-6798-3839-c7f2721a1bb7@redhat.com> On 15/07/2020 06:40, Aleksey Shipilev wrote: > On 7/15/20 1:34 AM, Andrew Hughes wrote: >> Webrev: https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/ >> Merge changeset for b09: >> https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/11.0.8+9.merge >> Merge changeset for b10: >> https://cr.openjdk.java.net/~andrew/shenandoah-11/11.0.8/11.0.8+10.merge > > Looks good. > >> Ok to push? > > Yes. > Thanks. Pushed. -- Andrew :) Senior Free Java Software Engineer OpenJDK Package Owner Red Hat, Inc. 
(http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From shade at redhat.com Thu Jul 16 16:32:32 2020 From: shade at redhat.com (shade at redhat.com) Date: Thu, 16 Jul 2020 16:32:32 +0000 Subject: hg: shenandoah/jdk11: Fix Windows build failure due to nesting macro Message-ID: <202007161632.06GGWXxx008507@aojmv0008.oracle.com> Changeset: 3e11142d9acf Author: shade Date: 2020-07-16 18:32 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk11/rev/3e11142d9acf Fix Windows build failure due to nesting macro ! src/hotspot/share/opto/escape.cpp From mathiske at amazon.com Thu Jul 16 16:57:32 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Thu, 16 Jul 2020 16:57:32 +0000 Subject: First cut at a card table for Shenandoah In-Reply-To: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> Message-ID: <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> Roman, That there is too much tracking is quite possible. I'll figure out a way to simulate a young generation. I'll also switch those individual barrier parts on one-by-one to see which one does the most damage. (Benchmarks that cratered: derby, mmpegaudio, scimark, serial. "compress" was fine.) Bernd ?On 7/16/20, 1:17 AM, "Roman Kennke" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Hi Bernd, could the performance drop come from the fact that we have no generations yet, and thus the barrier would track *all* stores (all the time) rather than only old->young stores? I'll give the patch a spin soon! Thank you! Roman On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote: > Just having edited some card table operations into Shenandoah GC, > I have uploaded a webrev for perusal: > http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/ > > The general idea of this patch is to mark some cards on the side as > if we needed a generational remembered set barrier, without changing > how Shenandoah works otherwise. Then we should be able to measure how > much mutator overhead this sort of extra barrier might cause. My > expected result was "similar to CMS or Parallel compared to Epsilon", > but that's not what I am seeing in first attempts to run SPECjvm2008. > Some of those benchmarks go way south, 5x and more. So I guess I made > a mistake somewhere, but here it is anyway, so you can see the my > approach, which is reparenting barrier classes to shared card table > barrier classes and then hoping that everything falls into place. ( > The array copying barrier for C1/C2 is switched off for now. > > For now, this patch is based on 11.0.7. I will rebase this to the > latest soon. > > From rkennke at redhat.com Thu Jul 16 17:38:03 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 16 Jul 2020 19:38:03 +0200 Subject: First cut at a card table for Shenandoah In-Reply-To: <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> Message-ID: <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> Instead of simulating, you could probably use my earlier generations- prototype? 
http://cr.openjdk.java.net/~rkennke/generation.patch

Not sure if that easily fits, though, because it's dynamically shuffling
young and old regions, instead of having a fixed boundary. Might be worth
it, though.

I haven't gotten around to try your stuff yet, but will do so soon!

Thanks,
Roman

> That there is too much tracking is quite possible. I'll figure out a
> way to simulate a young generation. I'll also switch those individual
> barrier parts on one-by-one to see which one does the most damage.
> (Benchmarks that cratered: derby, mmpegaudio, scimark, serial.
> "compress" was fine.)
>
> Bernd
>
> On 7/16/20, 1:17 AM, "Roman Kennke" wrote:
>
> CAUTION: This email originated from outside of the organization.
> Do not click links or open attachments unless you can confirm the
> sender and know the content is safe.
>
>
>
> Hi Bernd,
>
> could the performance drop come from the fact that we have no
> generations yet, and thus the barrier would track *all* stores
> (all the
> time) rather than only old->young stores?
>
> I'll give the patch a spin soon!
>
> Thank you!
> Roman
>
>
> On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote:
> > Just having edited some card table operations into Shenandoah
> GC,
> > I have uploaded a webrev for perusal:
> > http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/
> >
> > The general idea of this patch is to mark some cards on the
> side as
> > if we needed a generational remembered set barrier, without
> changing
> > how Shenandoah works otherwise. Then we should be able to
> measure how
> > much mutator overhead this sort of extra barrier might cause.
> My
> > expected result was "similar to CMS or Parallel compared to
> Epsilon",
> > but that's not what I am seeing in first attempts to run
> SPECjvm2008.
> > Some of those benchmarks go way south, 5x and more. So I guess
> I made
> > a mistake somewhere, but here it is anyway, so you can see the
> my
> > approach, which is reparenting barrier classes to shared card
> table
> > barrier classes and then hoping that everything falls into
> place. (
> > The array copying barrier for C1/C2 is switched off for now.
> >
> > For now, this patch is based on 11.0.7. I will rebase this to
> the
> > latest soon.
> >
> >

From conniall at amazon.com Fri Jul 17 04:56:19 2020
From: conniall at amazon.com (Connaughton, Niall)
Date: Fri, 17 Jul 2020 04:56:19 +0000
Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11
Message-ID: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com>

Hey all, firstly thanks for all the work on Shenandoah, I'm excited for its
potential for us.

I've been doing some testing and have run across a couple of issues that
have me scratching my head. The first is Final Mark pauses are increasing
steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By
day 3, they're 12-16ms. The heap occupancy is not ramping up, and neither
are the concurrent cycle times, so I'm not sure what's behind this. The
application is running a 20GB heap, peaking around 35% live data heap
occupancy, and allocating ~1.3-1.5GB/s.

What angles can I look at to dig into the cause of increasing Final Mark
pauses? I don't see a lot of details on the Final Mark in the gc logs, and
there doesn't seem to be much difference in the logs over time, except for
the pause duration increasing.
Here's an example of a Final Mark log for before/after comparison:

[2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark
[2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking
[2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B
[2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions
[2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions
[2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms

[2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark
[2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking
[2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B
[2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions
[2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions
[2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms

The second issue I ran into was that the pacer seemed to be adding a lot of
latency. I couldn't find any traces in the logs of the pacer's activity. The
summary at shutdown from gc+stats is useful, but having some signs of the
pacer in the gc logs as the application runs would help correlate against
other logs showing latency spikes. Is there any way to get more visibility
on the pacer? Disabling the pacer removed the latency impact and we started
seeing some really positive signs on the latency. I was expecting that we'd
just see more degenerated GC and the latency would be similar, but this
wasn't the case.

I'm generally happy running with the pacer disabled, especially as it seems
we'll have more visibility into degenerated GCs than we will over the pacer,
so we can track regression more easily. So I'm asking this more for
understanding than solving a blocking issue.

Happy to take any pointers or provide any more info that would help.

Thanks,
Niall

From rkennke at redhat.com Fri Jul 17 05:10:48 2020
From: rkennke at redhat.com (Roman Kennke)
Date: Fri, 17 Jul 2020 07:10:48 +0200
Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11
In-Reply-To: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com>
References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com>
Message-ID:

Hi Niall,

Thanks for reporting this! Which version of jdk11u/Shenandoah are you
using?

Thanks,
Roman

Connaughton, Niall schrieb am Fr., 17. Juli 2020, 06:57:

> Hey all, firstly thanks for all the work on Shenandoah, I'm excited for
> its potential for us.
>
> I've been doing some testing and have run across a couple of issues that
> have me scratching my head. The first is Final Mark pauses are increasing
> steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By
> day 3, they're 12-16ms. The heap occupancy is not ramping up, and neither
> are the concurrent cycle times, so I'm not sure what's behind this. The
> application is running a 20GB heap, peaking around 35% live data heap
> occupancy, and allocating ~1.3-1.5GB/s.
>
> What angles can I look at to dig into the cause of increasing Final Mark
> pauses? I don't see a lot of details on the Final Mark in the gc logs, and
> there doesn't seem to be much difference in the logs over time, except for
> the pause duration increasing.
Here?s an example of a Final Mark log for > before/after comparison: > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final > marking > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target > Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% > of total), 176M CSet, 1548 CSet regions > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of > total), 11 regions > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final > marking > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target > Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% > of total), 184M CSet, 1535 CSet regions > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of > total), 5 regions > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > The second issue I ran into was that the pacer seemed to be adding a lot > of latency. I couldn?t find any traces in the logs of the pacer?s activity. > The summary at shutdown from gc+stats is useful, but having some signs of > the pacer in the gc logs as the application runs would help correlate > against other logs showing latency spikes. Is there any way to get more > visibility on the pacer? Disabling the pacer removed the latency impact and > we started seeing some really positive signs on the latency. I was > expecting that we?d just see more degenerated GC and the latency would be > similar, but this wasn?t the case. > > I?m generally happy running with the pacer disabled, especially as it > seems we?ll have more visibility into degenerated GCs than we will over the > pacer, so we can track regression more easily. So I?m asking this more for > understanding than solving a blocking issue. > > Happy to take any pointers or provide any more info that would help. > > Thanks, > Niall > > From shade at redhat.com Fri Jul 17 06:47:49 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 17 Jul 2020 08:47:49 +0200 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> Message-ID: <234d3214-3f85-f86c-fce9-c1d955899ef0@redhat.com> On 7/17/20 6:56 AM, Connaughton, Niall wrote: > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a > lot of details on the Final Mark in the gc logs, -Xlog:gc+stats would give you verbose per-cycle pause details on a recent enough sh/jdk11 (including the one released with 11.0.8). > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t > find any traces in the logs of the pacer?s activity. There is no bindings to per-cycle stats, alas. I had a patch before, but never actually pushed it to inclusion: https://bugs.openjdk.java.net/browse/JDK-8249649 > Disabling the pacer removed the latency impact > and we started seeing some really positive signs on the latency. I was expecting that we?d just > see more degenerated GC and the latency would be similar, but this wasn?t the case. Again, recent improvements in pacer might resolve this. 
Recent sh/jdk11 (including the one released with 11.0.8) has most of them. Please try with 11.0.8+? -- Thanks, -Aleksey From shade at redhat.com Fri Jul 17 07:16:20 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 17 Jul 2020 09:16:20 +0200 Subject: RFR (S) 8249649: Shenandoah: provide per-cycle pacing stats Message-ID: RFE: https://bugs.openjdk.java.net/browse/JDK-8249649 Current pacer statistics is printing only at the end of the run. It would be convenient to have it in per-cycle statistics too. Webrev: https://cr.openjdk.java.net/~shade/8249649/webrev.01/ Testing: hotspot_gc_shenandoah; tier{1,2} with Shenandoah; eyeballing gc logs -- Thanks, -Aleksey From shade at redhat.com Fri Jul 17 09:01:47 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 17 Jul 2020 11:01:47 +0200 Subject: [11] 2020-07-17, Bulk backports to sh/jdk11 Message-ID: <539f4e25-4791-49d9-f991-ab60619e042c@redhat.com> https://cr.openjdk.java.net/~shade/shenandoah/backports/jdk11-20200717/webrev.01/ This backports simple fixes, mostly improvements in tests: [backport] 8248041: Shenandoah: pre-Full GC root updates may miss some roots [backport] 8247860: Shenandoah: add update watermark line in rich assert failure message [backport] 8247751: Shenandoah: options tests should run with smaller heaps [backport] 8247754: Shenandoah: mxbeans tests can be shorter [backport] 8247757: Shenandoah: split heavy tests by heuristics to improve parallelism Testing: hotspot_gc_shenandoah {fastdebug,release} -- Thanks, -Aleksey From rkennke at redhat.com Fri Jul 17 09:56:26 2020 From: rkennke at redhat.com (Roman Kennke) Date: Fri, 17 Jul 2020 11:56:26 +0200 Subject: RFR (S) 8249649: Shenandoah: provide per-cycle pacing stats In-Reply-To: References: Message-ID: Looks good to me. Thank you! Roman On Fri, 2020-07-17 at 09:16 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > RFE: > https://bugs.openjdk.java.net/browse/JDK-8249649 > > Current pacer statistics is printing only at the end of the run. It > would be convenient to have it > in per-cycle statistics too. > > Webrev: > https://cr.openjdk.java.net/~shade/8249649/webrev.01/ > > Testing: hotspot_gc_shenandoah; tier{1,2} with Shenandoah; eyeballing > gc logs > From rkennke at redhat.com Fri Jul 17 10:56:39 2020 From: rkennke at redhat.com (Roman Kennke) Date: Fri, 17 Jul 2020 12:56:39 +0200 Subject: [11] 2020-07-17, Bulk backports to sh/jdk11 In-Reply-To: <539f4e25-4791-49d9-f991-ab60619e042c@redhat.com> References: <539f4e25-4791-49d9-f991-ab60619e042c@redhat.com> Message-ID: <79984f3ca2d8043c32fdf7465623f72639c364d7.camel@redhat.com> This looks good to me! 
Thanks, Roman On Fri, 2020-07-17 at 11:01 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > https://cr.openjdk.java.net/~shade/shenandoah/backports/jdk11-20200717/webrev.01/ > > This backports simple fixes, mostly improvements in tests: > > [backport] 8248041: Shenandoah: pre-Full GC root updates may miss > some roots > [backport] 8247860: Shenandoah: add update watermark line in rich > assert failure message > [backport] 8247751: Shenandoah: options tests should run with > smaller heaps > [backport] 8247754: Shenandoah: mxbeans tests can be shorter > [backport] 8247757: Shenandoah: split heavy tests by heuristics to > improve parallelism > > Testing: hotspot_gc_shenandoah {fastdebug,release} > From shade at redhat.com Fri Jul 17 11:21:41 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 17 Jul 2020 13:21:41 +0200 Subject: [11] 2020-07-17, Bulk backports to sh/jdk11 In-Reply-To: <79984f3ca2d8043c32fdf7465623f72639c364d7.camel@redhat.com> References: <539f4e25-4791-49d9-f991-ab60619e042c@redhat.com> <79984f3ca2d8043c32fdf7465623f72639c364d7.camel@redhat.com> Message-ID: <193d5032-e158-23db-8c27-fef3b829fea9@redhat.com> On 7/17/20 12:56 PM, Roman Kennke wrote: > This looks good to me! Pushed! -- Thanks, -Aleksey From zgu at redhat.com Fri Jul 17 13:34:46 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Fri, 17 Jul 2020 09:34:46 -0400 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> Message-ID: <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> Hi Niall, By default, class unloading is disabled for Shenandoah in JDK11. Accumulated class loaders may prolong final mark. You may want to try: jcmd VM.classloader_stats If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark Thanks, -Zhengyu On 7/17/20 12:56 AM, Connaughton, Niall wrote: > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. 
Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > Happy to take any pointers or provide any more info that would help. > > Thanks, > Niall > From conniall at amazon.com Fri Jul 17 15:13:53 2020 From: conniall at amazon.com (Connaughton, Niall) Date: Fri, 17 Jul 2020 15:13:53 +0000 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> Message-ID: <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. Thanks, Niall ?On 7/17/20, 06:35, "Zhengyu Gu" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Hi Niall, By default, class unloading is disabled for Shenandoah in JDK11. Accumulated class loaders may prolong final mark. You may want to try: jcmd VM.classloader_stats If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark Thanks, -Zhengyu On 7/17/20 12:56 AM, Connaughton, Niall wrote: > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. 
The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > Happy to take any pointers or provide any more info that would help. > > Thanks, > Niall > From zgu at redhat.com Fri Jul 17 15:37:51 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Fri, 17 Jul 2020 11:37:51 -0400 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> Message-ID: <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> On 7/17/20 11:13 AM, Connaughton, Niall wrote: > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? 
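[The two commands referenced here are standard tooling; roughly, with the PID and the rest of the command line as placeholders:

  jcmd <pid> VM.classloader_stats

  java -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC \
       -XX:+ClassUnloadingWithConcurrentMark ...

The first dumps per-classloader statistics of a running VM, which is how the many single-class loaders mentioned above show up; the second enables class unloading during Final Mark for Shenandoah on JDK 11, as suggested earlier in this thread.]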
class unloading is also performed during final mark, so it impacts final mark pause as well, but probably without this accumulation effect. We have a parameter, ShenandoahUnloadClassesFrequency, to control frequency of class unloading when it is enabled, default is once every 5 GC cycles. You may want to tune the parameter to achieve some sort of balance. Thanks, -Zhengyu > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > Thanks, > Niall > > ?On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > Hi Niall, > > By default, class unloading is disabled for Shenandoah in JDK11. > Accumulated class loaders may prolong final mark. You may want to try: > > jcmd VM.classloader_stats > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > Thanks, > > -Zhengyu > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? 
Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we'd just see more degenerated GC and the latency would be similar, but this wasn't the case. > > > > I'm generally happy running with the pacer disabled, especially as it > > seems we'll have more visibility into degenerated GCs than we will over the > > pacer, so we can track regression more easily. So I'm asking this more for > > understanding than solving a blocking issue. > > > > Happy to take any pointers or provide any more info that would help. > > > > Thanks, > > Niall > > > > From kdnilsen at amazon.com Sat Jul 18 23:09:17 2020 From: kdnilsen at amazon.com (Nilsen, Kelvin) Date: Sat, 18 Jul 2020 23:09:17 +0000 Subject: Question about SATB barrier Message-ID: I've observed that the various SATB barrier implementations have two different forms. In one form, the address to be overwritten is supplied as an argument and the previous content held at that address is fetched and stored into a SATB buffer. In the other form, the address to be overwritten is not supplied and the "keep-alive" value is supplied as the pre-initialized content of the pre-val register. In this second form, the content of the pre-val register is stored into the SATB buffer. My question is whether the second form is used with write operations for which the "compiler" is able to determine through static analysis the previous content of the value to be overwritten, or is it used only for other situations that require the logging of a keep-alive pointer value? I have been assuming the latter, but I need to make sure I understand how these two forms are used. Thanks. From rkennke at redhat.com Sun Jul 19 11:15:01 2020 From: rkennke at redhat.com (Roman Kennke) Date: Sun, 19 Jul 2020 13:15:01 +0200 Subject: Question about SATB barrier In-Reply-To: References: Message-ID: Hi Kelvin, The form where 'pre-val' is passed as an input instead of loaded from a field is used by the cases where the previous-value needs to be explicitly kept-alive: - Most prominently, in the intrinsics for Reference.get(): we need to keep the referent alive because Reference.get() makes it strongly reachable, and it may otherwise get lost by SATB - Stuff like xchg and cmpxchg, where the previous value is loaded anyway, and we only need to enqueue it. - a few other cases, especially in the runtime impl It is not meant for the compiler-optimization. I believe the compiler would figure it out if it can deduce the previously-stored-value in a safe way. There is also the scenario of the I-U mode. This is basically the inverse of SATB, and we always enqueue the update-value of a store. There we only ever need the simpler form of the enqueueing barrier. As a bonus we don't need to keep-alive Reference.get() there, which means we can even reclaim weakrefs that are accessed during conc-mark. Some users love that feature ;-) Cheers, Roman On Sat, 2020-07-18 at 23:09 +0000, Nilsen, Kelvin wrote: > I've observed that the various SATB barrier implementations have two > different forms. > > In one form, the address to be overwritten is supplied as an argument > and the previous content held at that address is fetched and stored > into a SATB buffer. In the other form, the address to be overwritten > is not supplied and the "keep-alive" value is supplied as the pre- > initialized content of the pre-val register. In this second form, > the content of the pre-val register is stored into the SATB buffer.
> > My question is whether the second form is used with write operations > for which the "compiler" is able to determine through static analysis > the previous content of the value to be overwritten, or is it used > only for other situations that require the logging of a keep-alive > pointer value? > > I have been assuming the latter, but I need to make sure I understand > how these two forms are used. > > Thanks. > > > > ? > From kdnilsen at amazon.com Sun Jul 19 17:13:00 2020 From: kdnilsen at amazon.com (Nilsen, Kelvin) Date: Sun, 19 Jul 2020 17:13:00 +0000 Subject: Question about SATB barrier Message-ID: <1D4139CC-0E84-4AE6-9E10-4AB955FADFCB@amazon.com> Thanks for this explanation. It sounds like I'm mostly on the right path still (or at least on a legitimate path). I'm in the midst of crafting an intergenerational write-barrier implementation that builds on the existing SATB logging code. I'm adding to the existing log each address that is overwritten in addition to the overwritten pointer values. I tag the new values so they won't be confused by the GC when it processes the content of the SATB log. (I've got a prototype working but have not yet covered all of the different code generation models. I'm currently working on those.) I probably do not yet fully appreciate all of the subtle nuances associated with the pre-val passed in scenarios. It looks like some of these situations are going to require some special handling that is not covered by my existing approach. But I believe those will be relatively rare operations and initial performance measurements to characterize the "cost" of this approach can ignore the special cases. Please let me know if you anticipate any problems with my general approach. ?On 7/19/20, 4:15 AM, "Roman Kennke" wrote: Hi Kelvin, The form where 'pre-val' is passed as an input instead of loaded from a field is used by the cases where the previous-value needs to be explicitely kept-alive: - Most prominently, in the intrinsics for Reference.get(): we need to keep the referent alive because Referent.get() makes it strongly reachable, and it may otherwise get lost by SATB - Stuff like xchg and cmpxchg, where the previous value is loaded anyway, and we only need to enqueue it. - a few other cases, especially in the runtime impl It is not meant for the compiler-optimization. I believe the compiler would figure it out if it can deduce the previously-stored-value in a safe way. There is also the scenario of the I-U mode. This is basically the inverse of SATB, and we always enqueue the update-value of a store. There we only ever need the simpler form of the enqueueing barrier. As a bonus we don't need to keep-alive Referent.get() there, which means we can even reclaim weakrefs that are accessed during conc-mark. Some users love that feature ;-) Cheers, Roman On Sat, 2020-07-18 at 23:09 +0000, Nilsen, Kelvin wrote: > I've observed that the various SATB barrier implementations have two > different forms. > > In one form, the address to be overwritten is supplied as an argument > and the previous content held at that address is fetched and stored > into a SATB buffer. In the other form, the address to be overwritten > is not supplied and the "keep-alive" value is supplied as the pre- > initialized content of the pre-val register. In this second form, > the content of the pre-val register is stored into the SATB buffer. 
> > My question is whether the second form is used with write operations > for which the "compiler" is able to determine through static analysis > the previous content of the value to be overwritten, or is it used > only for other situations that require the logging of a keep-alive > pointer value? > > I have been assuming the latter, but I need to make sure I understand > how these two forms are used. > > Thanks. > > > > > From shade at redhat.com Mon Jul 20 09:07:32 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Mon, 20 Jul 2020 11:07:32 +0200 Subject: [11u] 2020-07-20, Bulk backports to sh/jdk11 Message-ID: <3ae9fc75-993d-6d21-4dfa-eba35411e7e7@redhat.com> http://cr.openjdk.java.net/~shade/shenandoah/backports/jdk11-20200720/webrev.01/ This backports a few improvements, notably in Pacer: [backport] 8248652: Shenandoah: SATB buffer handling may assume no forwarded objects [backport] 8247367: Shenandoah: pacer should wait on lock instead of exponential backoff [backport] 8247593: Shenandoah: should not block pacing reporters [backport] 8249649: Shenandoah: provide per-cycle pacing stats Testing: hotspot_gc_shenandoah {fastdebug,release}, tier{1,2} with Shenandoah (running, expecting no new failures) -- Thanks, -Aleksey From rkennke at redhat.com Mon Jul 20 10:00:41 2020 From: rkennke at redhat.com (Roman Kennke) Date: Mon, 20 Jul 2020 12:00:41 +0200 Subject: [11u] 2020-07-20, Bulk backports to sh/jdk11 In-Reply-To: <3ae9fc75-993d-6d21-4dfa-eba35411e7e7@redhat.com> References: <3ae9fc75-993d-6d21-4dfa-eba35411e7e7@redhat.com> Message-ID: <67a9cd7523e56b0a4a53822fb9349423636d9fc5.camel@redhat.com> Hi Aleksey, the changes look good to me! Thank you! Roman On Mon, 2020-07-20 at 11:07 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > http://cr.openjdk.java.net/~shade/shenandoah/backports/jdk11-20200720/webrev.01/ > > This backports a few improvements, notably in Pacer: > [backport] 8248652: Shenandoah: SATB buffer handling may assume no > forwarded objects > [backport] 8247367: Shenandoah: pacer should wait on lock instead of > exponential backoff > [backport] 8247593: Shenandoah: should not block pacing reporters > [backport] 8249649: Shenandoah: provide per-cycle pacing stats > > Testing: hotspot_gc_shenandoah {fastdebug,release}, tier{1,2} with > Shenandoah (running, expecting no > new failures) > From shade at redhat.com Mon Jul 20 10:28:14 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Mon, 20 Jul 2020 12:28:14 +0200 Subject: [11u] 2020-07-20, Bulk backports to sh/jdk11 In-Reply-To: <67a9cd7523e56b0a4a53822fb9349423636d9fc5.camel@redhat.com> References: <3ae9fc75-993d-6d21-4dfa-eba35411e7e7@redhat.com> <67a9cd7523e56b0a4a53822fb9349423636d9fc5.camel@redhat.com> Message-ID: <80658fc4-d868-3dc1-57d5-2fc754e4ff8c@redhat.com> On 7/20/20 12:00 PM, Roman Kennke wrote: > the changes look good to me! Pushed! -- Thanks, -Aleksey From shade at redhat.com Mon Jul 20 14:21:04 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Mon, 20 Jul 2020 16:21:04 +0200 Subject: [8u, 11u] RFR: Shenandoah: enable low-frequency STW class unloading Message-ID: Hi, I think we have gathered enough real world data to consider enabling STW class unloading by default, but on much lower frequency. The concurrent class unloading is enabled by default since JDK 14, because we have enough infrastructure there. 
In 8u and 11u, we don't have such luck, and so we can only do STW class unloading, which adds to Final Mark time when it happens. This is why it was disabled in 8u and 11u by default, asking users to explicitly enable it. There are still good reasons to do it, even with STW: a) String Table cruft accumulates over time; b) CLDG cruft accumulates over time; c) The connection between the two above and the disabled -CUwCM is very opaque; The long-run performance stability would be improved if we accepted a longer Final Mark pause every once in a while. It seems doing this every 100th cycle is a good spot? In many cases I've seen, the GC runs a few times per minute, which means we would get the several class unloading cycles every hour. That should be enough to amortize costs, and also be frequent enough to see as regular outlier in users' monitoring, if they want to turn it off. 8u patch: https://cr.openjdk.java.net/~shade/shenandoah/regular-stw-cu/webrev.01.8u/ Passes: hotspot_gc_shenandoah, ad-hoc runs 11u patch: https://cr.openjdk.java.net/~shade/shenandoah/regular-stw-cu/webrev.01.11u/ Passes: hotspot_gc_shenandoah, tier{1,2} with Shenandoah Thoughts? -- Thanks, -Aleksey From rkennke at redhat.com Mon Jul 20 16:25:36 2020 From: rkennke at redhat.com (Roman Kennke) Date: Mon, 20 Jul 2020 18:25:36 +0200 Subject: [8u, 11u] RFR: Shenandoah: enable low-frequency STW class unloading In-Reply-To: References: Message-ID: <381aee40eeaabbeda130ae3fa12c03b5dbf4f10f.camel@redhat.com> Ok I guess this should work for the start. It varies *a lot* depending on the workload though: some workloads might never need it, some might need it at higher frequency. It would be good to have a better trigger like some threshold(s) on number of loaded classes or stringtable occupancy. Anyway, let's go with your patches for the start. Roman On Mon, 2020-07-20 at 16:21 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > Hi, > > I think we have gathered enough real world data to consider enabling > STW class unloading by default, > but on much lower frequency. The concurrent class unloading is > enabled by default since JDK 14, > because we have enough infrastructure there. In 8u and 11u, we don't > have such luck, and so we can > only do STW class unloading, which adds to Final Mark time when it > happens. This is why it was > disabled in 8u and 11u by default, asking users to explicitly enable > it. > > There are still good reasons to do it, even with STW: > a) String Table cruft accumulates over time; > b) CLDG cruft accumulates over time; > c) The connection between the two above and the disabled -CUwCM is > very opaque; > > The long-run performance stability would be improved if we accepted a > longer Final Mark pause every > once in a while. It seems doing this every 100th cycle is a good > spot? In many cases I've seen, the > GC runs a few times per minute, which means we would get the several > class unloading cycles every > hour. That should be enough to amortize costs, and also be frequent > enough to see as regular outlier > in users' monitoring, if they want to turn it off. > > 8u patch: > > https://cr.openjdk.java.net/~shade/shenandoah/regular-stw-cu/webrev.01.8u/ > Passes: hotspot_gc_shenandoah, ad-hoc runs > > 11u patch: > > https://cr.openjdk.java.net/~shade/shenandoah/regular-stw-cu/webrev.01.11u/ > Passes: hotspot_gc_shenandoah, tier{1,2} with Shenandoah > > Thoughts? 
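[A threshold on the number of loaded classes, as suggested above, keys off data that is also visible from the Java side. The following is a minimal, self-contained sketch for observing that accumulation from within an application; the class name, interval and printing are invented for illustration, and this is not the in-VM heuristic itself:

  import java.lang.management.ClassLoadingMXBean;
  import java.lang.management.ManagementFactory;

  public class LoadedClassWatcher {
      public static void main(String[] args) throws InterruptedException {
          ClassLoadingMXBean cl = ManagementFactory.getClassLoadingMXBean();
          int last = cl.getLoadedClassCount();
          while (true) {
              Thread.sleep(60_000);
              // A delta that keeps growing is the kind of signal a smarter
              // unloading trigger could act on, instead of a fixed cycle count.
              int now = cl.getLoadedClassCount();
              System.out.println("loaded=" + now + " delta=" + (now - last));
              last = now;
          }
      }
  }

]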
> From shade at redhat.com Tue Jul 21 06:15:31 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 21 Jul 2020 08:15:31 +0200 Subject: [8u, 11u] RFR: Shenandoah: enable low-frequency STW class unloading In-Reply-To: <381aee40eeaabbeda130ae3fa12c03b5dbf4f10f.camel@redhat.com> References: <381aee40eeaabbeda130ae3fa12c03b5dbf4f10f.camel@redhat.com> Message-ID: <776c244e-bee3-9ef6-111a-f972afa6e4d4@redhat.com> On 7/20/20 6:25 PM, Roman Kennke wrote: > Ok I guess this should work for the start. > > It varies *a lot* depending on the workload though: some workloads > might never need it, some might need it at higher frequency. It would > be good to have a better trigger like some threshold(s) on number of > loaded classes or stringtable occupancy. Anyway, let's go with your > patches for the start. Right. Pushed. -- Thanks, -Aleksey From shade at redhat.com Tue Jul 21 06:14:59 2020 From: shade at redhat.com (shade at redhat.com) Date: Tue, 21 Jul 2020 06:14:59 +0000 Subject: hg: shenandoah/jdk11: Shenandoah: enable low-frequency STW class unloading Message-ID: <202007210615.06L6F0Jd001645@aojmv0008.oracle.com> Changeset: 1def25c0a60e Author: shade Date: 2020-07-20 16:05 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk11/rev/1def25c0a60e Shenandoah: enable low-frequency STW class unloading ! src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ! src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp ! test/hotspot/jtreg/gc/shenandoah/options/TestClassUnloadingArguments.java From shade at redhat.com Tue Jul 21 08:20:40 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 21 Jul 2020 10:20:40 +0200 Subject: [8u] 2020-07-21, Bulk backports to sh/jdk8 Message-ID: <887c9967-0c8d-4dad-f301-b62e58b390d4@redhat.com> https://cr.openjdk.java.net/~shade/shenandoah/backports/jdk8u-20200721/webrev.01/ This backports a few changes to 8u, mostly test improvements: [backport] 8242375: Shenandoah: Remove ShenandoahHeuristic::record_gc_start/end methods [backport] 8247860: Shenandoah: add update watermark line in rich assert failure message [backport] 8247751: Shenandoah: options tests should run with smaller heaps [backport] 8247754: Shenandoah: mxbeans tests can be shorter [backport] 8247757: Shenandoah: split heavy tests by heuristics to improve parallelism Testing: hotspot_gc_shenandoah {fastdebug,release} -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 21 10:36:29 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 21 Jul 2020 12:36:29 +0200 Subject: RFR: 8249787: Make TestGCLocker more resilient with concurrent GCs Message-ID: <7af66d4a831ca2af89584ce9b99f478aaee434e7.camel@redhat.com> TestGCLocker seems to be made with the assumption that GCs are triggered on allocation failure. It has somewhat complicated machinery to generate some GC pressure and especially to free up some memory in its artificial MemoryUser. This leads to some weird interactions with Shenandoah control machinery. For example, it frees memory when heap usage is >75% AND a certain time has passed since it last freed memory (500ms). In those 500ms, it will keep on allocating chunks of memory, eventually running OOM because it keeps holding on to those chunks, while the GC is running like mad trying to free up memory, but can't because the stupid app doesn't let go. However, it will still keep resetting timeSinceLastGC because of some tiny objects getting freed-up since last time (not enough to prevent OOM though).
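[A condensed sketch of that allocate/free pattern follows. All names and numbers are illustrative, mirroring the description above rather than the actual test source; run it with a small heap to reproduce the effect:

  import java.util.ArrayList;
  import java.util.List;

  public class MemoryUserSketch {
      public static void main(String[] args) {
          // Time gate; the RFR below proposes making this configurable, with 0 disabling it.
          long minFreeWaitMs = args.length > 0 ? Long.parseLong(args[0]) : 500;
          List<byte[]> cache = new ArrayList<>();
          long lastFreeMs = System.currentTimeMillis();
          Runtime rt = Runtime.getRuntime();
          for (int i = 0; i < 2_000; i++) {
              cache.add(new byte[1024 * 1024]);   // keep holding on to chunks
              double used = 1.0 - (double) rt.freeMemory() / rt.totalMemory();
              long sinceLastFree = System.currentTimeMillis() - lastFreeMs;
              if (used > 0.75 && sinceLastFree >= minFreeWaitMs) {
                  cache.clear();                  // only now can the GC reclaim anything
                  lastFreeMs = System.currentTimeMillis();
              }
          }
      }
  }

With a concurrent GC running in between, the time gate is what keeps the retained set growing even while cycles complete, which is the failure mode described above.]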
Bug: https://bugs.openjdk.java.net/browse/JDK-8249787 Proposed solution is an option to pass-in minFreeCriticalWaitMS. This way we can let Shenandoah pass-in 0 and disable that check, which seems pointless anyway with a concurrent GC that might be triggered earlier than on allocation-failure. Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249787/webrev.00/ Testing: TestGCLockerWithShenandoah with various settings, hotspot_gc_shenandoah What do you think? Does it make sense? Thanks, Roman From rkennke at redhat.com Tue Jul 21 10:40:00 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 21 Jul 2020 12:40:00 +0200 Subject: [8u] 2020-07-21, Bulk backports to sh/jdk8 In-Reply-To: <887c9967-0c8d-4dad-f301-b62e58b390d4@redhat.com> References: <887c9967-0c8d-4dad-f301-b62e58b390d4@redhat.com> Message-ID: Yep, looks good! Thanks! Roman On Tue, 2020-07-21 at 10:20 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > https://cr.openjdk.java.net/~shade/shenandoah/backports/jdk8u-20200721/webrev.01/ > > This backports a few changes to 8u, mostly test improvements: > [backport] 8242375: Shenandoah: Remove > ShenandoahHeuristic::record_gc_start/end methods > [backport] 8247860: Shenandoah: add update watermark line in rich > assert failure message > [backport] 8247751: Shenandoah: options tests should run with > smaller heaps > [backport] 8247754: Shenandoah: mxbeans tests can be shorter > [backport] 8247757: Shenandoah: split heavy tests by heuristics to > improve parallelism > > Testing: hotspot_gc_shenandoah {fastdebug,release} > From rkennke at redhat.com Tue Jul 21 10:45:50 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 21 Jul 2020 12:45:50 +0200 Subject: RFR: 8249787: Make TestGCLocker more resilient with concurrent GCs Message-ID: (Resending because wrong recipient in previous attempt) TestGCLocker seems to be made with the assumption that GCs are triggered on allocation failure. It has a somewhat complicated machinery to generate some GC pressure and especially to free up some memory in its artificial MemoryUser. This leads to some weird interactions with Shenandoah control machinery. For example, it frees memory when heap usage is >75% AND a certain time has passed since it last freed memory (500ms). In those 500ms, it will keep on allocating chunks of memory, eventually running OOM because it keeps holding on to those chunks, while the GC is running like mad trying to free up memory, but can't because the stupid up doesn't let go. However, it will still keep resetting timeSinceLastGC because of some tiny objects getting freed-up since last time (not enough to prevent OOM though). Bug: https://bugs.openjdk.java.net/browse/JDK-8249787 Proposed solution is an option to pass-in minFreeCriticalWaitMS. This way we can let Shenandoah pass-in 0 and disable that check, which seems pointless anyway with a concurrent GC that might be triggered earlier than on allocation-failure. Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249787/webrev.00/ Testing: TestGCLockerWithShenandoah with various settings, hotspot_gc_shenandoah What do you think? Does it make sense? 
Thanks, Roman From thomas.stuefe at gmail.com Tue Jul 21 10:56:26 2020 From: thomas.stuefe at gmail.com (=?UTF-8?Q?Thomas_St=C3=BCfe?=) Date: Tue, 21 Jul 2020 12:56:26 +0200 Subject: [8u, 11u] RFR: Shenandoah: enable low-frequency STW class unloading In-Reply-To: References: Message-ID: Out of curiosity, the GCs induced by bumping against the Metaspace GC threshold when loading classes would still work and unload stuff? ..Thomas On Mon, Jul 20, 2020 at 4:21 PM Aleksey Shipilev wrote: > Hi, > > I think we have gathered enough real world data to consider enabling STW > class unloading by default, > but on much lower frequency. The concurrent class unloading is enabled by > default since JDK 14, > because we have enough infrastructure there. In 8u and 11u, we don't have > such luck, and so we can > only do STW class unloading, which adds to Final Mark time when it > happens. This is why it was > disabled in 8u and 11u by default, asking users to explicitly enable it. > > There are still good reasons to do it, even with STW: > a) String Table cruft accumulates over time; > b) CLDG cruft accumulates over time; > c) The connection between the two above and the disabled -CUwCM is very > opaque; > > The long-run performance stability would be improved if we accepted a > longer Final Mark pause every > once in a while. It seems doing this every 100th cycle is a good spot? In > many cases I've seen, the > GC runs a few times per minute, which means we would get the several class > unloading cycles every > hour. That should be enough to amortize costs, and also be frequent enough > to see as regular outlier > in users' monitoring, if they want to turn it off. > > 8u patch: > > https://cr.openjdk.java.net/~shade/shenandoah/regular-stw-cu/webrev.01.8u/ > Passes: hotspot_gc_shenandoah, ad-hoc runs > > 11u patch: > > https://cr.openjdk.java.net/~shade/shenandoah/regular-stw-cu/webrev.01.11u/ > Passes: hotspot_gc_shenandoah, tier{1,2} with Shenandoah > > Thoughts? > > -- > Thanks, > -Aleksey > > From shade at redhat.com Tue Jul 21 12:01:58 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 21 Jul 2020 14:01:58 +0200 Subject: [8u] 2020-07-21, Bulk backports to sh/jdk8 In-Reply-To: References: <887c9967-0c8d-4dad-f301-b62e58b390d4@redhat.com> Message-ID: On 7/21/20 12:40 PM, Roman Kennke wrote: > Yep, looks good! Thanks! Pushed. -- Thanks, -Aleksey From shade at redhat.com Tue Jul 21 12:01:52 2020 From: shade at redhat.com (shade at redhat.com) Date: Tue, 21 Jul 2020 12:01:52 +0000 Subject: hg: shenandoah/jdk8/hotspot: 5 new changesets Message-ID: <202007211201.06LC1qBg001815@aojmv0008.oracle.com> Changeset: 5c1d47eb139e Author: zgu Date: 2020-04-08 11:21 -0400 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/5c1d47eb139e [backport] 8242375: Shenandoah: Remove ShenandoahHeuristic::record_gc_start/end methods Reviewed-by: shade, rkennke ! src/share/vm/gc_implementation/shenandoah/heuristics/shenandoahHeuristics.cpp ! src/share/vm/gc_implementation/shenandoah/heuristics/shenandoahHeuristics.hpp ! src/share/vm/gc_implementation/shenandoah/shenandoahUtils.cpp Changeset: 78b721a8ce6b Author: shade Date: 2020-06-18 19:14 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/78b721a8ce6b [backport] 8247860: Shenandoah: add update watermark line in rich assert failure message Reviewed-by: rkennke ! 
src/share/vm/gc_implementation/shenandoah/shenandoahAsserts.cpp Changeset: 1712d04788e7 Author: shade Date: 2020-06-17 17:21 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/1712d04788e7 [backport] 8247751: Shenandoah: options tests should run with smaller heaps Reviewed-by: zgu, rkennke ! test/gc/shenandoah/options/TestArgumentRanges.java ! test/gc/shenandoah/options/TestClassUnloadingArguments.java ! test/gc/shenandoah/options/TestExplicitGC.java ! test/gc/shenandoah/options/TestExplicitGCNoConcurrent.java ! test/gc/shenandoah/options/TestHeuristicsUnlock.java ! test/gc/shenandoah/options/TestHumongousThresholdArgs.java ! test/gc/shenandoah/options/TestModeUnlock.java ! test/gc/shenandoah/options/TestThreadCounts.java ! test/gc/shenandoah/options/TestThreadCountsOverride.java ! test/gc/shenandoah/options/TestWrongBarrierDisable.java ! test/gc/shenandoah/options/TestWrongBarrierEnable.java Changeset: 654f36595763 Author: shade Date: 2020-06-17 17:21 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/654f36595763 [backport] 8247754: Shenandoah: mxbeans tests can be shorter Reviewed-by: rkennke ! test/gc/shenandoah/mxbeans/TestChurnNotifications.java ! test/gc/shenandoah/mxbeans/TestPauseNotifications.java Changeset: e276bbfff22f Author: shade Date: 2020-06-17 17:22 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/e276bbfff22f [backport] 8247757: Shenandoah: split heavy tests by heuristics to improve parallelism Reviewed-by: rkennke ! test/gc/shenandoah/TestAllocHumongousFragment.java ! test/gc/shenandoah/TestAllocIntArrays.java ! test/gc/shenandoah/TestAllocObjectArrays.java ! test/gc/shenandoah/TestAllocObjects.java ! test/gc/shenandoah/TestLotsOfCycles.java ! test/gc/shenandoah/TestRegionSampling.java ! test/gc/shenandoah/TestRetainObjects.java ! test/gc/shenandoah/TestSieveObjects.java ! test/gc/shenandoah/mxbeans/TestChurnNotifications.java ! test/gc/shenandoah/mxbeans/TestPauseNotifications.java From rkennke at redhat.com Tue Jul 21 14:38:19 2020 From: rkennke at redhat.com (rkennke at redhat.com) Date: Tue, 21 Jul 2020 16:38:19 +0200 Subject: RFR: 8249801: Shenandoah: Clear soft-refs on requested GC cycle Message-ID: <03a76972dec4222285cf7aff307714448b6029a3.camel@redhat.com> Shenandoah is not clearing soft-refs on a requested (by System.gc() or implicitly) GC. We should do that. It is breaking java/util/logging/Logger/bundleLeak/BundleTest.java that expects soft-refs to eventually be cleared by System.gc().
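[The expectation behind that test can be reduced to a standalone illustration. This is not the actual BundleTest, just the soft-reference behavior it relies on: a softly reachable object should eventually be reclaimed once GC cycles are explicitly requested, which is what the fix below makes Shenandoah do:

  import java.lang.ref.SoftReference;

  public class SoftRefClearSketch {
      public static void main(String[] args) throws InterruptedException {
          SoftReference<byte[]> ref = new SoftReference<>(new byte[1024]);
          for (int i = 0; i < 100 && ref.get() != null; i++) {
              System.gc();        // requested cycles; with the fix, these clear soft refs
              Thread.sleep(100);
          }
          System.out.println(ref.get() == null ? "soft reference cleared" : "soft reference still alive");
      }
  }

]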
Let's leave the _gc_requested flag alone (the intent to read it at the beginning), and instead poll explicit/implicit gc requests, plus fix the comment: diff -r e0788e02e0de src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp Tue Jul 21 14:07:45 2020 +0200 +++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp Tue Jul 21 16:43:46 2020 +0200 @@ -175,6 +175,6 @@ // Blow all soft references on this cycle, if handling allocation failure, - // or we are requested to do so unconditionally. - if (alloc_failure_pending || ShenandoahAlwaysClearSoftRefs) { + // either implicit or explicit GC request, or we are requested to do so unconditionally. + if (alloc_failure_pending || implicit_gc_requested || explicit_gc_requested || ShenandoahAlwaysClearSoftRefs) { heap->soft_ref_policy()->set_should_clear_all_soft_refs(true); } -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 21 20:17:54 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 21 Jul 2020 22:17:54 +0200 Subject: RFR: 8249787: Make TestGCLocker more resilient with concurrent GCs Message-ID: <0c1b87939518f89f4d056d71471cdd3fe1a4a275.camel@redhat.com> (Re-sending because the original hasn't gotten through to hotspot-gc- dev *sigh*) TestGCLocker seems to be made with the assumption that GCs are triggered on allocation failure. It has a somewhat complicated machinery to generate some GC pressure and especially to free up some memory in its artificial MemoryUser. This leads to some weird interactions with Shenandoah control machinery. For example, it frees memory when heap usage is >75% AND a certain time has passed since it last freed memory (500ms). In those 500ms, it will keep on allocating chunks of memory, eventually running OOM because it keeps holding on to those chunks, while the GC is running like mad trying to free up memory, but can't because the stupid up doesn't let go. However, it will still keep resetting timeSinceLastGC because of some tiny objects getting freed-up since last time (not enough to prevent OOM though). Bug: https://bugs.openjdk.java.net/browse/JDK-8249787 Proposed solution is an option to pass-in minFreeCriticalWaitMS. This way we can let Shenandoah pass-in 0 and disable that check, which seems pointless anyway with a concurrent GC that might be triggered earlier than on allocation-failure. Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249787/webrev.00/ Testing: TestGCLockerWithShenandoah with various settings, hotspot_gc_shenandoah What do you think? Does it make sense? Thanks, Roman From rkennke at redhat.com Wed Jul 22 12:55:50 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 22 Jul 2020 14:55:50 +0200 Subject: RFR: 8249877: Shenandoah: Report number of dead weak oops during STW weak roots Message-ID: <39987ea8921745992ec89a80c890d52606925877.camel@redhat.com> MemberNameLeak test is failing when running in STW mode, because ResolvedMethodTable never receives a gc_notification() call, which is usually done by calling report_num_dead() on the corresponding oop- storage. We do that in concurrent-weak-processing, but apparently not during STW weak processing. Bug: https://bugs.openjdk.java.net/browse/JDK-8249877 Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249877/webrev.00/ Testing: hotspot_gc_shenandoah, MemberNameLeak.java, tier1 & tier2 without regressions. Ok? 
From zgu at redhat.com Wed Jul 22 13:15:41 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 22 Jul 2020 09:15:41 -0400 Subject: RFR: 8249877: Shenandoah: Report number of dead weak oops during STW weak roots In-Reply-To: <39987ea8921745992ec89a80c890d52606925877.camel@redhat.com> References: <39987ea8921745992ec89a80c890d52606925877.camel@redhat.com> Message-ID: Looks good. Thanks, -Zhengyu On 7/22/20 8:55 AM, Roman Kennke wrote: > MemberNameLeak test is failing when running in STW mode, because > ResolvedMethodTable never receives a gc_notification() call, which is > usually done by calling report_num_dead() on the corresponding oop- > storage. We do that in concurrent-weak-processing, but apparently not > during STW weak processing. > > Bug: > https://bugs.openjdk.java.net/browse/JDK-8249877 > Webrev: > http://cr.openjdk.java.net/~rkennke/JDK-8249877/webrev.00/ > > Testing: hotspot_gc_shenandoah, MemberNameLeak.java, tier1 & tier2 > without regressions. > > Ok? > > > From zgu at redhat.com Wed Jul 22 14:07:49 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 22 Jul 2020 10:07:49 -0400 Subject: RFR: 8249877: Shenandoah: Report number of dead weak oops during STW weak roots In-Reply-To: References: <39987ea8921745992ec89a80c890d52606925877.camel@redhat.com> Message-ID: <71437e96-013b-ff99-c2db-40733446d17f@redhat.com> Second thought, you may just want to call _weak_processing_task.report_num_dead() in ShenandoahParallelWeakRootsCleaningTask's destructor. Thanks, -Zhengyu On 7/22/20 9:15 AM, Zhengyu Gu wrote: > Looks good. > > Thanks, > > -Zhengyu > > On 7/22/20 8:55 AM, Roman Kennke wrote: >> MemberNameLeak test is failing when running in STW mode, because >> ResolvedMethodTable never receives a gc_notification() call, which is >> usually done by calling report_num_dead() on the corresponding oop- >> storage. We do that in concurrent-weak-processing, but apparently not >> during STW weak processing. >> >> Bug: >> https://bugs.openjdk.java.net/browse/JDK-8249877 >> Webrev: >> http://cr.openjdk.java.net/~rkennke/JDK-8249877/webrev.00/ >> >> Testing: hotspot_gc_shenandoah, MemberNameLeak.java, tier1 & tier2 >> without regressions. >> >> Ok? >> >> >> From rkennke at redhat.com Wed Jul 22 14:49:37 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 22 Jul 2020 16:49:37 +0200 Subject: RFR: 8249877: Shenandoah: Report number of dead weak oops during STW weak roots In-Reply-To: <71437e96-013b-ff99-c2db-40733446d17f@redhat.com> References: <39987ea8921745992ec89a80c890d52606925877.camel@redhat.com> <71437e96-013b-ff99-c2db-40733446d17f@redhat.com> Message-ID: <50137546d4bea2112f18cbe996a1e0c82b7ed2d6.camel@redhat.com> On Wed, 2020-07-22 at 10:07 -0400, Zhengyu Gu wrote: > Second thought, you may just want to call > _weak_processing_task.report_num_dead() in > ShenandoahParallelWeakRootsCleaningTask's destructor. > Right, that would be cleaner. I already pushed the change, I filed a new one for this: https://bugs.openjdk.java.net/browse/JDK-8249884 Thanks, Roman > Thanks, > > -Zhengyu > > > > On 7/22/20 9:15 AM, Zhengyu Gu wrote: > > Looks good. > > > > Thanks, > > > > -Zhengyu > > > > On 7/22/20 8:55 AM, Roman Kennke wrote: > > > MemberNameLeak test is failing when running in STW mode, because > > > ResolvedMethodTable never receives a gc_notification() call, > > > which is > > > usually done by calling report_num_dead() on the corresponding > > > oop- > > > storage. We do that in concurrent-weak-processing, but apparently > > > not > > > during STW weak processing. 
> > > > > > Bug: > > > https://bugs.openjdk.java.net/browse/JDK-8249877 > > > Webrev: > > > http://cr.openjdk.java.net/~rkennke/JDK-8249877/webrev.00/ > > > > > > Testing: hotspot_gc_shenandoah, MemberNameLeak.java, tier1 & > > > tier2 > > > without regressions. > > > > > > Ok? > > > > > > > > > From rkennke at redhat.com Wed Jul 22 14:55:21 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 22 Jul 2020 16:55:21 +0200 Subject: RFR: 8249884: Shenandoah: Call report_num_dead() from ShParallelWeakRootsCleaningTask destructor Message-ID: <6f0dfef42ae0df93ae06e920cc1d292fc3bcce1e.camel@redhat.com> JDK-8249877 added a call to report_num_dead() after each call to ShParallelWeakRotsCleaningTask usage. It is cleaner to simply call it from the destructor of that task. Bug: https://bugs.openjdk.java.net/browse/JDK-8249884 Webrev: http://cr.openjdk.java.net/~rkennke/JDK-8249884/webrev.00/ Testing: hotspot_gc_shenandoah, MemberNameLeak.java Ok? Thanks, Roman From zgu at redhat.com Wed Jul 22 15:00:56 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 22 Jul 2020 11:00:56 -0400 Subject: RFR: 8249884: Shenandoah: Call report_num_dead() from ShParallelWeakRootsCleaningTask destructor In-Reply-To: <6f0dfef42ae0df93ae06e920cc1d292fc3bcce1e.camel@redhat.com> References: <6f0dfef42ae0df93ae06e920cc1d292fc3bcce1e.camel@redhat.com> Message-ID: Good! Thanks, -Zhengyu On 7/22/20 10:55 AM, Roman Kennke wrote: > JDK-8249877 added a call to report_num_dead() after each call to > ShParallelWeakRotsCleaningTask usage. It is cleaner to simply call it > from the destructor of that task. > > Bug: > https://bugs.openjdk.java.net/browse/JDK-8249884 > Webrev: > http://cr.openjdk.java.net/~rkennke/JDK-8249884/webrev.00/ > > Testing: hotspot_gc_shenandoah, MemberNameLeak.java > > Ok? > > Thanks, > Roman > From mathiske at amazon.com Wed Jul 22 17:58:18 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Wed, 22 Jul 2020 17:58:18 +0000 Subject: First cut at a card table for Shenandoah In-Reply-To: <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> Message-ID: <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> That limiting card table updates to part of the heap helps seems doubtful to me now: 1. I could not find code that does this for Parallel or CMS. Correct me if I got this wrong! 2. The young gen detection technique I did see in G1 was to read from the card table and compare the value there to a young gen indicator value. Very similarly, conditional marking reads the value and compares it to the dirty card value. So I ran the benchmarks with -XX:+UseCondCardMark and dirtied the whole card table to begin with. Only minimal improvement. ?On 7/16/20, 10:40 AM, "Roman Kennke" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Instead of simulating, you could probably use my earlier generations- prototype? http://cr.openjdk.java.net/~rkennke/generation.patch Not sure if that easily fits, though, because it's dynamically shuffling young and old regions, instead of having a fixed boundary. Might be worth though. I haven't gotten around to try your stuff yet, but will do so soon! Thanks, Roman > That there is too much tracking is quite possible. I'll figure out a > way to simulate a young generation. 
I'll also switch those individual > barrier parts on one-by-one to see which one does the most damage. > (Benchmarks that cratered: derby, mmpegaudio, scimark, serial. > "compress" was fine.) > > Bernd > > On 7/16/20, 1:17 AM, "Roman Kennke" wrote: > > CAUTION: This email originated from outside of the organization. > Do not click links or open attachments unless you can confirm the > sender and know the content is safe. > > > > Hi Bernd, > > could the performance drop come from the fact that we have no > generations yet, and thus the barrier would track *all* stores > (all the > time) rather than only old->young stores? > > I'll give the patch a spin soon! > > Thank you! > Roman > > > On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote: > > Just having edited some card table operations into Shenandoah > GC, > > I have uploaded a webrev for perusal: > > http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/ > > > > The general idea of this patch is to mark some cards on the > side as > > if we needed a generational remembered set barrier, without > changing > > how Shenandoah works otherwise. Then we should be able to > measure how > > much mutator overhead this sort of extra barrier might cause. > My > > expected result was "similar to CMS or Parallel compared to > Epsilon", > > but that's not what I am seeing in first attempts to run > SPECjvm2008. > > Some of those benchmarks go way south, 5x and more. So I guess > I made > > a mistake somewhere, but here it is anyway, so you can see the > my > > approach, which is reparenting barrier classes to shared card > table > > barrier classes and then hoping that everything falls into > place. ( > > The array copying barrier for C1/C2 is switched off for now. > > > > For now, this patch is based on 11.0.7. I will rebase this to > the > > latest soon. > > > > > > From mathiske at amazon.com Wed Jul 22 20:31:05 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Wed, 22 Jul 2020 20:31:05 +0000 Subject: First cut at a card table for Shenandoah In-Reply-To: <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> Message-ID: <8E19A3E7-4A1C-4D96-8599-1192C7A768C5@amazon.com> The async_profiler just told me that C2Compiler::compile_method is running ~60% of total runtime if my C2 card table code inserted. I'll look further into that. ?On 7/22/20, 10:58 AM, "Mathiske, Bernd" wrote: That limiting card table updates to part of the heap helps seems doubtful to me now: 1. I could not find code that does this for Parallel or CMS. Correct me if I got this wrong! 2. The young gen detection technique I did see in G1 was to read from the card table and compare the value there to a young gen indicator value. Very similarly, conditional marking reads the value and compares it to the dirty card value. So I ran the benchmarks with -XX:+UseCondCardMark and dirtied the whole card table to begin with. Only minimal improvement. On 7/16/20, 10:40 AM, "Roman Kennke" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Instead of simulating, you could probably use my earlier generations- prototype? 
http://cr.openjdk.java.net/~rkennke/generation.patch Not sure if that easily fits, though, because it's dynamically shuffling young and old regions, instead of having a fixed boundary. Might be worth though. I haven't gotten around to try your stuff yet, but will do so soon! Thanks, Roman > That there is too much tracking is quite possible. I'll figure out a > way to simulate a young generation. I'll also switch those individual > barrier parts on one-by-one to see which one does the most damage. > (Benchmarks that cratered: derby, mmpegaudio, scimark, serial. > "compress" was fine.) > > Bernd > > On 7/16/20, 1:17 AM, "Roman Kennke" wrote: > > CAUTION: This email originated from outside of the organization. > Do not click links or open attachments unless you can confirm the > sender and know the content is safe. > > > > Hi Bernd, > > could the performance drop come from the fact that we have no > generations yet, and thus the barrier would track *all* stores > (all the > time) rather than only old->young stores? > > I'll give the patch a spin soon! > > Thank you! > Roman > > > On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote: > > Just having edited some card table operations into Shenandoah > GC, > > I have uploaded a webrev for perusal: > > http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/ > > > > The general idea of this patch is to mark some cards on the > side as > > if we needed a generational remembered set barrier, without > changing > > how Shenandoah works otherwise. Then we should be able to > measure how > > much mutator overhead this sort of extra barrier might cause. > My > > expected result was "similar to CMS or Parallel compared to > Epsilon", > > but that's not what I am seeing in first attempts to run > SPECjvm2008. > > Some of those benchmarks go way south, 5x and more. So I guess > I made > > a mistake somewhere, but here it is anyway, so you can see the > my > > approach, which is reparenting barrier classes to shared card > table > > barrier classes and then hoping that everything falls into > place. ( > > The array copying barrier for C1/C2 is switched off for now. > > > > For now, this patch is based on 11.0.7. I will rebase this to > the > > latest soon. > > > > > > From rkennke at redhat.com Wed Jul 22 20:59:15 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 22 Jul 2020 22:59:15 +0200 Subject: First cut at a card table for Shenandoah In-Reply-To: <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> Message-ID: <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> I am not very familiar with all this stuff. You should check if the C2 optimizations for card-table-barriers kick in. IIRC, there was something that elides those barriers on stores into new objects altogether, which make up the majority of stores. Roman On Wed, 2020-07-22 at 17:58 +0000, Mathiske, Bernd wrote: > That limiting card table updates to part of the heap helps seems > doubtful to me now: > 1. I could not find code that does this for Parallel or CMS. Correct > me if I got this wrong! > 2. The young gen detection technique I did see in G1 was to read from > the card table and compare the value there to a young gen indicator > value. Very similarly, conditional marking reads the value and > compares it to the dirty card value. 
So I ran the benchmarks with > -XX:+UseCondCardMark and dirtied the whole card table to begin with. > Only minimal improvement. > > ?On 7/16/20, 10:40 AM, "Roman Kennke" wrote: > > CAUTION: This email originated from outside of the organization. > Do not click links or open attachments unless you can confirm the > sender and know the content is safe. > > > > Instead of simulating, you could probably use my earlier > generations- > prototype? > > http://cr.openjdk.java.net/~rkennke/generation.patch > > Not sure if that easily fits, though, because it's dynamically > shuffling young and old regions, instead of having a fixed > boundary. > Might be worth though. > > I haven't gotten around to try your stuff yet, but will do so > soon! > > Thanks, > Roman > > > > That there is too much tracking is quite possible. I'll figure > out a > > way to simulate a young generation. I'll also switch those > individual > > barrier parts on one-by-one to see which one does the most > damage. > > (Benchmarks that cratered: derby, mmpegaudio, scimark, serial. > > "compress" was fine.) > > > > Bernd > > > > On 7/16/20, 1:17 AM, "Roman Kennke" wrote: > > > > CAUTION: This email originated from outside of the > organization. > > Do not click links or open attachments unless you can confirm > the > > sender and know the content is safe. > > > > > > > > Hi Bernd, > > > > could the performance drop come from the fact that we have > no > > generations yet, and thus the barrier would track *all* > stores > > (all the > > time) rather than only old->young stores? > > > > I'll give the patch a spin soon! > > > > Thank you! > > Roman > > > > > > On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote: > > > Just having edited some card table operations into > Shenandoah > > GC, > > > I have uploaded a webrev for perusal: > > > http://cr.openjdk.java.net/~bmathiske/cardshen/webrev.00/ > > > > > > The general idea of this patch is to mark some cards on > the > > side as > > > if we needed a generational remembered set barrier, > without > > changing > > > how Shenandoah works otherwise. Then we should be able to > > measure how > > > much mutator overhead this sort of extra barrier might > cause. > > My > > > expected result was "similar to CMS or Parallel compared > to > > Epsilon", > > > but that's not what I am seeing in first attempts to run > > SPECjvm2008. > > > Some of those benchmarks go way south, 5x and more. So I > guess > > I made > > > a mistake somewhere, but here it is anyway, so you can > see the > > my > > > approach, which is reparenting barrier classes to shared > card > > table > > > barrier classes and then hoping that everything falls > into > > place. ( > > > The array copying barrier for C1/C2 is switched off for > now. > > > > > > For now, this patch is based on 11.0.7. I will rebase > this to > > the > > > latest soon. 
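
For readers following the barrier discussion above, here is a self-contained sketch of a card-marking post-write barrier and of what -XX:+UseCondCardMark changes. The card size matches HotSpot's 512-byte cards, but the table, constants and function names are illustrative stand-ins rather than the HotSpot implementation:

#include <cstdint>
#include <cstring>

static const unsigned CARD_SHIFT = 9;      // 512-byte cards, as in the HotSpot card table
static const uint8_t  DIRTY_CARD = 0;
static const uint8_t  CLEAN_CARD = 1;
static uint8_t card_table[1u << 20];       // toy table covering a toy heap

// Unconditional barrier: every reference store blindly dirties its card.
inline void post_write_barrier(uintptr_t field_addr) {
  card_table[field_addr >> CARD_SHIFT] = DIRTY_CARD;
}

// Conditional barrier (-XX:+UseCondCardMark): read first, write only if needed.
// It costs a load and a branch, but avoids repeated stores to hot cards and the
// cache-line traffic they cause; that is the trade-off being benchmarked here.
inline void post_write_barrier_cond(uintptr_t field_addr) {
  uint8_t* card = &card_table[field_addr >> CARD_SHIFT];
  if (*card != DIRTY_CARD) {
    *card = DIRTY_CARD;
  }
}

int main() {
  std::memset(card_table, CLEAN_CARD, sizeof(card_table));
  post_write_barrier(0x12345);       // 0x12345 stands in for a stored-to field address
  post_write_barrier_cond(0x12345);  // the conditional version skips the redundant store
  return 0;
}
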
> > > > > > > > > > > > From adityam at microsoft.com Wed Jul 22 21:05:05 2020 From: adityam at microsoft.com (Aditya Mandaleeka) Date: Wed, 22 Jul 2020 21:05:05 +0000 Subject: First cut at a card table for Shenandoah In-Reply-To: <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> Message-ID: FWIW, I looked at this briefly when Bernd sent the original email and couldn't find any place where CMS/Parallel are taking fast-outs in card marking for non-young-gen refs either. -Aditya -----Original Message----- From: shenandoah-dev On Behalf Of Roman Kennke Sent: Wednesday, July 22, 2020 1:59 PM To: Mathiske, Bernd Cc: shenandoah-dev Subject: Re: First cut at a card table for Shenandoah I am not very familiar with all this stuff. You should check if the C2 optimizations for card-table-barriers kick in. IIRC, there was something that elides those barriers on stores into new objects altogether, which make up the majority of stores. Roman On Wed, 2020-07-22 at 17:58 +0000, Mathiske, Bernd wrote: > That limiting card table updates to part of the heap helps seems > doubtful to me now: > 1. I could not find code that does this for Parallel or CMS. Correct > me if I got this wrong! > 2. The young gen detection technique I did see in G1 was to read from > the card table and compare the value there to a young gen indicator > value. Very similarly, conditional marking reads the value and > compares it to the dirty card value. So I ran the benchmarks with > -XX:+UseCondCardMark and dirtied the whole card table to begin with. > Only minimal improvement. > > ?On 7/16/20, 10:40 AM, "Roman Kennke" wrote: > > CAUTION: This email originated from outside of the organization. > Do not click links or open attachments unless you can confirm the > sender and know the content is safe. > > > > Instead of simulating, you could probably use my earlier > generations- > prototype? > > https://nam06.safelinks.protection.outlook.com/?url=http:%2F%2Fcr.openjdk.java.net%2F~rkennke%2Fgeneration.patch&data=02%7C01%7Cadityam%40microsoft.com%7C8513d6ee8bf4457ba7c108d82e826ea1%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C637310485034589775&sdata=h18f%2BvPaKXHmOsSBz%2FEqXWmvzU%2FxDRhclMnJldRkr%2Bo%3D&reserved=0 > > Not sure if that easily fits, though, because it's dynamically > shuffling young and old regions, instead of having a fixed > boundary. > Might be worth though. > > I haven't gotten around to try your stuff yet, but will do so > soon! > > Thanks, > Roman > > > > That there is too much tracking is quite possible. I'll figure > out a > > way to simulate a young generation. I'll also switch those > individual > > barrier parts on one-by-one to see which one does the most > damage. > > (Benchmarks that cratered: derby, mmpegaudio, scimark, serial. > > "compress" was fine.) > > > > Bernd > > > > On 7/16/20, 1:17 AM, "Roman Kennke" wrote: > > > > CAUTION: This email originated from outside of the > organization. > > Do not click links or open attachments unless you can confirm > the > > sender and know the content is safe. 
> > > > > > > > Hi Bernd, > > > > could the performance drop come from the fact that we have > no > > generations yet, and thus the barrier would track *all* > stores > > (all the > > time) rather than only old->young stores? > > > > I'll give the patch a spin soon! > > > > Thank you! > > Roman > > > > > > On Thu, 2020-07-16 at 00:20 +0000, Mathiske, Bernd wrote: > > > Just having edited some card table operations into > Shenandoah > > GC, > > > I have uploaded a webrev for perusal: > > > https://nam06.safelinks.protection.outlook.com/?url=http:%2F%2Fcr.openjdk.java.net%2F~bmathiske%2Fcardshen%2Fwebrev.00%2F&data=02%7C01%7Cadityam%40microsoft.com%7C8513d6ee8bf4457ba7c108d82e826ea1%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C637310485034589775&sdata=VjLl%2F7FLsdVMZ13ajtAhgzF%2Fz%2Bf%2FsGTXgXtaMZJSheQ%3D&reserved=0 > > > > > > The general idea of this patch is to mark some cards on > the > > side as > > > if we needed a generational remembered set barrier, > without > > changing > > > how Shenandoah works otherwise. Then we should be able to > > measure how > > > much mutator overhead this sort of extra barrier might > cause. > > My > > > expected result was "similar to CMS or Parallel compared > to > > Epsilon", > > > but that's not what I am seeing in first attempts to run > > SPECjvm2008. > > > Some of those benchmarks go way south, 5x and more. So I > guess > > I made > > > a mistake somewhere, but here it is anyway, so you can > see the > > my > > > approach, which is reparenting barrier classes to shared > card > > table > > > barrier classes and then hoping that everything falls > into > > place. ( > > > The array copying barrier for C1/C2 is switched off for > now. > > > > > > For now, this patch is based on 11.0.7. I will rebase > this to > > the > > > latest soon. > > > > > > > > > > > > From shade at redhat.com Thu Jul 23 09:38:45 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Thu, 23 Jul 2020 11:38:45 +0200 Subject: [8u] RFR: Shenandoah: fix forceful pacer claim Message-ID: <0e18acba-8b0d-e3c7-6a52-6f5ae4b149ee@redhat.com> Hey, Looks like we have a little day-1 bug in sh/jdk8. Compare: 8u: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/file/e276bbfff22f/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp#l216 head: https://hg.openjdk.java.net/jdk/jdk/file/6c6087cff3bb/src/hotspot/share/gc/shenandoah/shenandoahPacer.cpp#l209 So on the pacer blocking path, we do not forcefully claim the pacing budget, which means pacer allows more slack than it should. I believe this was the 8u backport where I added "force" parameter, but omitted actually using it: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/5114410c2684#l6.18 This went largely unnoticed, because current 8u code does not check the result of claim_for_alloc. But it does so with JDK-8247367 backport and it fails the assert: https://hg.openjdk.java.net/jdk/jdk/rev/a39eb5a4f1c1#l1.28 sh/jdk8 fix is to match jdk/jdk: diff -r 0067bf851998 src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp Wed Jun 17 09:43:16 2020 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp Thu Jul 23 11:08:17 2020 +0200 @@ -216,7 +216,7 @@ intptr_t new_val = 0; do { cur = OrderAccess::load_acquire(&_budget); - if (cur < tax) { + if (cur < tax && !force) { // Progress depleted, alas. 
return false; } Testing: hotspot_gc_shenandoah {fastdebug,release} -- Thanks, -Aleksey From rkennke at redhat.com Thu Jul 23 10:11:23 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 23 Jul 2020 12:11:23 +0200 Subject: [8u] RFR: Shenandoah: fix forceful pacer claim In-Reply-To: <0e18acba-8b0d-e3c7-6a52-6f5ae4b149ee@redhat.com> References: <0e18acba-8b0d-e3c7-6a52-6f5ae4b149ee@redhat.com> Message-ID: Ok. Good find! Thanks, Roman On Thu, 2020-07-23 at 11:38 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > Hey, > > Looks like we have a little day-1 bug in sh/jdk8. > > Compare: > 8u: > https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/file/e276bbfff22f/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp#l216 > head: > https://hg.openjdk.java.net/jdk/jdk/file/6c6087cff3bb/src/hotspot/share/gc/shenandoah/shenandoahPacer.cpp#l209 > > So on the pacer blocking path, we do not forcefully claim the pacing > budget, which means pacer > allows more slack than it should. I believe this was the 8u backport > where I added "force" > parameter, but omitted actually using it: > > https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/5114410c2684#l6.18 > > This went largely unnoticed, because current 8u code does not check > the result of claim_for_alloc. > But it does so with JDK-8247367 backport and it fails the assert: > https://hg.openjdk.java.net/jdk/jdk/rev/a39eb5a4f1c1#l1.28 > > sh/jdk8 fix is to match jdk/jdk: > > diff -r 0067bf851998 > src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp > --- > a/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp W > ed Jun 17 09:43:16 2020 +0200 > +++ > b/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp T > hu Jul 23 11:08:17 2020 +0200 > @@ -216,7 +216,7 @@ > intptr_t new_val = 0; > do { > cur = OrderAccess::load_acquire(&_budget); > - if (cur < tax) { > + if (cur < tax && !force) { > // Progress depleted, alas. > return false; > } > > Testing: hotspot_gc_shenandoah {fastdebug,release} > From shade at redhat.com Thu Jul 23 10:15:31 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Thu, 23 Jul 2020 12:15:31 +0200 Subject: [8u] RFR: Shenandoah: fix forceful pacer claim In-Reply-To: References: <0e18acba-8b0d-e3c7-6a52-6f5ae4b149ee@redhat.com> Message-ID: <4f570d3a-3593-ce32-a95c-78cedef027ad@redhat.com> On 7/23/20 12:11 PM, Roman Kennke wrote: > Ok. Good find! Thanks, pushed. 
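
To make the semantics of the one-line pacer fix above easier to see, here is a self-contained sketch of a claim loop with the force flag applied. std::atomic stands in for HotSpot's Atomic/OrderAccess primitives and the names are illustrative; the real ShenandoahPacer does more than this:

#include <atomic>
#include <cstdint>

static std::atomic<intptr_t> budget{0};

// Claim 'tax' units of pacing budget. A non-forced claim fails once the budget
// is depleted; a forced claim always succeeds and may drive the budget negative,
// so later claimers observe the debt. Dropping the '!force' check (the 8u day-1
// bug) makes forced claims cost nothing when depleted, so the pacer allows more
// slack than it should.
bool claim_for_alloc(intptr_t tax, bool force) {
  intptr_t cur = budget.load(std::memory_order_acquire);
  intptr_t new_val;
  do {
    if (cur < tax && !force) {
      return false;                  // progress depleted, alas
    }
    new_val = cur - tax;
  } while (!budget.compare_exchange_weak(cur, new_val,
                                         std::memory_order_acq_rel,
                                         std::memory_order_acquire));
  return true;
}

int main() {
  budget.store(100, std::memory_order_release);
  bool ok1 = claim_for_alloc(60, false);   // succeeds, budget drops to 40
  bool ok2 = claim_for_alloc(60, false);   // fails, budget depleted
  bool ok3 = claim_for_alloc(60, true);    // forced: succeeds, budget goes to -20
  return (ok1 && !ok2 && ok3) ? 0 : 1;
}
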
-- -Aleksey From shade at redhat.com Thu Jul 23 10:12:38 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Thu, 23 Jul 2020 12:12:38 +0200 Subject: RFR (S) 8249953: Shenandoah: gc/shenandoah/mxbeans tests should account for corner cases Message-ID: Testbug: https://bugs.openjdk.java.net/browse/JDK-8249953 Fix: https://cr.openjdk.java.net/~shade/8249953/webrev.01/ Testing: hotspot_gc_shenandoah {fastdebug,release} -- Thanks, -Aleksey From thomas.schatzl at oracle.com Thu Jul 23 10:31:22 2020 From: thomas.schatzl at oracle.com (Thomas Schatzl) Date: Thu, 23 Jul 2020 12:31:22 +0200 Subject: First cut at a card table for Shenandoah In-Reply-To: <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> Message-ID: <6d712fa7-f4c0-338a-6130-70cdcd9b29ee@oracle.com> Hi, On 22.07.20 22:59, Roman Kennke wrote: > I am not very familiar with all this stuff. > > You should check if the C2 optimizations for card-table-barriers kick > in. IIRC, there was something that elides those barriers on stores into > new objects altogether, which make up the majority of stores. > if you are talking about eliding write barriers for new objects because they are "always" allocated in young gen, and no generational collector is interested in young->old references, there is no such thing afaik. No collector guarantees this "always" property: e.g. CMS may directly decide to put new objects into old gen for a few reasons, and for parallel (and g1) it e.g. can happen that a gc right after allocating that object (when e.g. transitioning from native slow-path code) will move that object into old gen. Or simply when the object is large. See e.g. https://bugs.openjdk.java.net/browse/JDK-8191342 That would still require the compiler to only apply that optimization if it can prove that the object is "small enough" to fit into young gen in any case (it is probably easy to get conservative enough values for that from somewhere). Thanks, Thomas From rkennke at redhat.com Thu Jul 23 10:25:07 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 23 Jul 2020 12:25:07 +0200 Subject: RFR (S) 8249953: Shenandoah: gc/shenandoah/mxbeans tests should account for corner cases In-Reply-To: References: Message-ID: <410d99ffb769fb65c8e42e8f173a2a89bf38b0e4.camel@redhat.com> Ok. 
Thanks, Roman On Thu, 2020-07-23 at 12:12 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > Testbug: > https://bugs.openjdk.java.net/browse/JDK-8249953 > > Fix: > https://cr.openjdk.java.net/~shade/8249953/webrev.01/ > > Testing: hotspot_gc_shenandoah {fastdebug,release} > From rkennke at redhat.com Thu Jul 23 11:31:05 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 23 Jul 2020 13:31:05 +0200 Subject: First cut at a card table for Shenandoah In-Reply-To: <6d712fa7-f4c0-338a-6130-70cdcd9b29ee@oracle.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> <6d712fa7-f4c0-338a-6130-70cdcd9b29ee@oracle.com> Message-ID: <73bdc00ad0501e82660e9b8bcd10fe7189ffd504.camel@redhat.com> On Thu, 2020-07-23 at 12:31 +0200, Thomas Schatzl wrote: > Hi, > > On 22.07.20 22:59, Roman Kennke wrote: > > I am not very familiar with all this stuff. > > > > You should check if the C2 optimizations for card-table-barriers > > kick > > in. IIRC, there was something that elides those barriers on stores > > into > > new objects altogether, which make up the majority of stores. > > > > if you are talking about eliding write barriers for new objects > because they are "always" allocated in young gen, and no > generational > collector is interested in young->old references, there is no such > thing > afaik. > > No collector guarantees this "always" property: e.g. CMS may > directly > decide to put new objects into old gen for a few reasons, and for > parallel (and g1) it e.g. can happen that a gc right after > allocating > that object (when e.g. transitioning from native slow-path code) > will > move that object into old gen. Or simply when the object is large. > > See e.g. https://bugs.openjdk.java.net/browse/JDK-8191342 > > That would still require the compiler to only apply that optimization > if > it can prove that the object is "small enough" to fit into young gen > in > any case (it is probably easy to get conservative enough values for > that > from somewhere). > Thanks Thomas for clarification! 
:-) Roman From shade at redhat.com Fri Jul 24 07:03:47 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 24 Jul 2020 09:03:47 +0200 Subject: [8u] 2020-07-24, Bulk backports to sh/jdk8 Message-ID: <741c8118-0645-5d3e-282e-dde18a639388@redhat.com> https://cr.openjdk.java.net/~shade/shenandoah/backports/jdk8u-20200724/webrev.01/ This backports a bunch of improvements to sh/jdk8: [backport] 8249953: Shenandoah: gc/shenandoah/mxbeans tests should account for corner cases [backport] 8248652: Shenandoah: SATB buffer handling may assume no forwarded objects [backport] 8247367: Shenandoah: pacer should wait on lock instead of exponential backoff [backport] 8247593: Shenandoah: should not block pacing reporters [backport] 8249649: Shenandoah: provide per-cycle pacing stats Testing: hotspot_gc_shenandoah {fastdebug,release} -- Thanks, -Aleksey From rkennke at redhat.com Fri Jul 24 09:18:49 2020 From: rkennke at redhat.com (Roman Kennke) Date: Fri, 24 Jul 2020 11:18:49 +0200 Subject: [8u] 2020-07-24, Bulk backports to sh/jdk8 In-Reply-To: <741c8118-0645-5d3e-282e-dde18a639388@redhat.com> References: <741c8118-0645-5d3e-282e-dde18a639388@redhat.com> Message-ID: <56b26f1db0bafda6e602b5187713373d49cc94f5.camel@redhat.com> Yup, looks good! Thanks you! Roman On Fri, 2020-07-24 at 09:03 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > https://cr.openjdk.java.net/~shade/shenandoah/backports/jdk8u-20200724/webrev.01/ > > This backports a bunch of improvements to sh/jdk8: > [backport] 8249953: Shenandoah: gc/shenandoah/mxbeans tests should > account for corner cases > [backport] 8248652: Shenandoah: SATB buffer handling may assume no > forwarded objects > [backport] 8247367: Shenandoah: pacer should wait on lock instead of > exponential backoff > [backport] 8247593: Shenandoah: should not block pacing reporters > [backport] 8249649: Shenandoah: provide per-cycle pacing stats > > Testing: hotspot_gc_shenandoah {fastdebug,release} > From zgu at redhat.com Sat Jul 25 23:13:51 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Sat, 25 Jul 2020 19:13:51 -0400 Subject: [15] RFR 8250588: Shenandoah: LRB needs to save/restore fp registers for runtime call Message-ID: <6610a96b-1bc8-16b3-ed61-cef5488b7f84@redhat.com> We recently experienced some test failures with VarHandleTestMethodHandleAccessDouble.java and VarHandleTestMethodHandleAccessfloat.java, where appeared to have corrupted fp registers. It turns out that we have to save fp registers in LRB when making runtime calls. Bug: https://bugs.openjdk.java.net/browse/JDK-8250588 Weberev: http://cr.openjdk.java.net/~zgu/JDK-8250588/webrev.00/ Test: hotspot_gc_shenandoah tier1 with Shenandoah Thanks, -Zhengyu From shade at redhat.com Sun Jul 26 06:54:20 2020 From: shade at redhat.com (shade at redhat.com) Date: Sun, 26 Jul 2020 06:54:20 +0000 Subject: hg: shenandoah/jdk8/hotspot: 5 new changesets Message-ID: <202007260654.06Q6sKm2027223@aojmv0008.oracle.com> Changeset: 2047aa418aa8 Author: shade Date: 2020-07-23 12:46 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/2047aa418aa8 [backport] 8249953: Shenandoah: gc/shenandoah/mxbeans tests should account for corner cases Reviewed-by: rkennke ! test/gc/shenandoah/mxbeans/TestChurnNotifications.java ! 
test/gc/shenandoah/mxbeans/TestPauseNotifications.java Changeset: a9d5e574e818 Author: shade Date: 2020-07-10 10:37 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/a9d5e574e818 [backport] 8248652: Shenandoah: SATB buffer handling may assume no forwarded objects Reviewed-by: rkennke ! src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Changeset: 118e09aa9462 Author: shade Date: 2020-06-11 18:16 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/118e09aa9462 [backport] 8247367: Shenandoah: pacer should wait on lock instead of exponential backoff Reviewed-by: zgu ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.inline.hpp Changeset: 226c8031111b Author: shade Date: 2020-06-17 09:43 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/226c8031111b [backport] 8247593: Shenandoah: should not block pacing reporters Reviewed-by: rkennke ! src/share/vm/gc_implementation/shenandoah/shenandoahControlThread.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahControlThread.hpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.hpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.inline.hpp Changeset: 1254144cf226 Author: shade Date: 2020-07-19 15:34 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/1254144cf226 [backport] 8249649: Shenandoah: provide per-cycle pacing stats Reviewed-by: rkennke ! src/share/vm/gc_implementation/shenandoah/shenandoahControlThread.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahNumberSeq.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahNumberSeq.hpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.hpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPhaseTimings.cpp ! src/share/vm/gc_implementation/shenandoah/shenandoahPhaseTimings.hpp ! src/share/vm/runtime/thread.cpp ! src/share/vm/runtime/thread.hpp From rkennke at redhat.com Sun Jul 26 08:27:23 2020 From: rkennke at redhat.com (Roman Kennke) Date: Sun, 26 Jul 2020 10:27:23 +0200 Subject: [15] RFR 8250588: Shenandoah: LRB needs to save/restore fp registers for runtime call In-Reply-To: <6610a96b-1bc8-16b3-ed61-cef5488b7f84@redhat.com> References: <6610a96b-1bc8-16b3-ed61-cef5488b7f84@redhat.com> Message-ID: Hi Zhengyu, the patch looks good to me! Thank you, Roman Zhengyu Gu schrieb am So., 26. Juli 2020, 01:15: > We recently experienced some test failures with > VarHandleTestMethodHandleAccessDouble.java and > VarHandleTestMethodHandleAccessfloat.java, where appeared to have > corrupted fp registers. > > It turns out that we have to save fp registers in LRB when making > runtime calls. 
> > > Bug: https://bugs.openjdk.java.net/browse/JDK-8250588 > Weberev: http://cr.openjdk.java.net/~zgu/JDK-8250588/webrev.00/ > > Test: > hotspot_gc_shenandoah > tier1 with Shenandoah > > > Thanks, > > -Zhengyu > > From mathiske at amazon.com Mon Jul 27 17:48:43 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Mon, 27 Jul 2020 17:48:43 +0000 Subject: First cut at a card table for Shenandoah In-Reply-To: <73bdc00ad0501e82660e9b8bcd10fe7189ffd504.camel@redhat.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> <6d712fa7-f4c0-338a-6130-70cdcd9b29ee@oracle.com> <73bdc00ad0501e82660e9b8bcd10fe7189ffd504.camel@redhat.com> Message-ID: <2F5DBD58-9C78-4253-88EA-0CA092F42C9D@amazon.com> Aditya, Thomas, Roman, Thank you for providing these hints, which were helpful to rule out possible root causes! Looking at all this and at some initial profiling results, Volker Simonis suggested that -XX:-EliminateAllocations might help. And it does! When I use this flag, performance is "back to normal" in the short benchmark runs I have conducted so far. I'll run some more extensive tests, with repetitions, and report some numbers, soon. Bernd ?On 7/23/20, 4:32 AM, "shenandoah-dev on behalf of Roman Kennke" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. On Thu, 2020-07-23 at 12:31 +0200, Thomas Schatzl wrote: > Hi, > > On 22.07.20 22:59, Roman Kennke wrote: > > I am not very familiar with all this stuff. > > > > You should check if the C2 optimizations for card-table-barriers > > kick > > in. IIRC, there was something that elides those barriers on stores > > into > > new objects altogether, which make up the majority of stores. > > > > if you are talking about eliding write barriers for new objects > because they are "always" allocated in young gen, and no > generational > collector is interested in young->old references, there is no such > thing > afaik. > > No collector guarantees this "always" property: e.g. CMS may > directly > decide to put new objects into old gen for a few reasons, and for > parallel (and g1) it e.g. can happen that a gc right after > allocating > that object (when e.g. transitioning from native slow-path code) > will > move that object into old gen. Or simply when the object is large. > > See e.g. https://bugs.openjdk.java.net/browse/JDK-8191342 > > That would still require the compiler to only apply that optimization > if > it can prove that the object is "small enough" to fit into young gen > in > any case (it is probably easy to get conservative enough values for > that > from somewhere). > Thanks Thomas for clarification! 
:-) Roman

From Charlie.Gracie at microsoft.com  Mon Jul 27 21:57:07 2020
From: Charlie.Gracie at microsoft.com (Charlie Gracie)
Date: Mon, 27 Jul 2020 21:57:07 +0000
Subject: First cut at a card table for Shenandoah
In-Reply-To: <2F5DBD58-9C78-4253-88EA-0CA092F42C9D@amazon.com>
References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> <6d712fa7-f4c0-338a-6130-70cdcd9b29ee@oracle.com> <73bdc00ad0501e82660e9b8bcd10fe7189ffd504.camel@redhat.com> <2F5DBD58-9C78-4253-88EA-0CA092F42C9D@amazon.com>
Message-ID: <36C4CD7A-180C-450C-906B-59B2FF4ACF81@microsoft.com>

Hi Bernd,

I applied your patch locally to play around with it. With a release build I was getting wild performance results that were not consistent from one run to the next. When I ran with a fastdebug build I got this assertion 100% of the time running some DaCapo benchmarks:

# Internal Error (../../src/hotspot/share/opto/node.cpp:268), pid=28283, tid=16131
# assert((int)num_edges > 0) failed: need non-zero edge count for loop progress

When I ran with -XX:-EliminateAllocations the assertion went away and, as you mentioned, performance stabilized.

Looking at your code changes I noticed you made ShenandoahBarrierSetC2 a subclass of CardTableBarrierSetC2. When an object is scalar replaced (-XX:+EliminateAllocations) the GC barriers that happen directly on the object are removed by the `eliminate_gc_barrier` calls. ShenandoahBarrierSetC2 already had an implementation of `eliminate_gc_barrier`, so the super class implementation in CardTableBarrierSetC2 is being missed. I modified the Shenandoah impl as follows, which resolved the performance and assertion issues for me.

void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* n) const {
  if (is_shenandoah_wb_pre_call(n)) {
    shenandoah_eliminate_wb_pre(n, &macro->igvn());
  }
  if (n->Opcode() == Op_CastP2X) {
    CardTableBarrierSetC2::eliminate_gc_barrier(macro, n);
  }
}

I believe a few other APIs would need to also check with the super class implementation, but for my runs to complete successfully this was the only change I needed to make.

Cheers,
Charlie Gracie

On 2020-07-27, 2:02 PM, "shenandoah-dev on behalf of Mathiske, Bernd" wrote:

Aditya, Thomas, Roman,

Thank you for providing these hints, which were helpful to rule out possible root causes!

Looking at all this and at some initial profiling results, Volker Simonis suggested that -XX:-EliminateAllocations might help. And it does! When I use this flag, performance is "back to normal" in the short benchmark runs I have conducted so far. I'll run some more extensive tests, with repetitions, and report some numbers soon.

Bernd

On 7/23/20, 4:32 AM, "shenandoah-dev on behalf of Roman Kennke" wrote:

CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.

On Thu, 2020-07-23 at 12:31 +0200, Thomas Schatzl wrote:
> Hi,
>
> On 22.07.20 22:59, Roman Kennke wrote:
> > I am not very familiar with all this stuff.
> >
> > You should check if the C2 optimizations for card-table-barriers
> > kick
> > in. IIRC, there was something that elides those barriers on stores
> > into
> > new objects altogether, which make up the majority of stores.
> > > > if you are talking about eliding write barriers for new objects > because they are "always" allocated in young gen, and no > generational > collector is interested in young->old references, there is no such > thing > afaik. > > No collector guarantees this "always" property: e.g. CMS may > directly > decide to put new objects into old gen for a few reasons, and for > parallel (and g1) it e.g. can happen that a gc right after > allocating > that object (when e.g. transitioning from native slow-path code) > will > move that object into old gen. Or simply when the object is large. > > See e.g. https://bugs.openjdk.java.net/browse/JDK-8191342 > > That would still require the compiler to only apply that optimization > if > it can prove that the object is "small enough" to fit into young gen > in > any case (it is probably easy to get conservative enough values for > that > from somewhere). > Thanks Thomas for clarification! :-) Roman From mathiske at amazon.com Mon Jul 27 23:08:57 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Mon, 27 Jul 2020 23:08:57 +0000 Subject: First cut at a card table for Shenandoah In-Reply-To: <36C4CD7A-180C-450C-906B-59B2FF4ACF81@microsoft.com> References: <6c9bb10a84641ce87eb8c6db85bf2f6b588aaf14.camel@redhat.com> <3C143FB2-F38C-4C83-BE62-AC7943B0FF8F@amazon.com> <06280fd99f7fafb4c1c56affa868255715a7d557.camel@redhat.com> <3274834D-B6C9-495B-9679-A8C1426A2639@amazon.com> <479aff4233340940c2f36c63f470ab1d7423925d.camel@redhat.com> <6d712fa7-f4c0-338a-6130-70cdcd9b29ee@oracle.com> <73bdc00ad0501e82660e9b8bcd10fe7189ffd504.camel@redhat.com> <2F5DBD58-9C78-4253-88EA-0CA092F42C9D@amazon.com> <36C4CD7A-180C-450C-906B-59B2FF4ACF81@microsoft.com> Message-ID: <6F9E57D2-615E-49FA-81F8-5CEB3B8AF200@amazon.com> Charlie, This is highly appreciated. You pinpointed the mistake I made, not checking all facets of inheritance here. And yes, Op_CastP2X is implicated in the super class. Great progress. I'll check other inheritance avenues. Many thanks! Bernd ?On 7/27/20, 2:58 PM, "Charlie Gracie" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Hi Bernd, I applied your patch locally to play around and with a release build I was getting some wild performance results which were not consistent from one run to the next. When I ran with a fastdebug build I get this assertion 100% of the time running some DeCapo benchmarks: # Internal Error (../../src/hotspot/share/opto/node.cpp:268), pid=28283, tid=16131 # assert((int)num_edges > 0) failed: need non-zero edge count for loop progress When I ran with -XX:-EliminateAllocations the assertion went away and as you mentioned performance stabilized. Looking at your code changes I noticed you made ShenandoahBarrierSetC2 a subclass of CardTableBarrierSetC2. When an object is scalar replaced (-XX:+EliminateAllocations) the GC barriers that happen directly on the object are removed by the `eliminate_gc_barrier` calls. ShenandoahBarrierSetC2 already had an implementation of `eliminate_gc_barrier` so the super class implementation in CardTableBarrierSetC2 is being missed. I modified the Shenandoah impl as follows which resolved the performance and assertion issues for me. 
void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* n) const { if (is_shenandoah_wb_pre_call(n)) { shenandoah_eliminate_wb_pre(n, ¯o->igvn()); } if (n->Opcode() == Op_CastP2X) { CardTableBarrierSetC2::eliminate_gc_barrier(macro, n); } } I believe a few other APIs would need to also check with the super class implementation but for my runs to complete successfully this was the only change I needed to make. Cheers, Charlie Gracie On 2020-07-27, 2:02 PM, "shenandoah-dev on behalf of Mathiske, Bernd" wrote: Aditya, Thomas, Roman, Thank you for providing these hints, which were helpful to rule out possible root causes! Looking at all this and at some initial profiling results, Volker Simonis suggested that -XX:-EliminateAllocations might help. And it does! When I use this flag, performance is "back to normal" in the short benchmark runs I have conducted so far. I'll run some more extensive tests, with repetitions, and report some numbers, soon. Bernd On 7/23/20, 4:32 AM, "shenandoah-dev on behalf of Roman Kennke" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. On Thu, 2020-07-23 at 12:31 +0200, Thomas Schatzl wrote: > Hi, > > On 22.07.20 22:59, Roman Kennke wrote: > > I am not very familiar with all this stuff. > > > > You should check if the C2 optimizations for card-table-barriers > > kick > > in. IIRC, there was something that elides those barriers on stores > > into > > new objects altogether, which make up the majority of stores. > > > > if you are talking about eliding write barriers for new objects > because they are "always" allocated in young gen, and no > generational > collector is interested in young->old references, there is no such > thing > afaik. > > No collector guarantees this "always" property: e.g. CMS may > directly > decide to put new objects into old gen for a few reasons, and for > parallel (and g1) it e.g. can happen that a gc right after > allocating > that object (when e.g. transitioning from native slow-path code) > will > move that object into old gen. Or simply when the object is large. > > See e.g. https://bugs.openjdk.java.net/browse/JDK-8191342 > > That would still require the compiler to only apply that optimization > if > it can prove that the object is "small enough" to fit into young gen > in > any case (it is probably easy to get conservative enough values for > that > from somewhere). > Thanks Thomas for clarification! :-) Roman From shade at redhat.com Tue Jul 28 07:54:11 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 28 Jul 2020 09:54:11 +0200 Subject: [8] RFR: Shenandoah: pacer should use proper Atomics for intptr_t Message-ID: <402e0b83-c61b-f853-dce9-527f4de62d23@redhat.com> Hi, I am seeing x86_32 build failures in sh/jdk8: /home/buildbot/worker/build-shenandoah-jdk8-linux/build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp: In member function ?void ShenandoahPacer::pace_for_alloc(size_t)?: /home/buildbot/worker/build-shenandoah-jdk8-linux/build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp:281:51: error: no matching function for call to ?Atomic::load(volatile intptr_t*)? if (total_ms > max_ms || Atomic::load(&_budget) >= 0) { I believe that is because recent backports used the Atomic/OrderAccess methods that work only on x86_64. This is 8u-specific problem. 11u and upwards have the proper Atomic APIs. 
8u patch: https://cr.openjdk.java.net/~shade/shenandoah/8u-atomic-ptrs/webrev.01/ Testing: hotspot_gc_shenandoah {x86_64, x86_32} -- Thanks, -Aleksey From rkennke at redhat.com Tue Jul 28 08:50:02 2020 From: rkennke at redhat.com (Roman Kennke) Date: Tue, 28 Jul 2020 10:50:02 +0200 Subject: [8] RFR: Shenandoah: pacer should use proper Atomics for intptr_t In-Reply-To: <402e0b83-c61b-f853-dce9-527f4de62d23@redhat.com> References: <402e0b83-c61b-f853-dce9-527f4de62d23@redhat.com> Message-ID: <480df1169c8eae586146ee7e7c1822d754065c1a.camel@redhat.com> Ok! Thank you! Roman On Tue, 2020-07-28 at 09:54 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > Hi, > > I am seeing x86_32 build failures in sh/jdk8: > > /home/buildbot/worker/build-shenandoah-jdk8- > linux/build/hotspot/src/share/vm/gc_implementation/shenandoah/shenand > oahPacer.cpp: > In member function ?void ShenandoahPacer::pace_for_alloc(size_t)?: > /home/buildbot/worker/build-shenandoah-jdk8- > linux/build/hotspot/src/share/vm/gc_implementation/shenandoah/shenand > oahPacer.cpp:281:51: > error: no matching function for call to ?Atomic::load(volatile > intptr_t*)? > if (total_ms > max_ms || Atomic::load(&_budget) >= 0) { > > I believe that is because recent backports used the > Atomic/OrderAccess methods that work only on > x86_64. This is 8u-specific problem. 11u and upwards have the proper > Atomic APIs. > > 8u patch: > > https://cr.openjdk.java.net/~shade/shenandoah/8u-atomic-ptrs/webrev.01/ > > Testing: hotspot_gc_shenandoah {x86_64, x86_32} > From shade at redhat.com Tue Jul 28 08:56:00 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Tue, 28 Jul 2020 10:56:00 +0200 Subject: [8] RFR: Shenandoah: pacer should use proper Atomics for intptr_t In-Reply-To: <480df1169c8eae586146ee7e7c1822d754065c1a.camel@redhat.com> References: <402e0b83-c61b-f853-dce9-527f4de62d23@redhat.com> <480df1169c8eae586146ee7e7c1822d754065c1a.camel@redhat.com> Message-ID: <62e34487-4a31-00dc-8c00-4dce7ea6c15a@redhat.com> On 7/28/20 10:50 AM, Roman Kennke wrote: > Ok! Cheers, pushed. -- Thanks, -Aleksey From shade at redhat.com Tue Jul 28 08:53:18 2020 From: shade at redhat.com (shade at redhat.com) Date: Tue, 28 Jul 2020 08:53:18 +0000 Subject: hg: shenandoah/jdk8/hotspot: Shenandoah: pacer should use proper Atomics for intptr_t Message-ID: <202007280853.06S8rIcJ014571@aojmv0008.oracle.com> Changeset: 144156910d02 Author: shade Date: 2020-07-28 09:28 +0200 URL: https://hg.openjdk.java.net/shenandoah/jdk8/hotspot/rev/144156910d02 Shenandoah: pacer should use proper Atomics for intptr_t ! src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp ! 
src/share/vm/gc_implementation/shenandoah/shenandoahPacer.inline.hpp From shade at redhat.com Wed Jul 29 06:54:07 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 29 Jul 2020 08:54:07 +0200 Subject: [8u] RFR: Shenandoah: Zero build fails after recent Atomic cleanup in Pacer Message-ID: <6121b54c-8099-9f3e-9c47-2bde1aaed122@redhat.com> sh/jdk8 build pipelines still fail, now in Zero builds: In file included from build/hotspot/src/share/vm/memory/specialized_oop_closures.hpp:28:0, from build/hotspot/src/share/vm/oops/oop.hpp:30, from build/hotspot/src/share/vm/oops/markOop.hpp:28, from build/hotspot/src/share/vm/gc_implementation/shared/ageTable.hpp:28, from build/hotspot/src/share/vm/oops/oop.inline.hpp:28, from build/hotspot/src/share/vm/gc_implementation/shared/markBitMap.hpp:29, from build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp:27, from build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.hpp:28, from build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp:26: build/hotspot/src/share/vm/runtime/atomic.hpp:52:22: error: inline function ?static void Atomic::store_ptr(intptr_t, volatile intptr_t*)? used but never defined [-Werror] inline static void store_ptr(intptr_t store_value, volatile intptr_t* dest); ^~~~~~~~~ This happens because Zero has CV-unqualified method only, so we need the other one: diff -r 144156910d02 src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp --- a/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp Tue Jul 28 09:28:15 2020 +0200 +++ b/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp Wed Jul 29 08:42:17 2020 +0200 @@ -168,8 +168,12 @@ inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { + *dest = store_value; +} + inline jint Atomic::add(jint add_value, volatile jint* dest) { #ifdef ARM return arm_add_and_fetch(dest, add_value); #else diff -r 144156910d02 src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp Tue Jul 28 09:28:15 2020 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp Wed Jul 29 08:42:17 2020 +0200 @@ -27,8 +27,9 @@ #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahPacer.hpp" #include "gc_implementation/shenandoah/shenandoahPhaseTimings.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/atomic.inline.hpp" /* * In normal concurrent cycle, we have to pace the application to let GC finish. * Testing: hotspot_gc_shenandoah; Linux x86_64 zero builds -- Thanks, -Aleksey From rkennke at redhat.com Wed Jul 29 07:12:37 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 29 Jul 2020 09:12:37 +0200 Subject: [8u] RFR: Shenandoah: Zero build fails after recent Atomic cleanup in Pacer In-Reply-To: <6121b54c-8099-9f3e-9c47-2bde1aaed122@redhat.com> References: <6121b54c-8099-9f3e-9c47-2bde1aaed122@redhat.com> Message-ID: Aww. Ok! 
Roman On Wed, 2020-07-29 at 08:54 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > sh/jdk8 build pipelines still fail, now in Zero builds: > > In file included from > build/hotspot/src/share/vm/memory/specialized_oop_closures.hpp:28:0, > from build/hotspot/src/share/vm/oops/oop.hpp:30, > from build/hotspot/src/share/vm/oops/markOop.hpp:28, > from > build/hotspot/src/share/vm/gc_implementation/shared/ageTable.hpp:28, > from > build/hotspot/src/share/vm/oops/oop.inline.hpp:28, > from > build/hotspot/src/share/vm/gc_implementation/shared/markBitMap.hpp:29 > , > from > build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahHea > p.hpp:27, > from > build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahFre > eSet.hpp:28, > from > build/hotspot/src/share/vm/gc_implementation/shenandoah/shenandoahPac > er.cpp:26: > build/hotspot/src/share/vm/runtime/atomic.hpp:52:22: error: inline > function ?static void > Atomic::store_ptr(intptr_t, volatile intptr_t*)? used but never > defined [-Werror] > inline static void store_ptr(intptr_t store_value, volatile > intptr_t* dest); > ^~~~~~~~~ > > This happens because Zero has CV-unqualified method only, so we need > the other one: > > diff -r 144156910d02 > src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp > --- a/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp Tue > Jul 28 09:28:15 2020 +0200 > +++ b/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp Wed > Jul 29 08:42:17 2020 +0200 > @@ -168,8 +168,12 @@ > inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) > { > *dest = store_value; > } > > +inline void Atomic::store_ptr(intptr_t store_value, volatile > intptr_t* dest) { > + *dest = store_value; > +} > + > inline jint Atomic::add(jint add_value, volatile jint* dest) { > #ifdef ARM > return arm_add_and_fetch(dest, add_value); > #else > diff -r 144156910d02 > src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp > --- > a/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp T > ue Jul 28 09:28:15 2020 +0200 > +++ > b/src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp W > ed Jul 29 08:42:17 2020 +0200 > @@ -27,8 +27,9 @@ > #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" > #include "gc_implementation/shenandoah/shenandoahPacer.hpp" > #include "gc_implementation/shenandoah/shenandoahPhaseTimings.hpp" > #include "runtime/mutexLocker.hpp" > +#include "runtime/atomic.inline.hpp" > > /* > * In normal concurrent cycle, we have to pace the application to > let GC finish. > * > > > Testing: hotspot_gc_shenandoah; Linux x86_64 zero builds > From shade at redhat.com Wed Jul 29 08:27:57 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 29 Jul 2020 10:27:57 +0200 Subject: [8u] RFR: Shenandoah: Zero build fails after recent Atomic cleanup in Pacer In-Reply-To: References: <6121b54c-8099-9f3e-9c47-2bde1aaed122@redhat.com> Message-ID: <2899e445-52f7-bea2-be8c-256685106475@redhat.com> On 7/29/20 9:12 AM, Roman Kennke wrote: > Aww. Ok! Yeah... Pushed! -- -Aleksey From shade at redhat.com Wed Jul 29 08:29:29 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 29 Jul 2020 10:29:29 +0200 Subject: [11u] Pick up jdk-11.0.9+1 to sh/jdk11 Message-ID: <6deed048-86ec-50c2-38df-786fa532585b@redhat.com> Upstream had published jdk-11.0.9+1, let's pick it up to sh/jdk11. Merge is trivial. I would tag the result shenandoah-jdk-11.0.9+1 immediately. 
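
For reference, the pick-up described above corresponds to a Mercurial sequence along these lines. The upstream repository URL here is an assumption for the example, and the exact workflow may differ:

hg pull http://hg.openjdk.java.net/jdk-updates/jdk11u    # fetch the upstream jdk-11.0.9+1 changesets
hg merge jdk-11.0.9+1                                    # the trivial merge mentioned above
hg commit -m "Merge jdk-11.0.9+1"
hg tag shenandoah-jdk-11.0.9+1                           # tag the merged result
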
Testing: hotspot_gc_shenandoah {fastdebug,release} -- Thanks, -Aleksey From rkennke at redhat.com Wed Jul 29 09:19:46 2020 From: rkennke at redhat.com (Roman Kennke) Date: Wed, 29 Jul 2020 11:19:46 +0200 Subject: [11u] Pick up jdk-11.0.9+1 to sh/jdk11 In-Reply-To: <6deed048-86ec-50c2-38df-786fa532585b@redhat.com> References: <6deed048-86ec-50c2-38df-786fa532585b@redhat.com> Message-ID: <39019f350f0169a9f390fe7b1b9337183b419dd7.camel@redhat.com> Ok, go! Thank you! Roman On Wed, 2020-07-29 at 10:29 +0200, Aleksey Shipilev wrote: > Error verifying signature: Cannot verify message signature: > Incorrect message format > Upstream had published jdk-11.0.9+1, let's pick it up to sh/jdk11. > > Merge is trivial. I would tag the result shenandoah-jdk-11.0.9+1 > immediately. > > Testing: hotspot_gc_shenandoah {fastdebug,release} > From shade at redhat.com Wed Jul 29 09:38:54 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Wed, 29 Jul 2020 11:38:54 +0200 Subject: [11u] Pick up jdk-11.0.9+1 to sh/jdk11 In-Reply-To: <39019f350f0169a9f390fe7b1b9337183b419dd7.camel@redhat.com> References: <6deed048-86ec-50c2-38df-786fa532585b@redhat.com> <39019f350f0169a9f390fe7b1b9337183b419dd7.camel@redhat.com> Message-ID: On 7/29/20 11:19 AM, Roman Kennke wrote: > Ok, go! Cheers, pushed. -- -Aleksey From conniall at amazon.com Wed Jul 29 19:14:54 2020 From: conniall at amazon.com (Connaughton, Niall) Date: Wed, 29 Jul 2020 19:14:54 +0000 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> Message-ID: We've tested with 11.0.8 and found that the additional final mark pause time is coming from string and symbol table cleanups. We identified this through successive timestamps in the GC logs. We didn't see a breakdown covering string/symbol table cleanup in the per-cycle stats logs, the time was allocated to "Unload Classes" under Final Mark. From the logs it appears string/symbol table cleanup is taking around 15-16ms consistently during final mark (unloading) pauses. Almost all cleanups removed 0 entries, a few removed 1 or 2 symbols. Over time the number of symbols was nearly constant, while the strings fluctuated up and down but did not appear to be accumulating. So it doesn't look like there's a leak. An example from the logs showing what looks like a 16ms string/symbol cleanup: [2020-07-29T11:38:37.005+0000] GC(50174) Pause Final Mark (unload classes) [2020-07-29T11:38:37.005+0000] GC(50174) Using 8 of 8 workers for final marking [2020-07-29T11:38:37.021+0000] GC(50174) Cleaned string and symbol table, strings: 82632 processed, 0 removed, symbols: 313453 processed, 0 removed [2020-07-29T11:38:37.022+0000] GC(50174) Adaptive CSet Selection. 
Target Free: 2901M, Actual Free: 2333M, Max CSet: 853M, Min Garbage: 567M [2020-07-29T11:38:37.022+0000] GC(50174) Collectable Garbage: 13358M (97%), Immediate: 63032K (0%), CSet: 13296M (96%) [2020-07-29T11:38:37.024+0000] GC(50174) Pause Final Mark (unload classes) 18.629ms An excerpt from per-cycle stats on a different GC cycle showing the time reported under "Unload Classes": [2020-07-29T11:38:37.491+0000] Pause Final Mark (N) 18616 us [2020-07-29T11:38:37.491+0000] Finish Queues 1088 us [2020-07-29T11:38:37.491+0000] System Purge 296 us [2020-07-29T11:38:37.491+0000] Unload Classes 14760 us [2020-07-29T11:38:37.491+0000] Weak Roots 296 us [2020-07-29T11:38:37.491+0000] CLDG 1 us [2020-07-29T11:38:37.491+0000] Update Region States 33 us [2020-07-29T11:38:37.491+0000] Retire TLABs 417 us [2020-07-29T11:38:37.491+0000] Choose Collection Set 284 us [2020-07-29T11:38:37.491+0000] Rebuild Free Set 12 us [2020-07-29T11:38:37.491+0000] Initial Evacuation 1671 us, parallelism: 5.04x Zhengyu suggested -XX:+ClassUnloadingWithConcurrentMark, but we didn't see a difference. It looks like this is default true, and from our read of the code it appears its only effect is to disable class unloading if you set it to false. Zhengyu also suggested increasing ShenandoahUnloadClassesFrequency from its default of 5, we haven't tested this yet. Based on all of this, we had some followup questions: * Is our understanding of -XX:+ClassUnloadingWithConcurrentMark correct - is its only impact to disable class unloading if set to false? * Is there a way to get a detailed breakdown of final mark timing and/or class unloading to expose string/symbol table cleanup time? * Is 16ms a reasonable duration to cleanup string/symbol tables given ~80K strings and ~310K symbols? * Apart from increasing ShenandoahUnloadClassesFrequency, are there any options to reduce the impact of string/symbol table cleanup on final marking? Thanks very much for your help! Niall and Bernd ?On 7/17/20, 08:38, "Zhengyu Gu" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. On 7/17/20 11:13 AM, Connaughton, Niall wrote: > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? class unloading is also performed during final mark, so it impacts final mark pause as well, but probably without this accumulation effect. We have a parameter, ShenandoahUnloadClassesFrequency, to control frequency of class unloading when it is enabled, default is once every 5 GC cycles. You may want to tune the parameter to achieve some sort of balance. Thanks, -Zhengyu > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > Thanks, > Niall > > On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > Hi Niall, > > By default, class unloading is disabled for Shenandoah in JDK11. > Accumulated class loaders may prolong final mark. 
You may want to try: > > jcmd VM.classloader_stats > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > Thanks, > > -Zhengyu > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > > > Happy to take any pointers or provide any more info that would help. 
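Pulling the advice above into one place, a rough sketch of the kind of run being discussed. The flag names are the ones already mentioned in this thread; whether they sit behind -XX:+UnlockExperimentalVMOptions, and their exact defaults, can differ between JDK 11 builds, and app.jar / <pid> are placeholders rather than anything from this report:

  java -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC \
       -XX:+ClassUnloadingWithConcurrentMark \
       -XX:ShenandoahUnloadClassesFrequency=100 \
       -Xlog:gc*:file=gc.log:time,uptime \
       -jar app.jar

  jcmd <pid> VM.classloader_stats

-XX:ShenandoahUnloadClassesFrequency=100 is purely illustrative (a less frequent setting than the default of 5 quoted above), not a recommendation; the jcmd line is the classloader_stats check suggested earlier in the thread.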
> > > > Thanks, > > Niall > > > > From zgu at redhat.com Wed Jul 29 19:38:41 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 29 Jul 2020 15:38:41 -0400 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> Message-ID: <642c40f6-8c9a-9def-06ba-1e3a47f7f67b@redhat.com> Hi Niall, > > > Zhengyu suggested -XX:+ClassUnloadingWithConcurrentMark, but we didn't see a difference. It looks like this is default true, and from our read of the code it appears its only effect is to disable class unloading if you set it to false. Zhengyu also suggested increasing ShenandoahUnloadClassesFrequency from its default of 5, we haven't tested this yet. Aleksey just recently enabled class unloading in 11u, with much lower frequency (once per 100 cycles) [1]. I will take a look on string/symbol table cleaning issues. Thanks for reporting the issues. -Zhengyu [1] https://mail.openjdk.java.net/pipermail/shenandoah-dev/2020-July/012693.html > > Based on all of this, we had some followup questions: > > * Is our understanding of -XX:+ClassUnloadingWithConcurrentMark correct - is its only impact to disable class unloading if set to false? > * Is there a way to get a detailed breakdown of final mark timing and/or class unloading to expose string/symbol table cleanup time? > * Is 16ms a reasonable duration to cleanup string/symbol tables given ~80K strings and ~310K symbols? > * Apart from increasing ShenandoahUnloadClassesFrequency, are there any options to reduce the impact of string/symbol table cleanup on final marking? > > > Thanks very much for your help! > > Niall and Bernd > > ?On 7/17/20, 08:38, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > On 7/17/20 11:13 AM, Connaughton, Niall wrote: > > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? > > class unloading is also performed during final mark, so it impacts final > mark pause as well, but probably without this accumulation effect. > > We have a parameter, ShenandoahUnloadClassesFrequency, to control > frequency of class unloading when it is enabled, default is once every 5 > GC cycles. You may want to tune the parameter to achieve some sort of > balance. > > Thanks, > > -Zhengyu > > > > > > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > > > Thanks, > > Niall > > > > On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > Hi Niall, > > > > By default, class unloading is disabled for Shenandoah in JDK11. > > Accumulated class loaders may prolong final mark. 
You may want to try: > > > > jcmd VM.classloader_stats > > > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > > > Thanks, > > > > -Zhengyu > > > > > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > > > > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > > > > > Happy to take any pointers or provide any more info that would help. 
> > > > > > Thanks, > > > Niall > > > > > > > > > From zgu at redhat.com Wed Jul 29 20:56:18 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Wed, 29 Jul 2020 16:56:18 -0400 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> Message-ID: <0abdedcb-de33-cde8-1040-937540b5f282@redhat.com> Hi Niall, > > Based on all of this, we had some followup questions: > > * Is our understanding of -XX:+ClassUnloadingWithConcurrentMark correct - is its only impact to disable class unloading if set to false? Yes, setting ClassUnloadingWithConcurrentMark to false, still disables class unloading. > * Is there a way to get a detailed breakdown of final mark timing and/or class unloading to expose string/symbol table cleanup time? Not now. We can add these timings. > * Is 16ms a reasonable duration to cleanup string/symbol tables given ~80K strings and ~310K symbols? I don't know. Roman & Aleksey may have some insights. > * Apart from increasing ShenandoahUnloadClassesFrequency, are there any options to reduce the impact of string/symbol table cleanup on final marking? There is possibility that we can move string table cleaning to concurrent phase. I believe string table is a concurrent hash table in 11u already. Thanks, -Zhengyu > > > Thanks very much for your help! > > Niall and Bernd > > ?On 7/17/20, 08:38, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > On 7/17/20 11:13 AM, Connaughton, Niall wrote: > > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? > > class unloading is also performed during final mark, so it impacts final > mark pause as well, but probably without this accumulation effect. > > We have a parameter, ShenandoahUnloadClassesFrequency, to control > frequency of class unloading when it is enabled, default is once every 5 > GC cycles. You may want to tune the parameter to achieve some sort of > balance. > > Thanks, > > -Zhengyu > > > > > > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > > > Thanks, > > Niall > > > > On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > Hi Niall, > > > > By default, class unloading is disabled for Shenandoah in JDK11. > > Accumulated class loaders may prolong final mark. You may want to try: > > > > jcmd VM.classloader_stats > > > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > > > Thanks, > > > > -Zhengyu > > > > > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. 
The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > > > > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > > > > > Happy to take any pointers or provide any more info that would help. > > > > > > Thanks, > > > Niall > > > > > > > > > From zgu at redhat.com Thu Jul 30 14:05:22 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Thu, 30 Jul 2020 10:05:22 -0400 Subject: [11u] RFR 8250827: Shenandoah: needs to reset/finish StringTable's dead count before/after parallel walk Message-ID: Please review this small patch that triggers StringTable cleaning and resizing after parallel walk. 
Bug: https://bugs.openjdk.java.net/browse/JDK-8250827 Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250827/webrev.00/ Test: hotspot_gc_shenandoah Thanks, -Zhengyu From rkennke at redhat.com Thu Jul 30 14:47:54 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 30 Jul 2020 16:47:54 +0200 Subject: [11u] RFR 8250827: Shenandoah: needs to reset/finish StringTable's dead count before/after parallel walk In-Reply-To: References: Message-ID: <711f8132a93f628578f1578c092a325e56c98804.camel@redhat.com> Are you sure this is 11u-specific? I know that we have concurrent stringtable processing in later JDKs, but some code paths still use the parallelCleaning code, or is it not relevant there? Roman On Thu, 2020-07-30 at 10:05 -0400, Zhengyu Gu wrote: > Please review this small patch that triggers StringTable cleaning > and > resizing after parallel walk. > > Bug: https://bugs.openjdk.java.net/browse/JDK-8250827 > Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250827/webrev.00/ > > > Test: > hotspot_gc_shenandoah > > Thanks, > > -Zhengyu > From zgu at redhat.com Thu Jul 30 15:00:56 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Thu, 30 Jul 2020 11:00:56 -0400 Subject: [11u] RFR 8250827: Shenandoah: needs to reset/finish StringTable's dead count before/after parallel walk In-Reply-To: <711f8132a93f628578f1578c092a325e56c98804.camel@redhat.com> References: <711f8132a93f628578f1578c092a325e56c98804.camel@redhat.com> Message-ID: <8421a856-9e4b-e064-d490-6a4405a13328@redhat.com> No, we have a similar problem with 14. But the patch is not relevant there; we have to do a 14-specific fix. -Zhengyu On 7/30/20 10:47 AM, Roman Kennke wrote: > Are you sure this is 11u-specific? I know that we have concurrent > stringtable processing in later JDKs, but some code paths still use the > parallelCleaning code, or is it not relevant there? > > Roman > > On Thu, 2020-07-30 at 10:05 -0400, Zhengyu Gu wrote: >> Please review this small patch that triggers StringTable cleaning >> and >> resizing after parallel walk. >> >> Bug: https://bugs.openjdk.java.net/browse/JDK-8250827 >> Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250827/webrev.00/ >> >> >> Test: >> hotspot_gc_shenandoah >> >> Thanks, >> >> -Zhengyu >> > From rkennke at redhat.com Thu Jul 30 15:28:00 2020 From: rkennke at redhat.com (Roman Kennke) Date: Thu, 30 Jul 2020 17:28:00 +0200 Subject: [11u] RFR 8250827: Shenandoah: needs to reset/finish StringTable's dead count before/after parallel walk In-Reply-To: <8421a856-9e4b-e064-d490-6a4405a13328@redhat.com> References: <711f8132a93f628578f1578c092a325e56c98804.camel@redhat.com> <8421a856-9e4b-e064-d490-6a4405a13328@redhat.com> Message-ID: Hmm ok then. The patch looks good. Cheers, Roman > No, we have a similar problem with 14. But the patch is not relevant there; we > have > to do a 14-specific fix. > > -Zhengyu > > On 7/30/20 10:47 AM, Roman Kennke wrote: > > Are you sure this is 11u-specific? I know that we have concurrent > > stringtable processing in later JDKs, but some code paths still use > > the > > parallelCleaning code, or is it not relevant there? > > > > Roman > > > > On Thu, 2020-07-30 at 10:05 -0400, Zhengyu Gu wrote: > > > Please review this small patch that triggers StringTable cleaning > > > and > > > resizing after parallel walk.
> > > > > > Bug: https://bugs.openjdk.java.net/browse/JDK-8250827 > > > Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250827/webrev.00/ > > > > > > > > > Test: > > > hotspot_gc_shenandoah > > > > > > Thanks, > > > > > > -Zhengyu > > > From zgu at redhat.com Thu Jul 30 17:55:01 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Thu, 30 Jul 2020 13:55:01 -0400 Subject: [15] RFR 8250841: Shenandoah: need to reset/finish dead counters for StringTable/ResolvedMethodTable during STW root processing Message-ID: <762e60dc-3f1b-50f5-9e7a-0b70572f5007@redhat.com> Please review this small patch that triggers StringTable/ResolvedMethodTable cleaning and resizing after STW processing. Without this fix, dead entries may accumulate in the tables, result long pause time. Bug: https://bugs.openjdk.java.net/browse/JDK-8250841 Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250841/webrev.00/ Test: hotspot_gc_shenandoah Thanks, -Zhengyu From mathiske at amazon.com Thu Jul 30 18:12:31 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Thu, 30 Jul 2020 18:12:31 +0000 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: <0abdedcb-de33-cde8-1040-937540b5f282@redhat.com> References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> <0abdedcb-de33-cde8-1040-937540b5f282@redhat.com> Message-ID: Zhengyu, is this bug you filed directly related? https://bugs.openjdk.java.net/browse/JDK-8250841 Do you expect that the patch you also posted might help here? (http://cr.openjdk.java.net/~zgu/JDK-8250841/webrev.00/) Bernd ?On 7/29/20, 1:56 PM, "Zhengyu Gu" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Hi Niall, > > Based on all of this, we had some followup questions: > > * Is our understanding of -XX:+ClassUnloadingWithConcurrentMark correct - is its only impact to disable class unloading if set to false? Yes, setting ClassUnloadingWithConcurrentMark to false, still disables class unloading. > * Is there a way to get a detailed breakdown of final mark timing and/or class unloading to expose string/symbol table cleanup time? Not now. We can add these timings. > * Is 16ms a reasonable duration to cleanup string/symbol tables given ~80K strings and ~310K symbols? I don't know. Roman & Aleksey may have some insights. > * Apart from increasing ShenandoahUnloadClassesFrequency, are there any options to reduce the impact of string/symbol table cleanup on final marking? There is possibility that we can move string table cleaning to concurrent phase. I believe string table is a concurrent hash table in 11u already. Thanks, -Zhengyu > > > Thanks very much for your help! > > Niall and Bernd > > On 7/17/20, 08:38, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > On 7/17/20 11:13 AM, Connaughton, Niall wrote: > > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? > > class unloading is also performed during final mark, so it impacts final > mark pause as well, but probably without this accumulation effect. 
> > We have a parameter, ShenandoahUnloadClassesFrequency, to control > frequency of class unloading when it is enabled, default is once every 5 > GC cycles. You may want to tune the parameter to achieve some sort of > balance. > > Thanks, > > -Zhengyu > > > > > > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > > > Thanks, > > Niall > > > > On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > Hi Niall, > > > > By default, class unloading is disabled for Shenandoah in JDK11. > > Accumulated class loaders may prolong final mark. You may want to try: > > > > jcmd VM.classloader_stats > > > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > > > Thanks, > > > > -Zhengyu > > > > > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? 
Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > > > > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > > > > > Happy to take any pointers or provide any more info that would help. > > > > > > Thanks, > > > Niall > > > > > > > > > From shade at redhat.com Thu Jul 30 18:15:05 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Thu, 30 Jul 2020 20:15:05 +0200 Subject: [15] RFR 8250841: Shenandoah: need to reset/finish dead counters for StringTable/ResolvedMethodTable during STW root processing In-Reply-To: <762e60dc-3f1b-50f5-9e7a-0b70572f5007@redhat.com> References: <762e60dc-3f1b-50f5-9e7a-0b70572f5007@redhat.com> Message-ID: <44a74615-f044-d24e-8c30-568f4e29dd62@redhat.com> On 7/30/20 7:55 PM, Zhengyu Gu wrote: > Bug: https://bugs.openjdk.java.net/browse/JDK-8250841 > Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250841/webrev.00/ Looks fine to me. -- Thanks, -Aleksey From zgu at redhat.com Thu Jul 30 18:16:28 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Thu, 30 Jul 2020 14:16:28 -0400 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> <0abdedcb-de33-cde8-1040-937540b5f282@redhat.com> Message-ID: Hi Bernd, On 7/30/20 2:12 PM, Mathiske, Bernd wrote: > Zhengyu, > > is this bug you filed directly related? > https://bugs.openjdk.java.net/browse/JDK-8250841 > > Do you expect that the patch you also posted might help here? > (http://cr.openjdk.java.net/~zgu/JDK-8250841/webrev.00/) Yes, it is directly related and should help the pause times, but not sure by how much. -Zhengyu > > Bernd > > ?On 7/29/20, 1:56 PM, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > Hi Niall, > > > > Based on all of this, we had some followup questions: > > > > * Is our understanding of -XX:+ClassUnloadingWithConcurrentMark correct - is its only impact to disable class unloading if set to false? > Yes, setting ClassUnloadingWithConcurrentMark to false, still disables > class unloading. > > > * Is there a way to get a detailed breakdown of final mark timing and/or class unloading to expose string/symbol table cleanup time? > Not now. We can add these timings. > > > * Is 16ms a reasonable duration to cleanup string/symbol tables given ~80K strings and ~310K symbols? > > I don't know. Roman & Aleksey may have some insights. > > > * Apart from increasing ShenandoahUnloadClassesFrequency, are there any options to reduce the impact of string/symbol table cleanup on final marking? > > There is possibility that we can move string table cleaning to > concurrent phase. I believe string table is a concurrent hash table in > 11u already. > > Thanks, > > -Zhengyu > > > > > > > > Thanks very much for your help! 
> > > > Niall and Bernd > > > > On 7/17/20, 08:38, "Zhengyu Gu" wrote: > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > On 7/17/20 11:13 AM, Connaughton, Niall wrote: > > > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? > > > > class unloading is also performed during final mark, so it impacts final > > mark pause as well, but probably without this accumulation effect. > > > > We have a parameter, ShenandoahUnloadClassesFrequency, to control > > frequency of class unloading when it is enabled, default is once every 5 > > GC cycles. You may want to tune the parameter to achieve some sort of > > balance. > > > > Thanks, > > > > -Zhengyu > > > > > > > > > > > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > > > > > Thanks, > > > Niall > > > > > > On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > > > > > Hi Niall, > > > > > > By default, class unloading is disabled for Shenandoah in JDK11. > > > Accumulated class loaders may prolong final mark. You may want to try: > > > > > > jcmd VM.classloader_stats > > > > > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > > > > > Thanks, > > > > > > -Zhengyu > > > > > > > > > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. 
Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > > > > > > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > > > > > > > Happy to take any pointers or provide any more info that would help. > > > > > > > > Thanks, > > > > Niall > > > > > > > > > > > > > > > > From zgu at redhat.com Thu Jul 30 18:20:07 2020 From: zgu at redhat.com (Zhengyu Gu) Date: Thu, 30 Jul 2020 14:20:07 -0400 Subject: [15] RFR 8250841: Shenandoah: need to reset/finish dead counters for StringTable/ResolvedMethodTable during STW root processing In-Reply-To: <44a74615-f044-d24e-8c30-568f4e29dd62@redhat.com> References: <762e60dc-3f1b-50f5-9e7a-0b70572f5007@redhat.com> <44a74615-f044-d24e-8c30-568f4e29dd62@redhat.com> Message-ID: Thanks, Aleksey. -Zhengyu On 7/30/20 2:15 PM, Aleksey Shipilev wrote: > On 7/30/20 7:55 PM, Zhengyu Gu wrote: >> Bug: https://bugs.openjdk.java.net/browse/JDK-8250841 >> Webrev: http://cr.openjdk.java.net/~zgu/JDK-8250841/webrev.00/ > > Looks fine to me. > From mathiske at amazon.com Thu Jul 30 18:22:43 2020 From: mathiske at amazon.com (Mathiske, Bernd) Date: Thu, 30 Jul 2020 18:22:43 +0000 Subject: Unexpected issues with Final Mark pauses and pacer performance in JDK11 In-Reply-To: References: <2D0CB91B-5463-4F27-B611-E669449130C4@amazon.com> <1c20589f-d43f-518f-b07c-6e0d3de1d4db@redhat.com> <9DF51BD0-4368-4849-96E5-89157D46CE54@amazon.com> <20702f67-6013-db7a-5c9c-ea994248fc13@redhat.com> <0abdedcb-de33-cde8-1040-937540b5f282@redhat.com> Message-ID: Thanks! I'll add the patch and we will report back. ?On 7/30/20, 11:17 AM, "Zhengyu Gu" wrote: CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. Hi Bernd, On 7/30/20 2:12 PM, Mathiske, Bernd wrote: > Zhengyu, > > is this bug you filed directly related? > https://bugs.openjdk.java.net/browse/JDK-8250841 > > Do you expect that the patch you also posted might help here? > (http://cr.openjdk.java.net/~zgu/JDK-8250841/webrev.00/) Yes, it is directly related and should help the pause times, but not sure by how much. -Zhengyu > > Bernd > > On 7/29/20, 1:56 PM, "Zhengyu Gu" wrote: > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. 
> > > > Hi Niall, > > > > Based on all of this, we had some followup questions: > > > > * Is our understanding of -XX:+ClassUnloadingWithConcurrentMark correct - is its only impact to disable class unloading if set to false? > Yes, setting ClassUnloadingWithConcurrentMark to false, still disables > class unloading. > > > * Is there a way to get a detailed breakdown of final mark timing and/or class unloading to expose string/symbol table cleanup time? > Not now. We can add these timings. > > > * Is 16ms a reasonable duration to cleanup string/symbol tables given ~80K strings and ~310K symbols? > > I don't know. Roman & Aleksey may have some insights. > > > * Apart from increasing ShenandoahUnloadClassesFrequency, are there any options to reduce the impact of string/symbol table cleanup on final marking? > > There is possibility that we can move string table cleaning to > concurrent phase. I believe string table is a concurrent hash table in > 11u already. > > Thanks, > > -Zhengyu > > > > > > > > Thanks very much for your help! > > > > Niall and Bernd > > > > On 7/17/20, 08:38, "Zhengyu Gu" wrote: > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > On 7/17/20 11:13 AM, Connaughton, Niall wrote: > > > Thanks Zhengyu, I'm looking into this, I'm seeing a lot of class loaders containing a single class. I'll test with class unloading enabled. Considering it's disabled by default, is there a downside to enabling it? > > > > class unloading is also performed during final mark, so it impacts final > > mark pause as well, but probably without this accumulation effect. > > > > We have a parameter, ShenandoahUnloadClassesFrequency, to control > > frequency of class unloading when it is enabled, default is once every 5 > > GC cycles. You may want to tune the parameter to achieve some sort of > > balance. > > > > Thanks, > > > > -Zhengyu > > > > > > > > > > > > Roman & Aleksey, thanks for the details. I'm using 11.0.7, will setup a new test with an 11.0.8 build. > > > > > > Thanks, > > > Niall > > > > > > On 7/17/20, 06:35, "Zhengyu Gu" wrote: > > > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > > > > > > > > > Hi Niall, > > > > > > By default, class unloading is disabled for Shenandoah in JDK11. > > > Accumulated class loaders may prolong final mark. You may want to try: > > > > > > jcmd VM.classloader_stats > > > > > > If it is the case, you may want to try -XX:+ClassUnloadingWithConcurrentMark > > > > > > Thanks, > > > > > > -Zhengyu > > > > > > > > > > > > On 7/17/20 12:56 AM, Connaughton, Niall wrote: > > > > Hey all, firstly thanks for all the work on Shenandoah, I?m excited for its potential for us. > > > > > > > > I?ve been doing some testing and have run across a couple of issues that have me scratching my head. The first is Final Mark pauses are increasing steadily over several days. On day 1, Final Mark pauses are 6.5-8.5ms. By day 3, they?re 12-16ms. The heap occupancy is not ramping up, and neither are the concurrent cycle times, so I?m not sure what?s behind this. The application is running a 20GB heap, peaking around 35% live data heap occupancy, and allocating ~1.3-1.5GB/s. > > > > > > > > What angles can I look at to dig into the cause of increasing Final Mark pauses? 
I don?t see a lot of details on the Final Mark in the gc logs, and there doesn?t seem to be much difference in the logs over time, except for the pause duration increasing. Here?s an example of a Final Mark log for before/after comparison: > > > > > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Pause Final Mark > > > > [2020-07-13T22:27:28.835+0000] GC(2224) Using 8 of 8 workers for final marking > > > > [2020-07-13T22:27:28.839+0000] GC(2224) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2429M, Max CSet: 853M, Min Garbage: 0B > > > > [2020-07-13T22:27:28.840+0000] GC(2224) Collectable Garbage: 12171M (88% of total), 176M CSet, 1548 CSet regions > > > > [2020-07-13T22:27:28.840+0000] GC(2224) Immediate Garbage: 46151K (0% of total), 11 regions > > > > [2020-07-13T22:27:28.843+0000] GC(2224) Pause Final Mark 7.373ms > > > > > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Pause Final Mark > > > > [2020-07-15T23:25:05.780+0000] GC(24251) Using 8 of 8 workers for final marking > > > > [2020-07-15T23:25:05.787+0000] GC(24251) Adaptive CSet Selection. Target Free: 2047M, Actual Free: 2513M, Max CSet: 853M, Min Garbage: 0B > > > > [2020-07-15T23:25:05.787+0000] GC(24251) Collectable Garbage: 12062M (88% of total), 184M CSet, 1535 CSet regions > > > > [2020-07-15T23:25:05.787+0000] GC(24251) Immediate Garbage: 34711K (0% of total), 5 regions > > > > [2020-07-15T23:25:05.792+0000] GC(24251) Pause Final Mark 11.790ms > > > > > > > > > > > > The second issue I ran into was that the pacer seemed to be adding a lot of latency. I couldn?t find any traces in the logs of the pacer?s activity. The summary at shutdown from gc+stats is useful, but having some signs of the pacer in the gc logs as the application runs would help correlate against other logs showing latency spikes. Is there any way to get more visibility on the pacer? Disabling the pacer removed the latency impact and we started seeing some really positive signs on the latency. I was expecting that we?d just see more degenerated GC and the latency would be similar, but this wasn?t the case. > > > > > > > > I?m generally happy running with the pacer disabled, especially as it seems we?ll have more visibility into degenerated GCs than we will over the pacer, so we can track regression more easily. So I?m asking this more for understanding than solving a blocking issue. > > > > > > > > Happy to take any pointers or provide any more info that would help. 
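A minimal sketch of the two knobs this part of the thread keeps coming back to, assuming a JDK 11 Shenandoah build; the decorator choices are illustrative, and, as noted above, per-cycle pacer activity is not separately logged in these builds, so a denser -Xlog configuration plus the gc+stats summary remains the main source of pacer data. gc.log and app.jar are placeholders:

  java -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC \
       -XX:-ShenandoahPacing \
       -Xlog:gc*=info:file=gc.log:time,uptime,level,tags \
       -jar app.jar

-XX:-ShenandoahPacing corresponds to "running with the pacer disabled" as described above, trading pacer stalls for the possibility of more degenerated GCs that do show up in the log.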
> > > > > > > > Thanks, > > > > Niall > > > > > > > > > > > > > > > > From gnu.andrew at redhat.com Fri Jul 31 15:15:32 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Fri, 31 Jul 2020 16:15:32 +0100 Subject: [RFR] [8u] 8u265-b01 Upstream Sync Message-ID: <20200731151532.GA59309@stopbrexit> Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/ Merge changesets: http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/corba/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jaxp/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jaxws/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jdk/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/hotspot/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/langtools/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/nashorn/merge.changeset http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/root/merge.changeset Changes in aarch64-shenandoah-jdk8u265-b01: - JDK-8248219: aarch64: missing memory barrier in fast_storefield and fast_accessfield - JDK-8249677: Regression in 8u after JDK-8237117: Better ForkJoinPool behavior - JDK-8250546: Expect changed behaviour reported in JDK-8249846 Main issues of note: None, clean merge (no HotSpot changes, JDK-8248219 already upstream). diffstat for root b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) diffstat for corba b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) diffstat for jaxp b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) diffstat for jaxws b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) diffstat for langtools b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) diffstat for nashorn b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) diffstat for jdk b/.hgtags | 3 b/src/share/classes/java/util/concurrent/ForkJoinPool.java | 2 b/src/share/classes/java/util/concurrent/ForkJoinWorkerThread.java | 14 + b/test/java/util/concurrent/forkjoin/AccessControlContext.java | 108 ++++++++++ b/test/java/util/concurrent/forkjoin/AccessControlContext.policy | 3 5 files changed, 128 insertions(+), 2 deletions(-) diffstat for hotspot b/.hgtags | 3 +++ 1 file changed, 3 insertions(+) Successfully built on x86, x86_64, s390, s390x, ppc, ppc64, ppc64le & aarch64. Ok to push? -- Andrew :) Senior Free Java Software Engineer OpenJDK Package Owner Red Hat, Inc. (http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222 From shade at redhat.com Fri Jul 31 16:48:41 2020 From: shade at redhat.com (Aleksey Shipilev) Date: Fri, 31 Jul 2020 18:48:41 +0200 Subject: [RFR] [8u] 8u265-b01 Upstream Sync In-Reply-To: <20200731151532.GA59309@stopbrexit> References: <20200731151532.GA59309@stopbrexit> Message-ID: <470179aa-fb00-139d-7e18-11b55a02fa08@redhat.com> On 7/31/20 5:15 PM, Andrew Hughes wrote: > Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/ > > Merge changesets: > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/corba/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jaxp/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jaxws/merge.changeset Look trivially good. > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jdk/merge.changeset Looks good. 
> http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/hotspot/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/langtools/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/nashorn/merge.changeset > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/root/merge.changeset Look trivially good. > Ok to push? Yes. -- Thanks, -Aleksey From gnu.andrew at redhat.com Fri Jul 31 19:09:41 2020 From: gnu.andrew at redhat.com (Andrew Hughes) Date: Fri, 31 Jul 2020 20:09:41 +0100 Subject: [RFR] [8u] 8u265-b01 Upstream Sync In-Reply-To: <470179aa-fb00-139d-7e18-11b55a02fa08@redhat.com> References: <20200731151532.GA59309@stopbrexit> <470179aa-fb00-139d-7e18-11b55a02fa08@redhat.com> Message-ID: <20200731190941.GB59309@stopbrexit> On 18:48 Fri 31 Jul , Aleksey Shipilev wrote: > On 7/31/20 5:15 PM, Andrew Hughes wrote: > > Webrevs: https://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/ > > > > Merge changesets: > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/corba/merge.changeset > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jaxp/merge.changeset > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jaxws/merge.changeset > > Look trivially good. > > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/jdk/merge.changeset > > Looks good. > > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/hotspot/merge.changeset > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/langtools/merge.changeset > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/nashorn/merge.changeset > > http://cr.openjdk.java.net/~andrew/shenandoah-8/u265-b01/root/merge.changeset > > Look trivially good. > > > Ok to push? > > Yes. > > -- > Thanks, > -Aleksey > Thanks, pushed. -- Andrew :) Senior Free Java Software Engineer OpenJDK Package Owner Red Hat, Inc. (http://www.redhat.com) PGP Key: ed25519/0xCFDA0F9B35964222 (hkp://keys.gnupg.net) Fingerprint = 5132 579D D154 0ED2 3E04 C5A0 CFDA 0F9B 3596 4222