RFR: 8276901: Implement UseHeavyMonitors consistently [v10]
Roman Kennke
rkennke at openjdk.java.net
Thu Dec 2 14:41:54 UTC 2021
On Thu, 2 Dec 2021 00:55:02 GMT, Martin Doerr <mdoerr at openjdk.org> wrote:
>>> > I believe you still have to change something in sharedRuntime_ppc.cpp, similar to what I did in, e.g., sharedRuntime_aarch64.cpp.
>>>
>>> You mean in `generate_native_wrapper`? I already did. It uses the same assembler function as C2 on PPC64. Did I miss anything else? I think hacking `unlock` is optional. The additional checks don't really disturb.
>>
>> Ah I haven't seen it, sorry.
>> It turns out, I cannot avoid emitting FastLockNode, some backends (x86 and aarch64) also generate fast-path code that deals with ObjectMonitor, and we want this even when running with +UseHeavyMonitors.
>>
>> Can you verify the new testcase, and perhaps some test programs that do some locking with -XX:+UseHeavyMonitors -XX:+VerifyHeavyMonitors ? You also need to include PPC in arguments.cpp and synchronizer.cpp changes to enable that stuff on PPC:
>
>> It turns out, I cannot avoid emitting FastLockNode, some backends (x86 and aarch64) also generate fast-path code that deals with ObjectMonitor, and we want this even when running with +UseHeavyMonitors.
>
> Ok, thanks for checking. You have convinced me that your version is fine. We should do it the same way on PPC64:
>
> diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
> index 98565003691..cb58e775422 100644
> --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
> +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
> @@ -2660,27 +2660,32 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
> andi_(temp, displaced_header, markWord::monitor_value);
> bne(CCR0, object_has_monitor);
>
> - // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
> - ori(displaced_header, displaced_header, markWord::unlocked_value);
> -
> - // Load Compare Value application register.
> -
> - // Initialize the box. (Must happen before we update the object mark!)
> - std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
> -
> - // Must fence, otherwise, preceding store(s) may float below cmpxchg.
> - // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
> - cmpxchgd(/*flag=*/flag,
> - /*current_value=*/current_header,
> - /*compare_value=*/displaced_header,
> - /*exchange_value=*/box,
> - /*where=*/oop,
> - MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
> - MacroAssembler::cmpxchgx_hint_acquire_lock(),
> - noreg,
> - &cas_failed,
> - /*check without membar and ldarx first*/true);
> - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> + if (!UseHeavyMonitors) {
> + // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
> + ori(displaced_header, displaced_header, markWord::unlocked_value);
> +
> + // Load Compare Value application register.
> +
> + // Initialize the box. (Must happen before we update the object mark!)
> + std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
> +
> + // Must fence, otherwise, preceding store(s) may float below cmpxchg.
> + // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
> + cmpxchgd(/*flag=*/flag,
> + /*current_value=*/current_header,
> + /*compare_value=*/displaced_header,
> + /*exchange_value=*/box,
> + /*where=*/oop,
> + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
> + MacroAssembler::cmpxchgx_hint_acquire_lock(),
> + noreg,
> + &cas_failed,
> + /*check without membar and ldarx first*/true);
> + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> + } else {
> + // Set NE to indicate 'failure' -> take slow-path.
> + crandc(flag, Assembler::equal, flag, Assembler::equal);
> + }
>
> // If the compare-and-exchange succeeded, then we found an unlocked
> // object and we have now locked it.
> @@ -2768,12 +2773,14 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
> }
> #endif
>
> - // Find the lock address and load the displaced header from the stack.
> - ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
> + if (!UseHeavyMonitors) {
> + // Find the lock address and load the displaced header from the stack.
> + ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
>
> - // If the displaced header is 0, we have a recursive unlock.
> - cmpdi(flag, displaced_header, 0);
> - beq(flag, cont);
> + // If the displaced header is 0, we have a recursive unlock.
> + cmpdi(flag, displaced_header, 0);
> + beq(flag, cont);
> + }
>
> // Handle existing monitor.
> // The object has an existing monitor iff (mark & monitor_value) != 0.
> @@ -2782,20 +2789,24 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
> andi_(R0, current_header, markWord::monitor_value);
> bne(CCR0, object_has_monitor);
>
> - // Check if it is still a light weight lock, this is is true if we see
> - // the stack address of the basicLock in the markWord of the object.
> - // Cmpxchg sets flag to cmpd(current_header, box).
> - cmpxchgd(/*flag=*/flag,
> - /*current_value=*/current_header,
> - /*compare_value=*/box,
> - /*exchange_value=*/displaced_header,
> - /*where=*/oop,
> - MacroAssembler::MemBarRel,
> - MacroAssembler::cmpxchgx_hint_release_lock(),
> - noreg,
> - &cont);
> -
> - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> + if (!UseHeavyMonitors) {
> + // Check if it is still a light weight lock, this is is true if we see
> + // the stack address of the basicLock in the markWord of the object.
> + // Cmpxchg sets flag to cmpd(current_header, box).
> + cmpxchgd(/*flag=*/flag,
> + /*current_value=*/current_header,
> + /*compare_value=*/box,
> + /*exchange_value=*/displaced_header,
> + /*where=*/oop,
> + MacroAssembler::MemBarRel,
> + MacroAssembler::cmpxchgx_hint_release_lock(),
> + noreg,
> + &cont);
> + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> + } else {
> + // Set NE to indicate 'failure' -> take slow-path.
> + crandc(flag, Assembler::equal, flag, Assembler::equal);
> + }
>
> // Handle existing monitor.
> b(cont);
> diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp
> index 3396adc0799..969c8e82b91 100644
> --- a/src/hotspot/share/runtime/arguments.cpp
> +++ b/src/hotspot/share/runtime/arguments.cpp
> @@ -2021,12 +2021,12 @@ bool Arguments::check_vm_args_consistency() {
> }
> #endif
>
> -#if !defined(X86) && !defined(AARCH64)
> +#if !defined(X86) && !defined(AARCH64) && !defined(PPC64)
> if (UseHeavyMonitors) {
> warning("UseHeavyMonitors is not fully implemented on this architecture");
> }
> #endif
> -#ifdef X86
> +#if defined(X86) || defined(PPC64)
> if (UseHeavyMonitors && UseRTMForStackLocks) {
> fatal("-XX:+UseHeavyMonitors and -XX:+UseRTMForStackLocks are mutually exclusive");
> }
> diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp
> index 4c5ea4a6e40..4f9c7c21a9b 100644
> --- a/src/hotspot/share/runtime/synchronizer.cpp
> +++ b/src/hotspot/share/runtime/synchronizer.cpp
> @@ -418,7 +418,7 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread
> }
>
> static bool useHeavyMonitors() {
> -#if defined(X86) || defined(AARCH64)
> +#if defined(X86) || defined(AARCH64) || defined(PPC64)
> return UseHeavyMonitors;
> #else
> return false;
> diff --git a/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java b/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java
> index cd32e222f68..922b18836dd 100644
> --- a/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java
> +++ b/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java
> @@ -48,7 +48,7 @@
> /*
> * @test
> * @summary Exercise multithreaded maps, using only heavy monitors.
> - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
> + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch == "ppc64" | os.arch == "ppc64le"
> * @library /test/lib
> * @run main/othervm/timeout=1600 -XX:+IgnoreUnrecognizedVMOptions -XX:+UseHeavyMonitors -XX:+VerifyHeavyMonitors MapLoops
> */
>
> Note that this version does no longer require changes in sharedRuntime_ppc because the native wrapper generator uses the same code as C2. The test case has passed.
Thanks, @TheRealMDoerr ! I've added your PPC port to this PR.
-------------
PR: https://git.openjdk.java.net/jdk/pull/6320
More information about the hotspot-compiler-dev
mailing list