RFR: 8276901: Implement UseHeavyMonitors consistently [v10]

Roman Kennke rkennke at openjdk.java.net
Thu Dec 2 14:41:54 UTC 2021


On Thu, 2 Dec 2021 00:55:02 GMT, Martin Doerr <mdoerr at openjdk.org> wrote:

>>> > I believe you still have to change something in sharedRuntime_ppc.cpp, similar to what I did in, e.g., sharedRuntime_aarch64.cpp.
>>> 
>>> You mean in `generate_native_wrapper`? I already did. It uses the same assembler function as C2 on PPC64. Did I miss anything else? I think hacking `unlock` is optional. The additional checks don't really disturb.
>> 
>> Ah I haven't seen it, sorry.
>> It turns out, I cannot avoid emitting FastLockNode, some backends (x86 and aarch64) also generate fast-path code that deals with ObjectMonitor, and we want this even when running with +UseHeavyMonitors.
>> 
>> Can you verify the new testcase, and perhaps some test programs that do some locking with -XX:+UseHeavyMonitors -XX:+VerifyHeavyMonitors ? You also need to include PPC in arguments.cpp and synchronizer.cpp changes to enable that stuff on PPC:
>
>> It turns out, I cannot avoid emitting FastLockNode, some backends (x86 and aarch64) also generate fast-path code that deals with ObjectMonitor, and we want this even when running with +UseHeavyMonitors.
> 
> Ok, thanks for checking. You have convinced me that your version is fine. We should do it the same way on PPC64:
> 
> diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
> index 98565003691..cb58e775422 100644
> --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
> +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
> @@ -2660,27 +2660,32 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
>    andi_(temp, displaced_header, markWord::monitor_value);
>    bne(CCR0, object_has_monitor);
>  
> -  // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
> -  ori(displaced_header, displaced_header, markWord::unlocked_value);
> -
> -  // Load Compare Value application register.
> -
> -  // Initialize the box. (Must happen before we update the object mark!)
> -  std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
> -
> -  // Must fence, otherwise, preceding store(s) may float below cmpxchg.
> -  // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
> -  cmpxchgd(/*flag=*/flag,
> -           /*current_value=*/current_header,
> -           /*compare_value=*/displaced_header,
> -           /*exchange_value=*/box,
> -           /*where=*/oop,
> -           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
> -           MacroAssembler::cmpxchgx_hint_acquire_lock(),
> -           noreg,
> -           &cas_failed,
> -           /*check without membar and ldarx first*/true);
> -  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> +  if (!UseHeavyMonitors) {
> +    // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
> +    ori(displaced_header, displaced_header, markWord::unlocked_value);
> +
> +    // Load Compare Value application register.
> +
> +    // Initialize the box. (Must happen before we update the object mark!)
> +    std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
> +
> +    // Must fence, otherwise, preceding store(s) may float below cmpxchg.
> +    // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
> +    cmpxchgd(/*flag=*/flag,
> +             /*current_value=*/current_header,
> +             /*compare_value=*/displaced_header,
> +             /*exchange_value=*/box,
> +             /*where=*/oop,
> +             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
> +             MacroAssembler::cmpxchgx_hint_acquire_lock(),
> +             noreg,
> +             &cas_failed,
> +             /*check without membar and ldarx first*/true);
> +    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> +  } else {
> +    // Set NE to indicate 'failure' -> take slow-path.
> +    crandc(flag, Assembler::equal, flag, Assembler::equal);
> +  }
>  
>    // If the compare-and-exchange succeeded, then we found an unlocked
>    // object and we have now locked it.
> @@ -2768,12 +2773,14 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
>    }
>  #endif
>  
> -  // Find the lock address and load the displaced header from the stack.
> -  ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
> +  if (!UseHeavyMonitors) {
> +    // Find the lock address and load the displaced header from the stack.
> +    ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
>  
> -  // If the displaced header is 0, we have a recursive unlock.
> -  cmpdi(flag, displaced_header, 0);
> -  beq(flag, cont);
> +    // If the displaced header is 0, we have a recursive unlock.
> +    cmpdi(flag, displaced_header, 0);
> +    beq(flag, cont);
> +  }
>  
>    // Handle existing monitor.
>    // The object has an existing monitor iff (mark & monitor_value) != 0.
> @@ -2782,20 +2789,24 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
>    andi_(R0, current_header, markWord::monitor_value);
>    bne(CCR0, object_has_monitor);
>  
> -  // Check if it is still a light weight lock, this is is true if we see
> -  // the stack address of the basicLock in the markWord of the object.
> -  // Cmpxchg sets flag to cmpd(current_header, box).
> -  cmpxchgd(/*flag=*/flag,
> -           /*current_value=*/current_header,
> -           /*compare_value=*/box,
> -           /*exchange_value=*/displaced_header,
> -           /*where=*/oop,
> -           MacroAssembler::MemBarRel,
> -           MacroAssembler::cmpxchgx_hint_release_lock(),
> -           noreg,
> -           &cont);
> -
> -  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> +  if (!UseHeavyMonitors) {
> +    // Check if it is still a light weight lock, this is is true if we see
> +    // the stack address of the basicLock in the markWord of the object.
> +    // Cmpxchg sets flag to cmpd(current_header, box).
> +    cmpxchgd(/*flag=*/flag,
> +             /*current_value=*/current_header,
> +             /*compare_value=*/box,
> +             /*exchange_value=*/displaced_header,
> +             /*where=*/oop,
> +             MacroAssembler::MemBarRel,
> +             MacroAssembler::cmpxchgx_hint_release_lock(),
> +             noreg,
> +             &cont);
> +    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
> +  } else {
> +    // Set NE to indicate 'failure' -> take slow-path.
> +    crandc(flag, Assembler::equal, flag, Assembler::equal);
> +  }
>  
>    // Handle existing monitor.
>    b(cont);
> diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp
> index 3396adc0799..969c8e82b91 100644
> --- a/src/hotspot/share/runtime/arguments.cpp
> +++ b/src/hotspot/share/runtime/arguments.cpp
> @@ -2021,12 +2021,12 @@ bool Arguments::check_vm_args_consistency() {
>    }
>  #endif
>  
> -#if !defined(X86) && !defined(AARCH64)
> +#if !defined(X86) && !defined(AARCH64) && !defined(PPC64)
>    if (UseHeavyMonitors) {
>      warning("UseHeavyMonitors is not fully implemented on this architecture");
>    }
>  #endif
> -#ifdef X86
> +#if defined(X86) || defined(PPC64)
>    if (UseHeavyMonitors && UseRTMForStackLocks) {
>      fatal("-XX:+UseHeavyMonitors and -XX:+UseRTMForStackLocks are mutually exclusive");
>    }
> diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp
> index 4c5ea4a6e40..4f9c7c21a9b 100644
> --- a/src/hotspot/share/runtime/synchronizer.cpp
> +++ b/src/hotspot/share/runtime/synchronizer.cpp
> @@ -418,7 +418,7 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread
>  }
>  
>  static bool useHeavyMonitors() {
> -#if defined(X86) || defined(AARCH64)
> +#if defined(X86) || defined(AARCH64) || defined(PPC64)
>    return UseHeavyMonitors;
>  #else
>    return false;
> diff --git a/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java b/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java
> index cd32e222f68..922b18836dd 100644
> --- a/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java
> +++ b/test/jdk/java/util/concurrent/ConcurrentHashMap/MapLoops.java
> @@ -48,7 +48,7 @@
>  /*
>   * @test
>   * @summary Exercise multithreaded maps, using only heavy monitors.
> - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
> + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch == "ppc64" | os.arch == "ppc64le"
>   * @library /test/lib
>   * @run main/othervm/timeout=1600 -XX:+IgnoreUnrecognizedVMOptions -XX:+UseHeavyMonitors -XX:+VerifyHeavyMonitors MapLoops
>   */
> 
> Note that this version does no longer require changes in sharedRuntime_ppc because the native wrapper generator uses the same code as C2. The test case has passed.

Thanks, @TheRealMDoerr ! I've added your PPC port to this PR.

-------------

PR: https://git.openjdk.java.net/jdk/pull/6320


More information about the hotspot-compiler-dev mailing list