RFR: 8315884: New Object to ObjectMonitor mapping [v9]

Roman Kennke rkennke at openjdk.org
Tue Aug 13 16:05:57 UTC 2024


On Tue, 13 Aug 2024 12:57:23 GMT, Axel Boldt-Christmas <aboldtch at openjdk.org> wrote:

>> src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp line 674:
>> 
>>> 672: 
>>> 673:       // Search for obj in cache.
>>> 674:       bind(loop);
>> 
>> Same loop transformation would be possible here.
>
> I tried the following (see diff below) and it shows about a 5-10% regression in most the `LockUnlock.testInflated*` micros. Also tried with just `num_unrolled = 1` saw the same regression.  Maybe there was some other pattern you were thinking of. There are probably architecture and platform differences. This can and should probably be explored in a followup PR.
> 
> 
> 
> diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
> index 5dbfdbc225d..4e6621cfece 100644
> --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
> +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
> @@ -663,25 +663,28 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
>  
>        const int num_unrolled = 2;
>        for (int i = 0; i < num_unrolled; i++) {
> -        cmpptr(obj, Address(t));
> -        jccb(Assembler::equal, monitor_found);
> -        increment(t, in_bytes(OMCache::oop_to_oop_difference()));
> +        Label next;
> +        cmpptr(obj, Address(t, OMCache::oop_to_oop_difference() * i));
> +        jccb(Assembler::notEqual, next);
> +        increment(t, in_bytes(OMCache::oop_to_oop_difference() * i));
> +        jmpb(monitor_found);
> +        bind(next);
>        }
> +      increment(t, in_bytes(OMCache::oop_to_oop_difference() * (num_unrolled - 1)));
>  
>        Label loop;
>  
>        // Search for obj in cache.
>        bind(loop);
> -
> -      // Check for match.
> -      cmpptr(obj, Address(t));
> -      jccb(Assembler::equal, monitor_found);
> -
> +      // Advance.
> +      increment(t, in_bytes(OMCache::oop_to_oop_difference()));
>        // Search until null encountered, guaranteed _null_sentinel at end.
>        cmpptr(Address(t), 1);
>        jcc(Assembler::below, slow_path); // 0 check, but with ZF=0 when *t == 0
> -      increment(t, in_bytes(OMCache::oop_to_oop_difference()));
> -      jmpb(loop);
> +
> +      // Check for match.
> +      cmpptr(obj, Address(t));
> +      jccb(Assembler::notEqual, loop);
>  
>        // Cache hit.
>        bind(monitor_found);

Yeah it's probably not very important. But it's not quite what I had in mind, I was thinking more something like (aarch64 version, untested, may be wrong):


diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 19af03d3488..05bbb5760b8 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -302,14 +302,14 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
       Label monitor_found;

       // Load cache address
-      lea(t3_t, Address(rthread, JavaThread::om_cache_oops_offset()));
+      lea(t3_t, Address(rthread, JavaThread::om_cache_oops_offset() - OMCache::oop_to_oop_difference()));

       const int num_unrolled = 2;
       for (int i = 0; i < num_unrolled; i++) {
+        increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
         ldr(t1, Address(t3_t));
         cmp(obj, t1);
         br(Assembler::EQ, monitor_found);
-        increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
       }

       Label loop;
@@ -317,16 +317,14 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
       // Search for obj in cache.
       bind(loop);

-      // Check for match.
-      ldr(t1, Address(t3_t));
-      cmp(obj, t1);
-      br(Assembler::EQ, monitor_found);
+      increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));

+      ldr(t1, Address(t3_t));
       // Search until null encountered, guaranteed _null_sentinel at end.
-      increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
-      cbnz(t1, loop);
-      // Cache Miss, NE set from cmp above, cbnz does not set flags
-      b(slow_path);
+      cbz(t1, slow_path);
+      // Check for match.
+      cmp(obj, t1);
+      br(Assembler::NE, loop);

       bind(monitor_found);

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/20067#discussion_r1715557161


More information about the core-libs-dev mailing list