performance surprise with Object.hashCode()

Aleksey Shipilev aleksey.shipilev at oracle.com
Mon May 13 11:35:06 PDT 2013


On 05/13/2013 09:29 PM, Andy Nuss wrote:
> Here's my benchmarking code.  I don't see at all why a virtual call to
> Integer.hashCode is so much slower than the base line loop skeleton +
> inline access to member value of Integer (3rd case) + 0.5 nanos of
> vtable call (measured in a separate benchmark).
> 
> To me, this indicates a serious design flaw in so-called intrinsic call
> on modern fast machines that could be corrected (were it possible) by
> making hashCode() a normal non-native function that forwards to
> System.identityHashCode().

Hm, it does not look like a microbenchmark I would trust:
  * Warmups?
  * Forks?
  * Statistics?

This is what I can do with JMH:
 http://openjdk.java.net/projects/code-tools/jmh/

/**
 * JMH microbenchmark comparing the cost of {@code hashCode()} calls made through
 * fields that differ in static type and in the runtime class of the object held.
 *
 * <p>Each benchmark method returns the computed value rather than discarding it.
 * Benchmark names follow the pattern {@code <static type>_<runtime type>}, where
 * {@code o} stands for {@code Object} and {@code i} for {@code Integer}.
 */
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class AndyBench {

    // Static type Object, runtime class Object.
    private Object o_o = new Object();
    // Static type Object, runtime class Integer.
    private Object o_i = new Integer(42);
    // Static type Integer, runtime class Integer.
    private Integer i_i = new Integer(42);

    /** Baseline: returns a constant and makes no {@code hashCode()} call. */
    @GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
    public int eee() {
        return 42;
    }

    /** Calls {@code hashCode()} on the Object-typed field that holds a plain {@code Object}. */
    @GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
    public int o_o() {
        return o_o.hashCode();
    }

    /** Calls {@code hashCode()} on the Object-typed field that holds an {@code Integer}. */
    @GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
    public int o_i() {
        return o_i.hashCode();
    }

    /** Calls {@code hashCode()} on the Integer-typed field that holds an {@code Integer}. */
    @GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
    public int i_i() {
        return i_i.hashCode();
    }
}

On my Linux x86_64:

$ ~/Install/jdk8b88/bin/java  -jar target/microbenchmarks.jar
".*AndyBench.*" -wi 1 -r 1 -i 5
...
  o.s.g.a.AndyBench.eee:  0.997 +- 0.020 nsec/op
  o.s.g.a.AndyBench.i_i:  1.288 +- 0.035 nsec/op
  o.s.g.a.AndyBench.o_o:  2.709 +- 0.129 nsec/op
  o.s.g.a.AndyBench.o_i:  4.925 +- 0.098 nsec/op

Looking into the assembly, this is what *admittedly* happens:
  * i_i case has the typecheck against Integer, and then does the
inlined call to Integer.hashCode()
  * o_o case has the typecheck against Object (AFAIU, this is how
inlined intrinsic in C2 works), passes the guard, and does the inlined
call to Object.hashCode()
  * o_i case has the typecheck against Object, but FAILS it, and we go
through the slow path to real virtual call for Integer.hashCode().

Note that the code generated in the o_o and o_i cases is structurally
indistinguishable, but o_i naturally goes through the slow path. I
wonder why we are losing the information about the receiver type being
Integer in the o_i case, and skipping the proper devirtualization...

-Aleksey.

[1] i_i case, hot loop:
  0x00007f683d065ad3: mov    0x14(%r11),%r10d   ;*getfield i_i
  0x00007f683d065ad7: mov    0xc(%r12,%r10,8),%r10d  ;*getfield value
  0x00007f683d065adc: mov    0xc(%rcx),%r9d     ;*getfield sink
  0x00007f683d065ae0: mov    0x8c(%r12,%r9,8),%r8d  ;*getfield i1
  0x00007f683d065ae8: mov    0xc(%rcx),%edi     ;*getfield sink
  0x00007f683d065aeb: mov    0xc0(%r12,%rdi,8),%r9d  ;*getfield i2
  0x00007f683d065af3: cmp    %r9d,%r10d
  0x00007f683d065af6: je     0x00007f683d065b8b  ;*ifeq
  0x00007f683d065afc: movzbl 0x8c(%rbx),%r8d    ;*getfield isDone
  0x00007f683d065b04: add    $0x1,%rbp          ;
  0x00007f683d065b08: test   %eax,0xb9774f2(%rip)
  0x00007f683d065b0e: test   %r8d,%r8d
  0x00007f683d065b11: je     0x00007f683d065ad3  ;*ifeq

[2] o_o case, hot loop:
  0x00007f683d074180: mov    0xc(%r8),%r10d     ;*getfield sink
  0x00007f683d074184: mov    0x8c(%r12,%r10,8),%ecx  ;*getfield i1
  0x00007f683d07418c: mov    0xc(%r8),%r10d     ;*getfield sink
  0x00007f683d074190: mov    0xc0(%r12,%r10,8),%r11d  ;*getfield i2
  0x00007f683d074198: cmp    %r11d,%eax
  0x00007f683d07419b: je     0x00007f683d074210  ;*ifeq
  0x00007f683d07419d: mov    0x18(%rsp),%r11d
  0x00007f683d0741a2: movzbl 0x8c(%r12,%r11,8),%r10d  ;*getfield isDone
  0x00007f683d0741ab: add    $0x1,%r9           ; OopMap{r8=Oop rbx=Oop
rbp=Oop [24]=NarrowOop off=335}
  0x00007f683d0741af: test   %eax,0xb968e4b(%rip)        #
0x00007f68489dd000
  0x00007f683d0741b5: test   %r10d,%r10d
  0x00007f683d0741b8: jne    0x00007f683d074231  ;*aload_3
  0x00007f683d0741ba: mov    0xc(%rbp),%r10d    ;*getfield o_o
  0x00007f683d0741be: mov    0x8(%r12,%r10,8),%r11d  ;*invokevirtual
hashCode
  0x00007f683d0741c3: mov    0x1e8(%r12,%r11,8),%r11
  0x00007f683d0741cb: lea    (%r12,%r10,8),%rsi  ;*getfield o_o
  // long and boring Object.hashCode fastpath follows:
  0x00007f683d0741cf: mov    $0x7f68453ae6d0,%r10  ;   {metadata({method}
 {0x00007f68453ae6d0} 'hashCode' '()I' in 'java/lang/Object')}
  0x00007f683d0741d9: cmp    %r10,%r11
  0x00007f683d0741dc: jne    0x00007f683d07414f
  0x00007f683d0741e2: mov    (%rsi),%r10
  0x00007f683d0741e5: mov    %r10,%r11
  0x00007f683d0741e8: and    $0x7,%r11
  0x00007f683d0741ec: cmp    $0x1,%r11
  0x00007f683d0741f0: jne    0x00007f683d07414f
  0x00007f683d0741f6: shr    $0x8,%r10
  0x00007f683d0741fa: mov    %r10d,%eax
  0x00007f683d0741fd: and    $0x7fffffff,%eax
  0x00007f683d074203: test   %eax,%eax
  0x00007f683d074205: je     0x00007f683d07414f  ;*invokevirtual hashCode
  0x00007f683d07420b: jmpq   0x00007f683d074180

[3] o_i case, hot loop:
  0x00007f683d070c00: mov    0xc(%r8),%r10d     ;*getfield sink
  0x00007f683d070c04: mov    0x8c(%r12,%r10,8),%ecx  ;*getfield i1
  0x00007f683d070c0c: mov    0xc(%r8),%r10d     ;*getfield sink
  0x00007f683d070c10: mov    0xc0(%r12,%r10,8),%r11d  ;*getfield i2
  0x00007f683d070c18: cmp    %r11d,%eax
  0x00007f683d070c1b: je     0x00007f683d070c90  ;*ifeq
  0x00007f683d070c1d: mov    0x18(%rsp),%r11d
  0x00007f683d070c22: movzbl 0x8c(%r12,%r11,8),%r10d  ;*getfield isDone
  0x00007f683d070c2b: add    $0x1,%r9           ; OopMap{r8=Oop rbx=Oop
rbp=Oop [24]=NarrowOop off=335}
  0x00007f683d070c2f: test   %eax,0xb96c3cb(%rip)        #
0x00007f68489dd000
  0x00007f683d070c35: test   %r10d,%r10d
  0x00007f683d070c38: jne    0x00007f683d070cb1  ;*aload_3
  0x00007f683d070c3a: mov    0x10(%rbp),%r10d   ;*getfield o_i
  0x00007f683d070c3e: mov    0x8(%r12,%r10,8),%r11d  ;*invokevirtual
hashCode
  0x00007f683d070c43: mov    0x1e8(%r12,%r11,8),%r11
  0x00007f683d070c4b: lea    (%r12,%r10,8),%rsi  ;*getfield o_i
  // long and boring Object.hashCode fastpath follows:
  0x00007f683d070c4f: mov    $0x7f68453ae6d0,%r10  ;   {metadata({method}
 {0x00007f68453ae6d0} 'hashCode' '()I' in 'java/lang/Object')}
  0x00007f683d070c59: cmp    %r10,%r11
  0x00007f683d070c5c: jne    0x00007f683d070bcf
  0x00007f683d070c62: mov    (%rsi),%r10
  0x00007f683d070c65: mov    %r10,%r11
  0x00007f683d070c68: and    $0x7,%r11
  0x00007f683d070c6c: cmp    $0x1,%r11
  0x00007f683d070c70: jne    0x00007f683d070bcf
  0x00007f683d070c76: shr    $0x8,%r10
  0x00007f683d070c7a: mov    %r10d,%eax
  0x00007f683d070c7d: and    $0x7fffffff,%eax
  0x00007f683d070c83: test   %eax,%eax
  0x00007f683d070c85: je     0x00007f683d070bcf  ;*invokevirtual hashCode
  0x00007f683d070c8b: jmpq   0x00007f683d070c00



More information about the hotspot-compiler-dev mailing list