performance surprise with Object.hashCode()
Aleksey Shipilev
aleksey.shipilev at oracle.com
Mon May 13 11:35:06 PDT 2013
On 05/13/2013 09:29 PM, Andy Nuss wrote:
> Here's my benchmarking code. I don't see at all why a virtual call to
> Integer.hashCode is so much slower than the base line loop skeleton +
> inline access to member value of Integer (3rd case) + 0.5 nanos of
> vtable call (measured in a separate benchmark).
>
> To me, this indicates a serious design flaw in so-called intrinsic call
> on modern fast machines that could be corrected (were it possible) by
> making hashCode() a normal non-native function that forwards to
> System.identityHashCode().
Hm, it does not look like a microbenchmark I would trust:
* Warmups?
* Forks?
* Statistics?
This is what I can do with JMH:
http://openjdk.java.net/projects/code-tools/jmh/
/**
 * JMH microbenchmark comparing the cost of {@code hashCode()} calls that differ
 * only in the static (declared) type of the receiver field and the runtime
 * class of the object stored in it.
 *
 * <p>Benchmark methods are named {@code <declared type>_<runtime type>}, where
 * {@code o} stands for {@code Object} and {@code i} for {@code Integer};
 * {@code eee} is the empty baseline. Each method returns its result rather
 * than discarding it.
 *
 * <p>Results are reported in nanoseconds (see {@code @OutputTimeUnit}).
 * NOTE(review): {@code Scope.Thread} presumably gives each benchmark thread its
 * own instance of this class -- confirm against the JMH documentation.
 */
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class AndyBench {
// Declared as Object, holds a plain Object instance.
private Object o_o = new Object();
// Declared as Object, but holds an Integer instance.
private Object o_i = new Integer(42);
// Declared as Integer, holds an Integer instance.
private Integer i_i = new Integer(42);
/**
 * Baseline: returns a constant without touching any field or calling
 * {@code hashCode()}.
 *
 * @return the constant 42
 */
@GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
public int eee() {
return 42;
}
/**
 * Calls {@code hashCode()} through an Object-typed field whose value is a
 * plain {@code Object}.
 *
 * @return the hash code of the object stored in {@code o_o}
 */
@GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
public int o_o() {
return o_o.hashCode();
}
/**
 * Calls {@code hashCode()} through an Object-typed field whose value is an
 * {@code Integer}, so the declared receiver type and the runtime class differ.
 *
 * @return the hash code of the object stored in {@code o_i}
 */
@GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
public int o_i() {
return o_i.hashCode();
}
/**
 * Calls {@code hashCode()} through an Integer-typed field whose value is an
 * {@code Integer}.
 *
 * @return the hash code of the object stored in {@code i_i}
 */
@GenerateMicroBenchmark(BenchmarkType.AverageTimePerOp)
public int i_i() {
return i_i.hashCode();
}
}
On my Linux x86_64:
$ ~/Install/jdk8b88/bin/java -jar target/microbenchmarks.jar
".*AndyBench.*" -wi 1 -r 1 -i 5
...
o.s.g.a.AndyBench.eee: 0.997 +- 0.020 nsec/op
o.s.g.a.AndyBench.i_i: 1.288 +- 0.035 nsec/op
o.s.g.a.AndyBench.o_o: 2.709 +- 0.129 nsec/op
o.s.g.a.AndyBench.o_i: 4.925 +- 0.098 nsec/op
Looking into the assembly, this is what *admittedly* happens:
* i_i case has the typecheck against Integer, and then does the
inlined call to Integer.hashCode()
* o_o case has the typecheck against Object (AFAIU, this is how
inlined intrinsic in C2 works), passes the guard, and does the inlined
call to Object.hashCode()
* o_i case has the typecheck against Object, but FAILS it, and we go
through the slow path to real virtual call for Integer.hashCode().
Note that the code generated in the o_o and o_i cases is structurally
indistinguishable, but o_i naturally goes through the slow path. I
wonder why we are losing the information about the receiver type being
Integer in the o_i case, and skipping the proper devirtualization...
-Aleksey.
[1] i_i case, hot loop:
0x00007f683d065ad3: mov 0x14(%r11),%r10d ;*getfield i_i
0x00007f683d065ad7: mov 0xc(%r12,%r10,8),%r10d ;*getfield value
0x00007f683d065adc: mov 0xc(%rcx),%r9d ;*getfield sink
0x00007f683d065ae0: mov 0x8c(%r12,%r9,8),%r8d ;*getfield i1
0x00007f683d065ae8: mov 0xc(%rcx),%edi ;*getfield sink
0x00007f683d065aeb: mov 0xc0(%r12,%rdi,8),%r9d ;*getfield i2
0x00007f683d065af3: cmp %r9d,%r10d
0x00007f683d065af6: je 0x00007f683d065b8b ;*ifeq
0x00007f683d065afc: movzbl 0x8c(%rbx),%r8d ;*getfield isDone
0x00007f683d065b04: add $0x1,%rbp ;
0x00007f683d065b08: test %eax,0xb9774f2(%rip)
0x00007f683d065b0e: test %r8d,%r8d
0x00007f683d065b11: je 0x00007f683d065ad3 ;*ifeq
[2] o_o case, hot loop:
0x00007f683d074180: mov 0xc(%r8),%r10d ;*getfield sink
0x00007f683d074184: mov 0x8c(%r12,%r10,8),%ecx ;*getfield i1
0x00007f683d07418c: mov 0xc(%r8),%r10d ;*getfield sink
0x00007f683d074190: mov 0xc0(%r12,%r10,8),%r11d ;*getfield i2
0x00007f683d074198: cmp %r11d,%eax
0x00007f683d07419b: je 0x00007f683d074210 ;*ifeq
0x00007f683d07419d: mov 0x18(%rsp),%r11d
0x00007f683d0741a2: movzbl 0x8c(%r12,%r11,8),%r10d ;*getfield isDone
0x00007f683d0741ab: add $0x1,%r9 ; OopMap{r8=Oop rbx=Oop
rbp=Oop [24]=NarrowOop off=335}
0x00007f683d0741af: test %eax,0xb968e4b(%rip) #
0x00007f68489dd000
0x00007f683d0741b5: test %r10d,%r10d
0x00007f683d0741b8: jne 0x00007f683d074231 ;*aload_3
0x00007f683d0741ba: mov 0xc(%rbp),%r10d ;*getfield o_o
0x00007f683d0741be: mov 0x8(%r12,%r10,8),%r11d ;*invokevirtual
hashCode
0x00007f683d0741c3: mov 0x1e8(%r12,%r11,8),%r11
0x00007f683d0741cb: lea (%r12,%r10,8),%rsi ;*getfield o_o
// long and boring Object.hashCode fastpath follows:
0x00007f683d0741cf: mov $0x7f68453ae6d0,%r10 ; {metadata({method}
{0x00007f68453ae6d0} 'hashCode' '()I' in 'java/lang/Object')}
0x00007f683d0741d9: cmp %r10,%r11
0x00007f683d0741dc: jne 0x00007f683d07414f
0x00007f683d0741e2: mov (%rsi),%r10
0x00007f683d0741e5: mov %r10,%r11
0x00007f683d0741e8: and $0x7,%r11
0x00007f683d0741ec: cmp $0x1,%r11
0x00007f683d0741f0: jne 0x00007f683d07414f
0x00007f683d0741f6: shr $0x8,%r10
0x00007f683d0741fa: mov %r10d,%eax
0x00007f683d0741fd: and $0x7fffffff,%eax
0x00007f683d074203: test %eax,%eax
0x00007f683d074205: je 0x00007f683d07414f ;*invokevirtual hashCode
0x00007f683d07420b: jmpq 0x00007f683d074180
[3] o_i case, hot loop:
0x00007f683d070c00: mov 0xc(%r8),%r10d ;*getfield sink
0x00007f683d070c04: mov 0x8c(%r12,%r10,8),%ecx ;*getfield i1
0x00007f683d070c0c: mov 0xc(%r8),%r10d ;*getfield sink
0x00007f683d070c10: mov 0xc0(%r12,%r10,8),%r11d ;*getfield i2
0x00007f683d070c18: cmp %r11d,%eax
0x00007f683d070c1b: je 0x00007f683d070c90 ;*ifeq
0x00007f683d070c1d: mov 0x18(%rsp),%r11d
0x00007f683d070c22: movzbl 0x8c(%r12,%r11,8),%r10d ;*getfield isDone
0x00007f683d070c2b: add $0x1,%r9 ; OopMap{r8=Oop rbx=Oop
rbp=Oop [24]=NarrowOop off=335}
0x00007f683d070c2f: test %eax,0xb96c3cb(%rip) #
0x00007f68489dd000
0x00007f683d070c35: test %r10d,%r10d
0x00007f683d070c38: jne 0x00007f683d070cb1 ;*aload_3
0x00007f683d070c3a: mov 0x10(%rbp),%r10d ;*getfield o_i
0x00007f683d070c3e: mov 0x8(%r12,%r10,8),%r11d ;*invokevirtual
hashCode
0x00007f683d070c43: mov 0x1e8(%r12,%r11,8),%r11
0x00007f683d070c4b: lea (%r12,%r10,8),%rsi ;*getfield o_i
// long and boring Object.hashCode fastpath follows:
0x00007f683d070c4f: mov $0x7f68453ae6d0,%r10 ; {metadata({method}
{0x00007f68453ae6d0} 'hashCode' '()I' in 'java/lang/Object')}
0x00007f683d070c59: cmp %r10,%r11
0x00007f683d070c5c: jne 0x00007f683d070bcf
0x00007f683d070c62: mov (%rsi),%r10
0x00007f683d070c65: mov %r10,%r11
0x00007f683d070c68: and $0x7,%r11
0x00007f683d070c6c: cmp $0x1,%r11
0x00007f683d070c70: jne 0x00007f683d070bcf
0x00007f683d070c76: shr $0x8,%r10
0x00007f683d070c7a: mov %r10d,%eax
0x00007f683d070c7d: and $0x7fffffff,%eax
0x00007f683d070c83: test %eax,%eax
0x00007f683d070c85: je 0x00007f683d070bcf ;*invokevirtual hashCode
0x00007f683d070c8b: jmpq 0x00007f683d070c00
More information about the hotspot-compiler-dev
mailing list