XCHG is slow ?
Vladimir Kozlov
vladimir.kozlov at oracle.com
Tue Jul 3 18:30:32 PDT 2012
I agree with you that it is strange. I looked at the code and, as you said, the one with
overflow detection executes a lot more instructions and stack operations. What
is the time difference? I ran the latest 7u6 (linux64) promoted build and got these numbers:
$ bin/jruby -X+C bench/bench_fib_recursive.rb
0.170000 0.070000 0.240000 ( 0.100000)
0.030000 0.020000 0.050000 ( 0.036000)
0.040000 0.020000 0.060000 ( 0.036000)
0.040000 0.030000 0.070000 ( 0.036000)
0.040000 0.020000 0.060000 ( 0.036000)
Vladimir
Rémi Forax wrote:
> On 07/04/2012 01:44 AM, Vladimir Kozlov wrote:
>> Rémi,
>>
>> NOP and xchg %ax,%ax are and always were the same instruction and have
>> the same encoding 0x90. What you see is a multi-byte nop (2 bytes in
>> your case) to align the following call instruction: 0x66 0x90.
>>
>> Vladimir
>
> I see, and I suppose that a multi-byte nop is not slower than a
> single-byte nop.
>
> So, to explain my problem clearly: I have two pieces of code. The first one is
> fibonacci (ClassicFibo)
> with an int; the second one is fibonacci with an int plus the overflow
> detection (FiboSample).
>
> My problem is that the code with the overflow detection is faster than the code
> without the overflow detection :)
>
> Rémi
>
> # {method} 'fibo' '(I)I' in 'ClassicFibo'
> # parm0: rsi = int
> # [sp+0x30] (sp of caller)
> 0x00007fdf91061620: mov %eax,-0x14000(%rsp) ;...89842400 c0feff
> 0x00007fdf91061627: push %rbp ;...55
> 0x00007fdf91061628: sub $0x20,%rsp ;...4883ec20
> ;*synchronization entry
> ; - ClassicFibo::fibo at -1
> (line 4)
> 0x00007fdf9106162c: mov %esi,%ebp ;...8bee
> 0x00007fdf9106162e: cmp $0x2,%esi ;...83fe02
> 0x00007fdf91061631: jl 0x00007fdf91061651 ;...7c1e
> ;*if_icmpge
> ; - ClassicFibo::fibo at 2
> (line 4)
> 0x00007fdf91061633: dec %esi ;...ffce
> ;*isub
> ; - ClassicFibo::fibo at 9
> (line 7)
> 0x00007fdf91061635: xchg %ax,%ax ;...6690
> 0x00007fdf91061637: callq 0x00007fdf91038060 ;...e8246afd ff
> ; OopMap{off=28}
> ;*invokestatic fibo
> ; - ClassicFibo::fibo at 10
> (line 7)
> ; {static_call}
> 0x00007fdf9106163c: mov %eax,(%rsp) ;...890424
> 0x00007fdf9106163f: mov %ebp,%esi ;...8bf5
> 0x00007fdf91061641: add $0xfffffffffffffffe,%esi ;...83c6fe
> ;*isub
> ; - ClassicFibo::fibo at 15
> (line 7)
> 0x00007fdf91061644: xchg %ax,%ax ;...666690
> 0x00007fdf91061647: callq 0x00007fdf91038060 ;...e8146afd ff
> ; OopMap{off=44}
> ;*invokestatic fibo
> ; - ClassicFibo::fibo at 16
> (line 7)
> ; {static_call}
> 0x00007fdf9106164c: add (%rsp),%eax ;...030424
> ;*iadd
> ; - ClassicFibo::fibo at 19
> (line 7)
> 0x00007fdf9106164f: jmp 0x00007fdf91061656 ;...eb05
> 0x00007fdf91061651: mov $0x1,%eax ;...b8010000 00
> 0x00007fdf91061656: add $0x20,%rsp ;...4883c420
> 0x00007fdf9106165a: pop %rbp ;...5d
> 0x00007fdf9106165b: test %eax,0x91c899f(%rip) #
> 0x00007fdf9a22a000
> ;...85059f89 1c09
> ; {poll_return}
> 0x00007fdf91061661: retq ;...c3
> ;*invokestatic fibo
> ; - ClassicFibo::fibo at 10
> (line 7)
> 0x00007fdf91061662: mov %rax,%rsi ;...488bf0
> 0x00007fdf91061665: jmp 0x00007fdf9106166a ;...eb03
> 0x00007fdf91061667: mov %rax,%rsi ;...488bf0
> ;*invokestatic fibo
> ; - ClassicFibo::fibo at 16
> (line 7)
> 0x00007fdf9106166a: add $0x20,%rsp ;...4883c420
> 0x00007fdf9106166e: pop %rbp ;...5d
> 0x00007fdf9106166f: jmpq 0x00007fdf91061220 ;...e9acfbff ff
> ; {runtime_call}
> 0x00007fdf91061674: hlt ;...f4
> 0x00007fdf91061675: hlt ;...f4
> 0x00007fdf91061676: hlt ;...f4
> 0x00007fdf91061677: hlt ;...f4
> 0x00007fdf91061678: hlt ;...f4
> 0x00007fdf91061679: hlt ;...f4
> 0x00007fdf9106167a: hlt ;...f4
> 0x00007fdf9106167b: hlt ;...f4
> 0x00007fdf9106167c: hlt ;...f4
> 0x00007fdf9106167d: hlt ;...f4
> 0x00007fdf9106167e: hlt ;...f4
> 0x00007fdf9106167f: hlt ;...f4
> [Stub Code]
> 0x00007fdf91061680: mov $0x0,%rbx ;...48bb0000 000000
> ;...000000
> ; {no_reloc}
> 0x00007fdf9106168a: jmpq 0x00007fdf9106168a ;...e9fbffff ff
> ; {runtime_call}
> 0x00007fdf9106168f: mov $0x0,%rbx ;...48bb0000 000000
> ;...000000
> ; {static_stub}
> 0x00007fdf91061699: jmpq 0x00007fdf91061699 ;...e9fbffff ff
> ; {runtime_call}
> [Exception Handler]
> 0x00007fdf9106169e: jmpq 0x00007fdf9105e4a0 ;...e9fdcdff ff
> ; {runtime_call}
> [Deopt Handler Code]
> 0x00007fdf910616a3: callq 0x00007fdf910616a8 ;...e8000000 00
> 0x00007fdf910616a8: subq $0x5,(%rsp) ;...48832c24 05
> 0x00007fdf910616ad: jmpq 0x00007fdf91038c00 ;...e94e75fd ff
> ; {runtime_call}
> 0x00007fdf910616b2: hlt ;...f4
> 0x00007fdf910616b3: hlt ;...f4
> 0x00007fdf910616b4: hlt ;...f4
> 0x00007fdf910616b5: hlt ;...f4
> 0x00007fdf910616b6: hlt ;...f4
> 0x00007fdf910616b7: hlt ;...f4
>
>
> # {method} 'fibo' '(I)I' in 'FiboSample'
> # parm0: rsi = int
> # [sp+0x30] (sp of caller)
> 0x00007faadd05fd40: mov %eax,-0x14000(%rsp) ;...89842400 c0feff
> 0x00007faadd05fd47: push %rbp ;...55
> 0x00007faadd05fd48: sub $0x20,%rsp ;...4883ec20
> ;*synchronization entry
> ; - FiboSample::fibo at -1
> 0x00007faadd05fd4c: mov %esi,(%rsp) ;...893424
> 0x00007faadd05fd4f: cmp $0x2,%esi ;...83fe02
> 0x00007faadd05fd52: jl 0x00007faadd05fda1 ;...7c4d
> ;*if_icmpge
> ; - FiboSample::fibo at 2
> 0x00007faadd05fd54: dec %esi ;...ffce
> ;*isub
> ; - FiboSample::fibo at 9
> 0x00007faadd05fd56: nop ;...90
> 0x00007faadd05fd57: callq 0x00007faadd038060 ;...e80483fd ff
> ; OopMap{off=28}
> ;*invokestatic fibo
> ; - FiboSample::fibo at 10
> ; {static_call}
> 0x00007faadd05fd5c: mov %eax,0x4(%rsp) ;...89442404
> ;*goto
> ; - FiboSample::fibo at 61
> 0x00007faadd05fd60: mov (%rsp),%esi ;...8b3424
> 0x00007faadd05fd63: add $0xfffffffffffffffe,%esi ;...83c6fe
> ;*isub
> ; - FiboSample::fibo at 18
> 0x00007faadd05fd66: nop ;...90
> 0x00007faadd05fd67: callq 0x00007faadd038060 ;...e8f482fd ff
> ; OopMap{off=44}
> ;*invokestatic fibo
> ; - FiboSample::fibo at 19
> ; {static_call}
> 0x00007faadd05fd6c: mov %eax,%r9d ;...448bc8
> 0x00007faadd05fd6f: mov 0x4(%rsp),%eax ;...8b442404
> 0x00007faadd05fd73: add %r9d,%eax ;...4103c1
> ;*goto
> ; - FiboSample::fibo at 75
> 0x00007faadd05fd76: mov 0x4(%rsp),%r11d ;...448b5c24 04
> 0x00007faadd05fd7b: xor %eax,%r11d ;...4433d8
> 0x00007faadd05fd7e: mov %r9d,%r8d ;...458bc1
> 0x00007faadd05fd81: xor %eax,%r8d ;...4433c0
> 0x00007faadd05fd84: and %r8d,%r11d ;...4523d8
> 0x00007faadd05fd87: test %r11d,%r11d ;...4585db
> 0x00007faadd05fd8a: jge 0x00007faadd05fda6 ;...7d1a
> ;*ifge
> ; -
> jdart.runtime.RT::addExact at 11 (line 139)
> ; - FiboSample::fibo at 37
> 0x00007faadd05fd8c: mov $0xa5,%esi ;...bea50000 00
> 0x00007faadd05fd91: mov %r9d,(%rsp) ;...44890c24
> 0x00007faadd05fd95: xchg %ax,%ax ;...6690
> 0x00007faadd05fd97: callq 0x00007faadd039020 ;...e88492fd ff
> ; OopMap{off=92}
> ;*new ; -
> jdart.runtime.RT::addExact at 14 (line 140)
> ; - FiboSample::fibo at 37
> ; {runtime_call}
> 0x00007faadd05fd9c: callq 0x00007faae7394800 ;...e85f4a33 0a
> ;*new ; -
> jdart.runtime.RT::addExact at 14 (line 140)
> ; - FiboSample::fibo at 37
> ; {runtime_call}
> 0x00007faadd05fda1: mov $0x1,%eax ;...b8010000 00
> 0x00007faadd05fda6: add $0x20,%rsp ;...4883c420
> 0x00007faadd05fdaa: pop %rbp ;...5d
> 0x00007faadd05fdab: test %eax,0xab5524f(%rip) #
> 0x00007faae7bb5000
> ;...85054f52 b50a
> ; {poll_return}
> 0x00007faadd05fdb1: retq ;...c3
> 0x00007faadd05fdb2: mov 0x8(%rax),%r11d ;...448b5808
> 0x00007faadd05fdb6: cmp $0xefe4b124,%r11d ;...4181fb24 b1e4ef
> ;
> {oop('jdart/runtime/ControlFlowException')}
> 0x00007faadd05fdbd: jne 0x00007faadd05fdf5 ;...7536
> ;*invokestatic fibo
> ; - FiboSample::fibo at 19
> 0x00007faadd05fdbf: mov 0x20(%rax),%ebp ;...8b6820
> 0x00007faadd05fdc2: test %ebp,%ebp ;...85ed
> 0x00007faadd05fdc4: jne 0x00007faadd05fe11 ;...754b
> ;*getfield value
> ; - FiboSample::fibo at 70
> 0x00007faadd05fdc6: mov 0x4(%rsp),%eax ;...8b442404
> 0x00007faadd05fdca: xor %r9d,%r9d ;...4533c9
> 0x00007faadd05fdcd: jmp 0x00007faadd05fd76 ;...eba7
> 0x00007faadd05fdcf: mov 0x8(%rax),%r11d ;...448b5808
> 0x00007faadd05fdd3: cmp $0xefe4b124,%r11d ;...4181fb24 b1e4ef
> ;
> {oop('jdart/runtime/ControlFlowException')}
> 0x00007faadd05fdda: jne 0x00007faadd05fdf0 ;...7514
> ;*invokestatic fibo
> ; - FiboSample::fibo at 10
> 0x00007faadd05fddc: mov 0x20(%rax),%ebp ;...8b6820
> 0x00007faadd05fddf: test %ebp,%ebp ;...85ed
> 0x00007faadd05fde1: jne 0x00007faadd05fe02 ;...751f
> ;*getfield value
> ; - FiboSample::fibo at 57
> 0x00007faadd05fde3: xor %r11d,%r11d ;...4533db
> 0x00007faadd05fde6: mov %r11d,0x4(%rsp) ;...44895c24 04
> 0x00007faadd05fdeb: jmpq 0x00007faadd05fd60 ;...e970ffff ff
> 0x00007faadd05fdf0: mov %rax,%rsi ;...488bf0
> 0x00007faadd05fdf3: jmp 0x00007faadd05fdf8 ;...eb03
> 0x00007faadd05fdf5: mov %rax,%rsi ;...488bf0
> ;*invokestatic fibo
> ; - FiboSample::fibo at 19
> 0x00007faadd05fdf8: add $0x20,%rsp ;...4883c420
> 0x00007faadd05fdfc: pop %rbp ;...5d
> 0x00007faadd05fdfd: jmpq 0x00007faadd061660 ;...e95e1800 00
> ; {runtime_call}
> 0x00007faadd05fe02: mov $0xffffffec,%esi ;...beecffff ff
> 0x00007faadd05fe07: callq 0x00007faadd039020 ;...e81492fd ff
> ; OopMap{rbp=NarrowOop
> off=204}
> ;*astore_2
> ; - FiboSample::fibo at 60
> ; {runtime_call}
> 0x00007faadd05fe0c: callq 0x00007faae7394800 ;...e8ef4933 0a
> ;*getfield value
> ; - FiboSample::fibo at 57
> ; {runtime_call}
> 0x00007faadd05fe11: mov $0xffffffec,%esi ;...beecffff ff
> 0x00007faadd05fe16: nop ;...90
> 0x00007faadd05fe17: callq 0x00007faadd039020 ;...e80492fd ff
> ; OopMap{rbp=NarrowOop
> off=220}
> ;*astore
> ; - FiboSample::fibo at 73
> ; {runtime_call}
> 0x00007faadd05fe1c: callq 0x00007faae7394800 ;...e8df4933 0a
> ;*getfield value
> ; - FiboSample::fibo at 70
> ; {runtime_call}
> 0x00007faadd05fe21: hlt ;...f4
> 0x00007faadd05fe22: hlt ;...f4
> 0x00007faadd05fe23: hlt ;...f4
> 0x00007faadd05fe24: hlt ;...f4
> 0x00007faadd05fe25: hlt ;...f4
> 0x00007faadd05fe26: hlt ;...f4
> 0x00007faadd05fe27: hlt ;...f4
> 0x00007faadd05fe28: hlt ;...f4
> 0x00007faadd05fe29: hlt ;...f4
> 0x00007faadd05fe2a: hlt ;...f4
> 0x00007faadd05fe2b: hlt ;...f4
> 0x00007faadd05fe2c: hlt ;...f4
> 0x00007faadd05fe2d: hlt ;...f4
> 0x00007faadd05fe2e: hlt ;...f4
> 0x00007faadd05fe2f: hlt ;...f4
> 0x00007faadd05fe30: hlt ;...f4
> 0x00007faadd05fe31: hlt ;...f4
> 0x00007faadd05fe32: hlt ;...f4
> 0x00007faadd05fe33: hlt ;...f4
> 0x00007faadd05fe34: hlt ;...f4
> 0x00007faadd05fe35: hlt ;...f4
> 0x00007faadd05fe36: hlt ;...f4
> 0x00007faadd05fe37: hlt ;...f4
> 0x00007faadd05fe38: hlt ;...f4
> 0x00007faadd05fe39: hlt ;...f4
> 0x00007faadd05fe3a: hlt ;...f4
> 0x00007faadd05fe3b: hlt ;...f4
> 0x00007faadd05fe3c: hlt ;...f4
> 0x00007faadd05fe3d: hlt ;...f4
> 0x00007faadd05fe3e: hlt ;...f4
> 0x00007faadd05fe3f: hlt ;...f4
> [Stub Code]
> 0x00007faadd05fe40: mov $0x0,%rbx ;...48bb0000 000000
> ;...000000
> ; {no_reloc}
> 0x00007faadd05fe4a: jmpq 0x00007faadd05fe4a ;...e9fbffff ff
> ; {runtime_call}
> 0x00007faadd05fe4f: mov $0x0,%rbx ;...48bb0000 000000
> ;...000000
> ; {static_stub}
> 0x00007faadd05fe59: jmpq 0x00007faadd05fe59 ;...e9fbffff ff
> ; {runtime_call}
> [Exception Handler]
> 0x00007faadd05fe5e: jmpq 0x00007faadd05e8e0 ;...e97deaff ff
> ; {runtime_call}
> [Deopt Handler Code]
> 0x00007faadd05fe63: callq 0x00007faadd05fe68 ;...e8000000 00
> 0x00007faadd05fe68: subq $0x5,(%rsp) ;...48832c24 05
> 0x00007faadd05fe6d: jmpq 0x00007faadd038c00 ;...e98e8dfd ff
> ; {runtime_call}
> 0x00007faadd05fe72: hlt ;...f4
> 0x00007faadd05fe73: hlt ;...f4
> 0x00007faadd05fe74: hlt ;...f4
> 0x00007faadd05fe75: hlt ;...f4
> 0x00007faadd05fe76: hlt ;...f4
> 0x00007faadd05fe77: hlt ;...f4
>
>
>>
>> Rémi Forax wrote:
>>> Hi guys,
>>> I've found something really weird, c2 sometimes generates
>>> the assembler instruction xchg %ax, %ax (see the assembly code
>>> of fibo just before the first recursive call)
>>> which I believe is equivalent to a nop but slower.
>>>
>>> In fact, xchg is really slow on my laptop (Nehalem), slower than
>>> at least 5 or 6 instructions like move/xor/and.
>>> I think it's because xchg is atomic, see [1]
>>>
>>> I think c2 should never generate xchg or at least replace all xchg
>>> %r, %r by nop.
>>>
>>> Rémi
>>>
>>> [1]
>>> http://www.intel.ru/content/dam/doc/white-paper/intel-microarchitecture-white-paper.pdf
>>>
>>>
>>>
>>> public class ClassicFibo {
>>> private static int fibo(int n) {
>>> if (n < 2) {
>>> return 1;
>>> }
>>> return fibo(n - 1) + fibo(n - 2);
>>> }
>>>
>>> public static void main(String[] args) {
>>> System.out.println(fibo(40));
>>> }
>>> }
>>>
>>> # {method} 'fibo' '(I)I' in 'ClassicFibo'
>>> # parm0: rsi = int
>>> # [sp+0x30] (sp of caller)
>>> 0x00007fb409061620: mov %eax,-0x14000(%rsp)
>>> 0x00007fb409061627: push %rbp
>>> 0x00007fb409061628: sub $0x20,%rsp ;*synchronization entry
>>> ; -
>>> ClassicFibo::fibo at -1 (line 4)
>>> 0x00007fb40906162c: mov %esi,%ebp
>>> 0x00007fb40906162e: cmp $0x2,%esi
>>> 0x00007fb409061631: jl 0x00007fb409061651 ;*if_icmpge
>>> ; -
>>> ClassicFibo::fibo at 2 (line 4)
>>> 0x00007fb409061633: dec %esi ;*isub
>>> ; -
>>> ClassicFibo::fibo at 9 (line 7)
>>> 0x00007fb409061635: xchg %ax,%ax
>>> 0x00007fb409061637: callq 0x00007fb409038060 ; OopMap{off=28}
>>> ;*invokestatic fibo
>>> ; -
>>> ClassicFibo::fibo at 10 (line 7)
>>> ; {static_call}
>>> ...
>>>
>>>
>>>
>>>
>
>
More information about the hotspot-compiler-dev
mailing list