hg: jdk7/tl/jdk: 6642323: Speeding up Single Byte Decoders; ...

Ulf Zibis Ulf.Zibis at gmx.de
Mon Jan 19 06:41:49 PST 2009


Hi Christian,

thanks for your numbers from your tests.
The magic inlining deceleration disappeared on your machine.

Did you use -client mode? I ask because the values of mine that you took 
for comparison were from -client mode. O:-)

I also played around with warm up:
time for warm up 1: 3539 ms
time for warm up 2: 3110 ms
time for warm up 3: 2697 ms
time for warm up 4: 2703 ms
time for map[a & 0xFF]: 3561 ms
time for map[a + 0x80]: 3510 ms
time for inlined map[a & 0xFF]: 3563 ms
time for inlined map[a + 0x80]: 3470 ms
test loops ./. warm up: 1.3042054

*Surprise*: the test loops are much slower than the warm up.
Maybe it's the overheating effect on my mobile CPU. Can you check on 
your system?

See my updated benchmark code at the end of mail ...

-Ulf


Am 19.01.2009 12:05, Christian Thalinger schrieb:
> On Sun, 2009-01-18 at 03:01 +0100, Ulf Zibis wrote:
>   
>> My results by JDK 1.7.0 ea b43 :
>>
>> time for 1st warm up: 3091 ms
>> time for 2nd warm up: 3100 ms
>> time for map[a & 0xFF]: 7207 ms
>> time for map[a + 0x80]: 7225 ms
>> time for inlined map[a & 0xFF]: 8100 ms
>> time for inlined map[a + 0x80]: 8084 ms
>>     
>
> Vanilla HotSpot:
>
> time for 1st warm up: 622 ms
> time for 2nd warm up: 586 ms
> time for map[a & 0xFF]: 1460 ms
> time for map[a + 0x80]: 1456 ms
> time for inlined map[a & 0xFF]: 1461 ms
> time for inlined map[a + 0x80]: 1454 ms
>
> Hotspot w/ unsigned-byte loads:
>
> time for 1st warm up: 680 ms
> time for 2nd warm up: 641 ms
> time for map[a & 0xFF]: 1537 ms
> time for map[a + 0x80]: 1468 ms
> time for inlined map[a & 0xFF]: 1525 ms
> time for inlined map[a + 0x80]: 1462 ms
>
> For technical details I will reply on hotspot-compiler-dev.
>
> -- Christian
>
>
>   
package build.tools.charsetmapping;

/**
 * Micro-benchmark comparing two ways of indexing a 256-entry lookup table
 * with a signed {@code byte}: {@code map[a & 0xFF]} versus
 * {@code map[a + 0x80]}, each measured once through a helper method
 * ({@link #decode1(byte)} / {@link #decode2(byte)}) and once written
 * inline in the copy loop.
 *
 * <p>Running {@link #main(String[])} prints the elapsed time of each
 * warm-up pass, the accumulated time of each of the four variants, and
 * the ratio of the measured loops to the last warm-up pass.
 *
 * @author Ulf Zibis <Ulf.Zibis at CoSoCo.de>
 */
public class DecoderBenchmark {

    /**
     * Program entry point; runs the benchmark once.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        foo();
    }

    /** Lookup table indexed by the decoded byte value (0..255). */
    static final char[] map = new char[256];
    /** Input buffer; be aware of exceeding CPU cache. */
    static final byte[] src = new byte[16384];
    /** Output buffer; be aware of exceeding CPU cache. */
    static final char[] dst = new char[16384];
    /** Number of consecutive calls of one variant per timing sample. */
    static final int LOOPS = 100;
    /** Number of timing samples taken per variant in the measured phase. */
    static final int OUTER_LOOPS = 1000;
    /** Number of warm-up passes. */
    static final int WARMUP_LOOPS = 4;
    /** Size of one warm-up pass as a fraction of {@link #OUTER_LOOPS}. */
    static final float WARMUP_RATIO = 0.25f;

    /**
     * Fills the tables, runs {@link #WARMUP_LOOPS} warm-up passes and then
     * the measured phase, printing all timings to {@code System.out}.
     */
    static void foo() {
        // fill arrays, to force real memory load and prohibit HotSpot
        // from just incrementing
        // (maybe candidate for sophisticated HotSpot optimization ;-) )
        for (int i = 0; i < map.length; i++) {
            map[i] = (char) (59 * (227 - i));
        }
        for (int i = 0; i < src.length; i++) {
            src[i] = (byte) (13 * (17 - i));
        }

        // warm up:
        long time = System.nanoTime();
        long lastWarmUpTime = 0;
        for (int h = 0; h < WARMUP_LOOPS; ) {
            for (int i = 0; i < WARMUP_RATIO * OUTER_LOOPS; i++) {
                for (int j = 0; j < LOOPS; j++) {
                    bar1(src, dst);
                }
                for (int j = 0; j < LOOPS; j++) {
                    bar2(src, dst);
                }
                for (int j = 0; j < LOOPS; j++) {
                    bar3(src, dst);
                }
                for (int j = 0; j < LOOPS; j++) {
                    bar4(src, dst);
                }
            }
            lastWarmUpTime = System.nanoTime() - time;
            // h is advanced here, so the printed pass number is 1-based
            System.out.println("time for warm up " + (++h) + ": "
                    + (lastWarmUpTime) / 1000000 + " ms");
            time = System.nanoTime(); // don't count time for print ;-)
        }

        long time1 = 0;
        long time2 = 0;
        long time3 = 0;
        long time4 = 0;
        long now;
        // swap decoders to eliminate influence of
        // other processes and CPU clockdown, caused by overheating
        for (int i = 0; i < OUTER_LOOPS; i++) {
            // After each variant: add the time elapsed since the previous
            // timestamp to that variant's total, then restart from 'now'.
            for (int j = 0; j < LOOPS; j++) {
                bar3(src, dst);
            }
            now = System.nanoTime();
            time3 += now - time;
            time = now;

            for (int j = 0; j < LOOPS; j++) {
                bar4(src, dst);
            }
            now = System.nanoTime();
            time4 += now - time;
            time = now;

            for (int j = 0; j < LOOPS; j++) {
                bar1(src, dst);
            }
            now = System.nanoTime();
            time1 += now - time;
            time = now;

            for (int j = 0; j < LOOPS; j++) {
                bar2(src, dst);
            }
            now = System.nanoTime();
            time2 += now - time;
            time = now;
        }

        System.out.println("time for map[a & 0xFF]: " + time1 / 1000000 + " ms");
        System.out.println("time for map[a + 0x80]: " + time2 / 1000000 + " ms");
        System.out.println("time for inlined map[a & 0xFF]: "
                + time3 / 1000000 + " ms");
        System.out.println("time for inlined map[a + 0x80]: "
                + time4 / 1000000 + " ms");
        // Scale by WARMUP_RATIO because one warm-up pass runs only that
        // fraction of the measured phase's outer iterations.
        System.out.println("test loops ./. last warm up: "
                + (float) (time1 + time2 + time3 + time4) / lastWarmUpTime * WARMUP_RATIO);
    }

    /**
     * Decodes every byte of {@code src} into {@code dst} via {@link #decode1(byte)}.
     *
     * @param src input bytes
     * @param dst output chars; must be at least as long as {@code src}
     */
    static void bar1(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = decode1(src[i]);
        }
    }

    /**
     * Decodes every byte of {@code src} into {@code dst} via {@link #decode2(byte)}.
     *
     * @param src input bytes
     * @param dst output chars; must be at least as long as {@code src}
     */
    static void bar2(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = decode2(src[i]);
        }
    }

    /**
     * Same as {@link #bar1(byte[], char[])} but with the
     * {@code map[a & 0xFF]} lookup written inline.
     *
     * @param src input bytes
     * @param dst output chars; must be at least as long as {@code src}
     */
    static void bar3(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = map[src[i] & 0xFF];
        }
    }

    /**
     * Same as {@link #bar2(byte[], char[])} but with the
     * {@code map[a + 0x80]} lookup written inline.
     *
     * @param src input bytes
     * @param dst output chars; must be at least as long as {@code src}
     */
    static void bar4(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = map[src[i] + 0x80];
        }
    }

    /**
     * Looks up {@code a} in {@link #map}, treating the byte as unsigned
     * (index {@code a & 0xFF}, i.e. -1 maps to index 255).
     *
     * @param a the byte to decode
     * @return the table entry at index {@code a & 0xFF}
     */
    public static char decode1(byte a) {
        return map[a & 0xFF];
    }

    /**
     * Looks up {@code a} in {@link #map} with the signed range shifted to
     * 0..255 (index {@code a + 0x80}, i.e. -128 maps to index 0). Note that
     * this addresses a different table slot than {@link #decode1(byte)}
     * for the same byte.
     *
     * @param a the byte to decode
     * @return the table entry at index {@code a + 0x80}
     */
    public static char decode2(byte a) {
        return map[a + 0x80];
    }

}





More information about the hotspot-compiler-dev mailing list