hg: jdk7/tl/jdk: 6642323: Speeding up Single Byte Decoders; ...
Ulf Zibis
Ulf.Zibis at gmx.de
Sat Jan 17 18:01:33 PST 2009
Am 17.01.2009 23:48, Christian Thalinger schrieb:
>
> I tried that one and it's slightly slower with my new version (unsigned
> byte loads). Btw. I ran the benchmarks on an Intel Core 2 Duo.
>
> -- Christian
>
>
Hi Christian,
I have an Intel Pentium M, 2 GHz, FSB 533 MHz (notebook).
I found that the CPU clocks down after some time because of
overheating, so comparing absolute times became meaningless.
Will you please check your HotSpot optimizations with this code:
/**
 * Micro-benchmark comparing two ways of indexing a 256-entry lookup table with a
 * signed {@code byte}: masking ({@code map[a & 0xFF]}) versus biasing
 * ({@code map[a + 0x80]}). Each variant is measured once through a small helper
 * method ({@link #decode1(byte)} / {@link #decode2(byte)}) and once written
 * directly inside the copy loop.
 *
 * @author Ulf Zibis <Ulf.Zibis at CoSoCo.de>
 */
public class DecoderBenchmark {

    /** Lookup table indexed by the decoders; filled by {@link #foo()}. */
    static final char[] map = new char[256];
    static final byte[] src = new byte[16384]; // be aware of exceeding CPU cache
    static final char[] dst = new char[16384]; // be aware of exceeding CPU cache

    /**
     * Runs the benchmark and prints the timings to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        foo();
    }

    /**
     * Fills the tables, runs two warm-up rounds, then times the four decode
     * loops in interleaved order and prints the accumulated time of each.
     */
    static void foo() {
        // Fill the arrays with non-trivial values so the loops really have to load
        // from memory and HotSpot cannot reduce the lookup to a simple increment.
        for (int i = 0; i < map.length; i++) {
            map[i] = (char) (59 * (227 - i));
        }
        for (int i = 0; i < src.length; i++) {
            src[i] = (byte) (13 * (17 - i));
        }

        // Warm up. Each round is timed with its own start/stop pair so that the
        // cost of printing the previous result is not charged to the next round.
        long start = System.nanoTime();
        warmUp();
        long elapsed = System.nanoTime() - start;
        System.out.println("time for 1st warm up: " + elapsed / 1000000 + " ms");

        start = System.nanoTime();
        warmUp();
        elapsed = System.nanoTime() - start;
        System.out.println("time for 2nd warm up: " + elapsed / 1000000 + " ms");

        long time1 = 0;
        long time2 = 0;
        long time3 = 0;
        long time4 = 0;

        // Interleave the decoders to even out the influence of other processes
        // and of CPU clock-down caused by overheating.
        // Restart the clock here so the println above is not counted in time1.
        long time = System.nanoTime();
        for (int i = 0; i < 100; i++) {
            long now;

            for (int j = 0; j < 1000; j++) {
                bar1(src, dst);
            }
            now = System.nanoTime();
            time1 += now - time;
            time = now;

            for (int j = 0; j < 1000; j++) {
                bar2(src, dst);
            }
            now = System.nanoTime();
            time2 += now - time;
            time = now;

            for (int j = 0; j < 1000; j++) {
                bar3(src, dst);
            }
            now = System.nanoTime();
            time3 += now - time;
            time = now;

            for (int j = 0; j < 1000; j++) {
                bar4(src, dst);
            }
            now = System.nanoTime();
            time4 += now - time;
            time = now;
        }

        System.out.println("time for map[a & 0xFF]: " + time1 / 1000000 + " ms");
        System.out.println("time for map[a + 0x80]: " + time2 / 1000000 + " ms");
        System.out.println("time for inlined map[a & 0xFF]: " + time3 / 1000000 + " ms");
        System.out.println("time for inlined map[a + 0x80]: " + time4 / 1000000 + " ms");
    }

    /** Runs one warm-up round: 100 passes of 100 calls to each of the four loops. */
    private static void warmUp() {
        for (int i = 0; i < 100; i++) {
            for (int j = 0; j < 100; j++) {
                bar1(src, dst);
            }
            for (int j = 0; j < 100; j++) {
                bar2(src, dst);
            }
            for (int j = 0; j < 100; j++) {
                bar3(src, dst);
            }
            for (int j = 0; j < 100; j++) {
                bar4(src, dst);
            }
        }
    }

    /**
     * Decodes every byte of {@code src} into {@code dst} via {@link #decode1(byte)}.
     *
     * @param src bytes to decode
     * @param dst receives one char per source byte; must be at least as long as {@code src}
     */
    static void bar1(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = decode1(src[i]);
        }
    }

    /**
     * Decodes every byte of {@code src} into {@code dst} via {@link #decode2(byte)}.
     *
     * @param src bytes to decode
     * @param dst receives one char per source byte; must be at least as long as {@code src}
     */
    static void bar2(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = decode2(src[i]);
        }
    }

    /**
     * Same lookup as {@link #bar1(byte[], char[])}, with {@code map[a & 0xFF]}
     * written directly in the loop instead of calling a helper.
     *
     * @param src bytes to decode
     * @param dst receives one char per source byte; must be at least as long as {@code src}
     */
    static void bar3(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = map[src[i] & 0xFF];
        }
    }

    /**
     * Same lookup as {@link #bar2(byte[], char[])}, with {@code map[a + 0x80]}
     * written directly in the loop instead of calling a helper.
     *
     * @param src bytes to decode
     * @param dst receives one char per source byte; must be at least as long as {@code src}
     */
    static void bar4(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = map[src[i] + 0x80];
        }
    }

    /**
     * Looks up {@code a} treated as an unsigned value (index {@code 0..255}).
     *
     * @param a byte to decode
     * @return {@code map[a & 0xFF]}
     */
    public static char decode1(byte a) {
        return map[a & 0xFF];
    }

    /**
     * Looks up {@code a} shifted by 128, so byte {@code -128} reads index 0 and
     * byte {@code 127} reads index 255. Note this addresses a different table
     * slot than {@link #decode1(byte)} for the same byte.
     *
     * @param a byte to decode
     * @return {@code map[a + 0x80]}
     */
    public static char decode2(byte a) {
        return map[a + 0x80];
    }
}
My results with JDK 1.6.0_03:
time for 1st warm up: 3136 ms
time for 2nd warm up: 3098 ms
time for map[a & 0xFF]: 7909 ms
time for map[a + 0x80]: 8081 ms
time for inlined map[a & 0xFF]: 8841 ms
time for inlined map[a + 0x80]: 9150 ms
My results with JDK 1.6.0_11:
time for 1st warm up: 3152 ms
time for 2nd warm up: 3117 ms
time for map[a & 0xFF]: 7147 ms
time for map[a + 0x80]: 7124 ms
time for inlined map[a & 0xFF]: 7969 ms
time for inlined map[a + 0x80]: 7990 ms
My results with JDK 1.7.0 ea b43:
time for 1st warm up: 3091 ms
time for 2nd warm up: 3100 ms
time for map[a & 0xFF]: 7207 ms
time for map[a + 0x80]: 7225 ms
time for inlined map[a & 0xFF]: 8100 ms
time for inlined map[a + 0x80]: 8084 ms
- Ulf
More information about the hotspot-dev
mailing list