hg: jdk7/hotspot-comp/hotspot: 6797305: Add LoadUB and LoadUI opcode class
Ulf Zibis
Ulf.Zibis at gmx.de
Fri Mar 13 10:57:58 PDT 2009
Am 13.03.2009 18:01, Christian Thalinger schrieb:
> On Fri, 2009-03-13 at 17:43 +0100, Ulf Zibis wrote:
>
>> Hi Christian,
>>
>> where can I find information, when this CR will be available in snapshot
>> release?
>>
>
> The changeset has been promoted to the hotspot repository[1] and is part
> of HS15 build 03, which will be included in JDK7 build 51. A build
> schedule can be found here[2].
>
> [1] http://hg.openjdk.java.net/jdk7/hotspot/hotspot/shortlog
> [2] http://openjdk.java.net/projects/jdk7/builds/
>
Wow, that sounds great. It should be available today. (OK, maybe
tomorrow) ;-)
Many thanks for your answer, because I still feel unable to retrieve
that information from [1].
Regarding:
if (inByte >= 0)
return (char)inByte;
else
return (char)(inByte & 0xFF);
I'm curious whether your CR would also result in some progress for that case.
So I have expanded my benchmark test:
import org.junit.*;
/**
*
* @author Ulf Zibis <Ulf.Zibis at CoSoCo.de>
*/
public class DecoderBenchmark {
static final char[] map = new char[256];
static final byte[] src = new byte[131072]; // exceed CPU L1-cache
for real world constraint
static final char[] dst = new char[131072]; // exceed CPU L1-cache
for real world constraint
static final int LOOPS = 125;
// static final byte[] src = new byte[16384]; // don't exceed CPU
L1-cache
// static final char[] dst = new char[16384]; // don't exceed CPU
L1-cache
// static final int LOOPS = 1000;
// static final byte[] src = new byte[2048]; // far below of
exceeding CPU L1-cache
// static final char[] dst = new char[2048]; // far below of
exceeding CPU L1-cache
// static final int LOOPS = 8000;
static final int OUTER_LOOPS = 100;
static final int WARMUP_LOOPS = 4;
static final float WARMUP_RATIO = 0.10f;
@Test
public void foo() {
// fill arrays, to force real memory load and prohibit HotSpot
from just incrementing
// (maybe candidate for sophisticated HotSpot optimization ;-) )
for (int i=0; i<map.length; i++)
map[i] = (char)(59 * (227 - i));
for (int i=0; i<src.length; i++)
src[i] = (byte)(13 * (17 - i));
// for (int i=0; i<src.length; i++) { // for mostly positive bytes
// byte b = (byte)(13 * (17 - i));
// src[i] = b < -1 ? (byte)-b : b;
// }
// warm up:
long time = System.nanoTime();
long lastWarmUpTime = 0;
for (int h=0; h<WARMUP_LOOPS; ) {
for (int i=0; i<WARMUP_RATIO*OUTER_LOOPS; i++) {
for (int j=0; j<LOOPS; j++)
loop1(src, dst);
for (int j=0; j<LOOPS; j++)
loop2(src, dst);
for (int j=0; j<LOOPS; j++)
inline1(src, dst);
for (int j=0; j<LOOPS; j++)
inline2(src, dst);
for (int j=0; j<LOOPS; j++)
loop3(src, dst);
for (int j=0; j<LOOPS; j++)
loop4(src, dst);
for (int j=0; j<LOOPS; j++)
loop5(src, dst);
for (int j=0; j<LOOPS; j++)
inline3(src, dst);
for (int j=0; j<LOOPS; j++)
inline4(src, dst);
for (int j=0; j<LOOPS; j++)
inline5(src, dst);
}
lastWarmUpTime = System.nanoTime()-time;
System.out.println("time for warm up "+(++h)+":
"+(lastWarmUpTime)/1000000+" ms");
time = System.nanoTime();// don't count time for print ;-)
}
long time1 = 0;
long time2 = 0;
long itime1 = 0;
long itime2 = 0;
long time3 = 0;
long time4 = 0;
long time5 = 0;
long itime3 = 0;
long itime4 = 0;
long itime5 = 0;
// swap decoders to eliminate influence of
// other processes and CPU clockdown, caused by overheating
for (int i=0; i<OUTER_LOOPS; i++) {
for (int j=0; j<LOOPS; j++)
loop1(src, dst);
time1 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
loop2(src, dst);
time2 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
inline1(src, dst);
itime1 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
inline2(src, dst);
itime2 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
loop3(src, dst);
time3 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
loop4(src, dst);
time4 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
loop5(src, dst);
time5 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
inline3(src, dst);
itime3 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
inline4(src, dst);
itime4 -= time - (time = System.nanoTime());
for (int j=0; j<LOOPS; j++)
inline5(src, dst);
itime5 -= time - (time = System.nanoTime());
}
System.out.println("time for map[a & 0xFF]: "+time1/1000000+" ms");
System.out.println("time for map[a + 0x80]: "+time2/1000000+" ms");
System.out.println("time for inlined map[a & 0xFF]:
"+itime1/1000000+" ms");
System.out.println("time for inlined map[a + 0x80]:
"+itime2/1000000+" ms");
System.out.println("time for (char)a: "+time3/1000000+" ms");
System.out.println("time for (char)(a & 0xFF): "+time4/1000000+"
ms");
System.out.println("time for (a>=0) ? (char)a : (char)(a&0xFF):
"+time5/1000000+" ms");
System.out.println("time for inlined (char)a: "+itime3/1000000+"
ms");
System.out.println("time for inlined (char)(a & 0xFF):
"+itime4/1000000+" ms");
System.out.println("time for inlined (a>=0) ? (char)a :
(char)(a&0xFF): "+itime5/1000000+" ms");
System.out.println("last warm up ./. test loops: "
+(float)lastWarmUpTime/(time1+time2+itime1+itime2)/WARMUP_RATIO);
}
static void loop1(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = decode1(src[i]);
}
static void loop2(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = decode2(src[i]);
}
static void loop3(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = decode3(src[i]);
}
static void loop4(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = decode4(src[i]);
}
static void loop5(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = decode5(src[i]);
}
static void inline1(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = map[src[i] & 0xFF];
}
static void inline2(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = map[src[i] + 0x80];
}
static void inline3(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = (char)src[i];
}
static void inline4(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
dst[i] = (char)(src[i] & 0xFF);
}
static void inline5(byte[] src, char[] dst) {
for (int i=0; i<src.length; i++)
if (src[i] >= 0)
dst[i] = (char)src[i];
else
dst[i] = (char)(src[i] & 0xFF);
}
public static char decode1(byte a) {
return map[a & 0xFF];
}
public static char decode2(byte a) {
return map[a + 0x80];
}
public static char decode3(byte a) {
return (char)a;
}
public static char decode4(byte a) {
return (char)(a & 0xFF);
}
public static char decode5(byte a) { // seems to be not inlined,
check with mostly positive src bytes
if (a >= 0)
return (char)a;
else
return (char)(a & 0xFF);
}
public static void main(String[] args) {
DecoderBenchmark dbm = new DecoderBenchmark();
dbm.foo();
}
}
More information about the jdk7-changes
mailing list