hg: jdk7/hotspot-comp/hotspot: 6797305: Add LoadUB and LoadUI opcode class

Ulf Zibis Ulf.Zibis at gmx.de
Fri Mar 13 10:57:58 PDT 2009


Am 13.03.2009 18:01, Christian Thalinger schrieb:
> On Fri, 2009-03-13 at 17:43 +0100, Ulf Zibis wrote:
>   
>> Hi Christian,
>>
>> where can I find information, when this CR will be available in snapshot 
>> release?
>>     
>
> The changeset has been promoted to the hotspot repository[1] and is part
> of HS15 build 03, which will be included in JDK7 build 51.  A build
> schedule can be found here[2].
>
> [1] http://hg.openjdk.java.net/jdk7/hotspot/hotspot/shortlog
> [2] http://openjdk.java.net/projects/jdk7/builds/
>   

Wow, that sounds great. It should be available today. (OK, maybe 
tomorrow) ;-)

Many thanks for your answer, because I still feel unable to retrieve 
that information from [1].


Regarding:
       if (inByte >= 0)
           return (char)inByte;
       else
           return (char)(inByte & 0xFF);

I'm curious whether your CR would also result in some progress for that case.

So I have expanded my benchmark test:


import org.junit.*;

/**
 *
 * @author Ulf Zibis <Ulf.Zibis at CoSoCo.de>
 */
public class DecoderBenchmark {

    static final char[] map = new char[256];
    static final byte[] src = new byte[131072]; // exceed CPU L1-cache 
for real world constraint
    static final char[] dst = new char[131072]; // exceed CPU L1-cache 
for real world constraint
    static final int LOOPS = 125;
//    static final byte[] src = new byte[16384]; // don't exceed CPU 
L1-cache
//    static final char[] dst = new char[16384]; // don't exceed CPU 
L1-cache
//    static final int LOOPS = 1000;
//    static final byte[] src = new byte[2048]; // far below of 
exceeding CPU L1-cache
//    static final char[] dst = new char[2048]; // far below of 
exceeding CPU L1-cache
//    static final int LOOPS = 8000;
    static final int OUTER_LOOPS = 100;
    static final int WARMUP_LOOPS = 4;
    static final float WARMUP_RATIO = 0.10f;

    @Test
    public void foo() {
        // fill arrays, to force real memory load and prohibit HotSpot 
from just incrementing
        // (maybe candidate for sophisticated HotSpot optimization ;-) )
        for (int i=0; i<map.length; i++)
            map[i] = (char)(59 * (227 - i));
        for (int i=0; i<src.length; i++)
            src[i] = (byte)(13 * (17 - i));
//        for (int i=0; i<src.length; i++) { // for mostly positive bytes
//            byte b = (byte)(13 * (17 - i));
//            src[i] = b < -1 ? (byte)-b : b;
//        }
        // warm up:
        long time = System.nanoTime();
        long lastWarmUpTime = 0;
        for (int h=0; h<WARMUP_LOOPS; ) {
            for (int i=0; i<WARMUP_RATIO*OUTER_LOOPS; i++) {
                for (int j=0; j<LOOPS; j++)
                    loop1(src, dst);
                for (int j=0; j<LOOPS; j++)
                    loop2(src, dst);
                for (int j=0; j<LOOPS; j++)
                    inline1(src, dst);
                for (int j=0; j<LOOPS; j++)
                    inline2(src, dst);
                for (int j=0; j<LOOPS; j++)
                    loop3(src, dst);
                for (int j=0; j<LOOPS; j++)
                    loop4(src, dst);
                for (int j=0; j<LOOPS; j++)
                    loop5(src, dst);
                for (int j=0; j<LOOPS; j++)
                    inline3(src, dst);
                for (int j=0; j<LOOPS; j++)
                    inline4(src, dst);
                for (int j=0; j<LOOPS; j++)
                    inline5(src, dst);
            }
            lastWarmUpTime = System.nanoTime()-time;
            System.out.println("time for warm up "+(++h)+": 
"+(lastWarmUpTime)/1000000+" ms");
            time = System.nanoTime();// don't count time for print ;-)
        }
        long time1 = 0;
        long time2 = 0;
        long itime1 = 0;
        long itime2 = 0;
        long time3 = 0;
        long time4 = 0;
        long time5 = 0;
        long itime3 = 0;
        long itime4 = 0;
        long itime5 = 0;
        // swap decoders to eliminate influence of
        // other processes and CPU clockdown, caused by overheating
        for (int i=0; i<OUTER_LOOPS; i++) {
            for (int j=0; j<LOOPS; j++)
                loop1(src, dst);
            time1 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                loop2(src, dst);
            time2 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                inline1(src, dst);
            itime1 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                inline2(src, dst);
            itime2 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                loop3(src, dst);

            time3 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                loop4(src, dst);
            time4 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                loop5(src, dst);
            time5 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                inline3(src, dst);
            itime3 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                inline4(src, dst);
            itime4 -= time - (time = System.nanoTime());
            for (int j=0; j<LOOPS; j++)
                inline5(src, dst);
            itime5 -= time - (time = System.nanoTime());
        }
        System.out.println("time for map[a & 0xFF]: "+time1/1000000+" ms");
        System.out.println("time for map[a + 0x80]: "+time2/1000000+" ms");
        System.out.println("time for inlined map[a & 0xFF]: 
"+itime1/1000000+" ms");
        System.out.println("time for inlined map[a + 0x80]: 
"+itime2/1000000+" ms");
        System.out.println("time for (char)a: "+time3/1000000+" ms");
        System.out.println("time for (char)(a & 0xFF): "+time4/1000000+" 
ms");
        System.out.println("time for (a>=0) ? (char)a : (char)(a&0xFF): 
"+time5/1000000+" ms");
        System.out.println("time for inlined (char)a: "+itime3/1000000+" 
ms");
        System.out.println("time for inlined (char)(a & 0xFF): 
"+itime4/1000000+" ms");
        System.out.println("time for inlined (a>=0) ? (char)a : 
(char)(a&0xFF): "+itime5/1000000+" ms");
        System.out.println("last warm up ./. test loops: "
                
+(float)lastWarmUpTime/(time1+time2+itime1+itime2)/WARMUP_RATIO);
    }

    static void loop1(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = decode1(src[i]);
    }
    static void loop2(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = decode2(src[i]);
    }

    static void loop3(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = decode3(src[i]);
    }
    static void loop4(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = decode4(src[i]);
    }
    static void loop5(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = decode5(src[i]);
    }

    static void inline1(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = map[src[i] & 0xFF];
    }
    static void inline2(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = map[src[i] + 0x80];
    }

    static void inline3(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = (char)src[i];
    }
    static void inline4(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            dst[i] = (char)(src[i] & 0xFF);
    }
    static void inline5(byte[] src, char[] dst) {
        for (int i=0; i<src.length; i++)
            if (src[i] >= 0)
                dst[i] = (char)src[i];
            else
                dst[i] = (char)(src[i] & 0xFF);
    }

    public static char decode1(byte a) {
        return map[a & 0xFF];
    }
    public static char decode2(byte a) {
        return map[a + 0x80];
    }

    public static char decode3(byte a) {
        return (char)a;
    }
    public static char decode4(byte a) {
        return (char)(a & 0xFF);
    }
    public static char decode5(byte a) { // seems to be not inlined, 
check with mostly positive src bytes
        if (a >= 0)
            return (char)a;
        else
            return (char)(a & 0xFF);
    }

    public static void main(String[] args) {
        DecoderBenchmark dbm = new DecoderBenchmark();
        dbm.foo();
    }
}





More information about the jdk7-changes mailing list