Character decoding -> interesting performance results

Ulf Zibis Ulf.Zibis at CoSoCo.de
Sat May 17 17:50:54 UTC 2008


*As you can see in the following source snippet, I have found some 
interesting performance results.*


*public* *abstract* *class* SingleByteFastDecoder
    *extends* SingleByteDecoder
{
 
    *private* *final* *char*[] byteToCharArray_00_7F = *new* *char*[0x80];
    *private* *final* *char*[] byteToCharArray_80_FF = *new* *char*[0x100];
 
    *protected* SingleByteFastDecoder(Charset cs, String byteToCharTable) {
	*super*(cs, byteToCharTable);
        byteToCharTable.getChars(0, 0x7F, byteToCharArray_00_7F, 0);
        byteToCharTable.getChars(0, 0x7F, byteToCharArray_80_FF, 0x80);
    }
 
    CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
	*byte*[] sa = src.array();
	*int* sp = src.arrayOffset() + src.position();
	*int* sl = src.arrayOffset() + src.limit();
	*assert* (sp <= sl);
	sp = (sp <= sl ? sp : sl);
	*char*[] da = dst.array();
	*int* dp = dst.arrayOffset() + dst.position();
	*int* dl = dst.arrayOffset() + dst.limit();
	*assert* (dp <= dl);
	dp = (dp <= dl ? dp : dl);
 
	*try* {
	    *byte* b;
	    *char* c;
	    *int* i;
	    *while* (sp < sl)
		*if* (dp < dl)
            /*
             * Different algorithms to calculate the matching with the code tables
             * and their performance results on a 2GHz 1-core Intel Centrino.
             * 
             * The first value is the minimum for 100 loops over a 190 KB 7-bit-ASCII file.
             * The second value results from a 190 KB MS1252 coded file, containing
             * 10 % 8-bit german 'Umlaute'. (see test/SpeedTest.java)
             * Interesting, that there are different coding times for different extended
             * 8-bit processing, even if there are no extended characters in the test file !
             * It's a pity, that compiler/hotspot don't inline method _decode(int).
             */
////		    da[dp++] = _decode(sa[sp++]); // is slower; compiler and hotspot don't inline it
//		    da[dp++] = (b = sa[sp++]) >= 0 ? (char)b
//				: byteToCharTable.charAt(b & 0x7F);    // (125 / 172 ms) // best String speed!
//				: byteToCharTable.charAt(b + 0x80);    // (125 / 172 ms)
//				: byteToCharArray_00_7F[b & 0x7F];     // (171 / 203 ms)
//				: byteToCharArray_00_7F[b + 0x80];     // (135 / 171 ms)
//				: byteToCharArray_80_FF[b & 0xFF];     // (140 / 172 ms)
//				: byteToCharArray_80_FF[b + 0x100];    // (140 / 171 ms)
		    da[dp++] = (c = (*char*)sa[sp++]) < 0x80 ? c
//				: byteToCharTable.charAt(c & 0x7F);    // (140 / 172 ms)
//				: byteToCharTable.charAt(c - 0xFF80);  // (140 / 172 ms)
//				: byteToCharArray_00_7F[c & 0x7F];     // (140 / 172 ms)
				: byteToCharArray_00_7F[c - 0xFF80];   // (125 / 156 ms) // best char[] speed!
//				: byteToCharArray_80_FF[c & 0xFF];     // (125 / 171 ms)
//				: byteToCharArray_80_FF[c - 0xFF00];   // (125 / 156 ms)
//		    da[dp++] = (c = (char)(sa[sp++] & 0xFF)) < 0x80 ? c
//				: byteToCharTable.charAt(c & 0x7F);    // (140 / 171 ms)
//				: byteToCharTable.charAt(c - 0x80);    // (140 / 171 ms)
//				: byteToCharArray_80_FF[c];            // (140 / 171 ms)
//		    da[dp++] = (i = sa[sp++]) >= 0 ? (char)i
//				: byteToCharTable.charAt(i & 0x7F);    // (125 / 172 ms)
//				: byteToCharTable.charAt(i + 0x80);    // (125 / 171 ms)
//				: byteToCharArray_00_7F[i & 0x7F];     // (171 / 203 ms)
//				: byteToCharArray_00_7F[i + 0x80];     // (140 / 165 ms)
//				: byteToCharArray_80_FF[i & 0xFF];     // (135 / 172 ms)
//				: byteToCharArray_80_FF[i + 0x100];    // (140 / 171 ms)
//		    da[dp++] = (i = sa[sp++] & 0xFF) < 0x80 ? (char)i
//				: byteToCharTable.charAt(i & 0x7F);    // (140 / 171 ms)
//				: byteToCharTable.charAt(i - 0x80);    // (135 / 171 ms)
//				: byteToCharArray_80_FF[i];            // (140 / 160 ms)
                *else*
		    *return* CoderResult.OVERFLOW;
	    *return* CoderResult.UNDERFLOW;
	} *finally* {
	    src.position(sp - src.arrayOffset());
	    dst.position(dp - dst.arrayOffset());
	}
    }
...
    *private* *final* *char* _decode(*int* inByte) {
	*return* inByte >= 0 ? (*char*)inByte
		: byteToCharTable.charAt(inByte + 0x80);
    }
...
}


>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openjdk.java.net/pipermail/core-libs-dev/attachments/20080517/b2be87da/attachment.html>


More information about the core-libs-dev mailing list