Character decoding -> interesting performance results
Ulf Zibis
Ulf.Zibis at CoSoCo.de
Sat May 17 17:50:54 UTC 2008
*As you can see in the following source snippet, I have found some
interesting performance results.*
*public* *abstract* *class* SingleByteFastDecoder
*extends* SingleByteDecoder
{
*private* *final* *char*[] byteToCharArray_00_7F = *new* *char*[0x80];
*private* *final* *char*[] byteToCharArray_80_FF = *new* *char*[0x100];
*protected* SingleByteFastDecoder(Charset cs, String byteToCharTable) {
*super*(cs, byteToCharTable);
byteToCharTable.getChars(0, 0x7F, byteToCharArray_00_7F, 0);
byteToCharTable.getChars(0, 0x7F, byteToCharArray_80_FF, 0x80);
}
CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
*byte*[] sa = src.array();
*int* sp = src.arrayOffset() + src.position();
*int* sl = src.arrayOffset() + src.limit();
*assert* (sp <= sl);
sp = (sp <= sl ? sp : sl);
*char*[] da = dst.array();
*int* dp = dst.arrayOffset() + dst.position();
*int* dl = dst.arrayOffset() + dst.limit();
*assert* (dp <= dl);
dp = (dp <= dl ? dp : dl);
*try* {
*byte* b;
*char* c;
*int* i;
*while* (sp < sl)
*if* (dp < dl)
/*
* Different algorithms to calculate the matching with the code tables
* and their performance results on a 2GHz 1-core Intel Centrino.
*
* The first value is the minimum for 100 loops over a 190 KB 7-bit-ASCII file.
* The second value results from a 190 KB MS1252 coded file, containing
* 10 % 8-bit german 'Umlaute'. (see test/SpeedTest.java)
* Interesting, that there are different coding times for different extended
* 8-bit processing, even if there are no extended characters in the test file !
* It's a pity, that compiler/hotspot don't inline method _decode(int).
*/
//// da[dp++] = _decode(sa[sp++]); // is slower; compiler and hotspot don't inline it
// da[dp++] = (b = sa[sp++]) >= 0 ? (char)b
// : byteToCharTable.charAt(b & 0x7F); // (125 / 172 ms) // best String speed!
// : byteToCharTable.charAt(b + 0x80); // (125 / 172 ms)
// : byteToCharArray_00_7F[b & 0x7F]; // (171 / 203 ms)
// : byteToCharArray_00_7F[b + 0x80]; // (135 / 171 ms)
// : byteToCharArray_80_FF[b & 0xFF]; // (140 / 172 ms)
// : byteToCharArray_80_FF[b + 0x100]; // (140 / 171 ms)
da[dp++] = (c = (*char*)sa[sp++]) < 0x80 ? c
// : byteToCharTable.charAt(c & 0x7F); // (140 / 172 ms)
// : byteToCharTable.charAt(c - 0xFF80); // (140 / 172 ms)
// : byteToCharArray_00_7F[c & 0x7F]; // (140 / 172 ms)
: byteToCharArray_00_7F[c - 0xFF80]; // (125 / 156 ms) // best char[] speed!
// : byteToCharArray_80_FF[c & 0xFF]; // (125 / 171 ms)
// : byteToCharArray_80_FF[c - 0xFF00]; // (125 / 156 ms)
// da[dp++] = (c = (char)(sa[sp++] & 0xFF)) < 0x80 ? c
// : byteToCharTable.charAt(c & 0x7F); // (140 / 171 ms)
// : byteToCharTable.charAt(c - 0x80); // (140 / 171 ms)
// : byteToCharArray_80_FF[c]; // (140 / 171 ms)
// da[dp++] = (i = sa[sp++]) >= 0 ? (char)i
// : byteToCharTable.charAt(i & 0x7F); // (125 / 172 ms)
// : byteToCharTable.charAt(i + 0x80); // (125 / 171 ms)
// : byteToCharArray_00_7F[i & 0x7F]; // (171 / 203 ms)
// : byteToCharArray_00_7F[i + 0x80]; // (140 / 165 ms)
// : byteToCharArray_80_FF[i & 0xFF]; // (135 / 172 ms)
// : byteToCharArray_80_FF[i + 0x100]; // (140 / 171 ms)
// da[dp++] = (i = sa[sp++] & 0xFF) < 0x80 ? (char)i
// : byteToCharTable.charAt(i & 0x7F); // (140 / 171 ms)
// : byteToCharTable.charAt(i - 0x80); // (135 / 171 ms)
// : byteToCharArray_80_FF[i]; // (140 / 160 ms)
*else*
*return* CoderResult.OVERFLOW;
*return* CoderResult.UNDERFLOW;
} *finally* {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
...
*private* *final* *char* _decode(*int* inByte) {
*return* inByte >= 0 ? (*char*)inByte
: byteToCharTable.charAt(inByte + 0x80);
}
...
}
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openjdk.java.net/pipermail/core-libs-dev/attachments/20080517/b2be87da/attachment.html>
More information about the core-libs-dev
mailing list