Request for review: 6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
Christian Thalinger
christian.thalinger at oracle.com
Fri Jan 11 14:53:41 PST 2013
On Jan 11, 2013, at 10:19 AM, Ulf Zibis <Ulf.Zibis at CoSoCo.de> wrote:
> Hi Sherman,
>
> Am 11.01.2013 06:47, schrieb Xueming Shen:
>> (1) sp++ at Ln#159 probably can be moved into ln#155? the local sp here should
>> not change the real "sp" after "break+return".
>> (2) maybe the "overflowflag" can just be replaced by "CoderResult cr", with init value
>> of CoderResult.UNDERFLOW;", then "cr = CoderResult.OVERFLOW" at ln#182, and
>> simply "return cr" at ln#193, without another "if".
>
> I've enhanced your suggestions, see more below...
> Additionally, some part of encodeArrayLoop(...) maybe could be moved into a separate method, to be reused by encode(char[] src, int sp, int len, byte[] dst).
> Some of the re-engineering could be adapted to the Decoder methods.
>
>> I'm surprised we only get 1.6% boost. Maybe it is worth measuring the performance
>> of some "small" buf/array encoding? I'm a little concerned that the overhead may
>> slow down the small size buf/array encoding. There is a simple benchmark for String
>> en/decoding at test/sun/nio/cs/StrCodingBenchmark.
>
> I think we should balance 4 cases in rating the performance:
> a) few loops, small buf/array
> b) few loops, big buf/array
> c) many loops, small buf/array
> d) many loops, big buf/array
> In a), b) the loop surrounding code will not be JIT-compiled, so should be optimized for interpreter.
> In c) d) the loop surrounding code *may be* JIT-compiled and consequtively inline the inner loop, should be verified.
> In b), d) the small inner loop, as demonstrated below, will be JIT-compiled, regardless if moved into separate method.
But you guys noticed that sentence in the initial review request, right?
"Move encoding loop into separate method for which VM will use intrinsic on x86."
Just wanted to make sure ;-)
-- Chris
>
> -Ulf
>
> 1) Check for (sp >= sl) is superfluous.
> ======================================
> private static int copyISOs(
> char[] sa, int sp, byte[] da, int dp, int len) {
> int i = 0;
> for (; i < len; i++) {
> char c = sa[sp++];
> // if (c & '\uFF00' != 0) // needs bug 6935994 to be fixed, would be fastest
> if ((byte)(c >> 8) != 0) // temporary replacement, fast byte-length operation on x86
> break;
> da[dp++] = (byte)c;
> }
> return i;
> }
>
> private CoderResult encodeArrayLoop(
> CharBuffer src, ByteBuffer dst) {
> char[] sa = src.array();
> int soff = src.arrayOffset();
> int sp = soff + src.position();
> int sr = src.remaining();
> int sl = sp + sr;
> byte[] da = dst.array();
> int doff = dst.arrayOffset();
> int dp = doff + dst.position();
> int dr = dst.remaining();
> CoderResult cr;
> if (dr < sr) {
> sr = dr;
> cr = CoderResult.OVERFLOW;
> } else
> cr = CoderResult.UNDERFLOW;
> try {
> int ret = copyISOs(sa, sp, da, dp, sr);
> sp = sp + ret;
> dp = dp + ret;
> if (ret != sr) {
> if (sgp.parse(sa[sp], sa, sp, sl) < 0)
> return sgp.error();
> return sgp.unmappableResult();
> }
> return cr;
> } finally {
> src.position(sp - soff);
> dst.position(dp - doff);
> }
> }
>
> 2) Avoids sp, dp to be recalculated; shorter surrounding code -> best chance to become JIT-compiled.
> ======================================
> // while inlinig, JIT will erase the surrounding int[] p
> private static boolean copyISOs(
> char[] sa, byte[] da, final int[] p, int sl) {
> for (int sp = p[0], dp = p[1]; sp < sl; sp++) {
> char c = sa[sp];
> // if (c & '\uFF00' != 0) // needs bug 6935994 to be fixed, would be fastest
> if ((byte)(c >> 8) != 0) // temporary replacement, fast byte-length operation on x86
> return false;
> da[dp++] = (byte)c;
> }
> return true;
> }
>
> private CoderResult encodeArrayLoop(
> CharBuffer src, ByteBuffer dst) {
> char[] sa = src.array();
> int soff = src.arrayOffset();
> int sp = soff + src.position();
> int sr = src.remaining();
> byte[] da = dst.array();
> int doff = dst.arrayOffset();
> int dp = doff + dst.position();
> int dr = dst.remaining();
> CoderResult cr;
> if (dr < sr) {
> sr = dr;
> cr = CoderResult.OVERFLOW;
> } else
> cr = CoderResult.UNDERFLOW;
> try {
> int sl = sp + sr;
> final int[] p = { sp, dp };
> if (!copyISOs(sa, da, p, sl)) {
> if (sgp.parse(sa[sp], sa, sp, sl) < 0)
> return sgp.error();
> return sgp.unmappableResult();
> }
> return cr;
> } finally {
> src.position(sp - soff);
> dst.position(dp - doff);
> }
> }
>
> 3) No more needs try...finally block.
> ======================================
> private CoderResult encodeArrayLoop(
> CharBuffer src, ByteBuffer dst) {
> char[] sa = src.array();
> int soff = src.arrayOffset();
> int sp = soff + src.position();
> int sr = src.remaining();
> byte[] da = dst.array();
> int doff = dst.arrayOffset();
> int dp = doff + dst.position();
> int dr = dst.remaining();
> CoderResult cr;
> if (dr < sr) {
> sr = dr;
> cr = CoderResult.OVERFLOW;
> } else
> cr = CoderResult.UNDERFLOW;
> int sl = sp + sr;
> for (; sp < sl; sp++) {
> char c = sa[sp];
> // if (c & '\uFF00' != 0) { // needs bug 6935994 to be fixed, would be fastest
> if ((byte)(c >> 8) != 0) { // temporary replacement, fast byte-length operation on x86
> cr = null;
> break;
> }
> da[dp++] = (byte)c;
> }
> src.position(sp - soff);
> dst.position(dp - doff);
> return result(cr, sa, sp, sl);
> }
>
> // if adapted, maybe could also be reused in encodeBufferLoop()
> private static CoderResult result(CoderResult cr, byte[] sa, int sp, int sl) {
> return cr != null ? cr :
> sgp.parse(sa[sp], sa, sp, sl) < 0
> ? sgp.error();
> : sgp.unmappableResult();
> }
>
More information about the hotspot-compiler-dev
mailing list