RFR: 8336856: Optimize String Concat [v16]

Shaojin Wen duke at openjdk.org
Sun Jul 28 09:31:56 UTC 2024


On Thu, 25 Jul 2024 14:52:11 GMT, Shaojin Wen <duke at openjdk.org> wrote:

>> The current implementation of StringConcat is to mix the coder and length into a long. This operation will have some overhead for int/long/boolean types. We can separate the calculation of the coder from the calculation of the length, which can improve the performance in the scenario of concat int/long/boolean.
>> 
>> This idea comes from the suggestion of @l4es in the discussion of PR https://github.com/openjdk/jdk/pull/20253#issuecomment-2240412866
>
> Shaojin Wen has updated the pull request incrementally with two additional commits since the last revision:
> 
>  - remove benchmark
>  - common simpleConcat

Should mixer and prepend add the two types of Integer and Long? Now we need to do integer.toString and then concatenate, which leads to performance degradation.

Now Integer.getChar and Long.getChar are implemented with pepend, and StringConcat is forced to use prepend.

If we use the append-based Integer.getChar and Long.getChar algorithms, then String.Concat can be implemented based on append, which should give us better performance.

Of course, modifying the algorithm of Integer.getChars is a big change, oh, what a hassle.

The following is an algorithm for implementing Integer.getChars based on append:

    static Unsafe UNSAFE = JDKUtils.UNSAFE;
    private static final byte[] MIN_INT_BYTES = "-2147483648".getBytes();
    public static final int[] DIGITS_K_32 = new int[1000];
    static {
        for (int i = 0; i < 1000; i++) {
            int c0 = i < 10 ? 2 : i < 100 ? 1 : 0;
            int c1 = (i / 100) + '0';
            int c2 = ((i / 10) % 10) + '0';
            int c3 = i % 10 + '0';
            DIGITS_K_32[i] = c0 + (c1 << 8) + (c2 << 16) + (c3 << 24);
        }
    }

    @Native
    public static final int   MIN_VALUE = 0x80000000;
    public static int writeInt32(final byte[] buf, int pos, final int value) {
        int i;
        if (value < 0) {
            if (value == Integer.MIN_VALUE) {
                System.arraycopy(MIN_INT_BYTES, 0, buf, pos, MIN_INT_BYTES.length);
                return pos + MIN_INT_BYTES.length;
            }
            i = -value;
            buf[pos++] = '-';
        } else {
            i = value;
        }

        if (i < 1000) {
            int v = DIGITS_K_32[i];
            final int start = (byte) v;
            if (start == 0) {
                putShort(buf, pos, (short) (v >> 8));
                pos += 2;
            } else if (start == 1) {
                buf[pos++] = (byte) (v >> 16);
            }
            buf[pos] = (byte) (v >> 24);
            return pos + 1;
        }

        final int q1 = i / 1000;
        final int r1 = i - q1 * 1000;
        final int v1 = DIGITS_K_32[r1];
        if (i < 1000000) {
            final int v2 = DIGITS_K_32[q1];
            int start = (byte) v2;
            if (start == 0) {
                putShort(buf, pos, (short) (v2 >> 8));
                pos += 2;
            } else if (start == 1) {
                buf[pos++] = (byte) (v2 >> 16);
            }
            putInt(buf, pos, v1 & 0xffffff00 | (v2 >> 24));
            return pos + 4;
        }
        final int q2 = q1 / 1000;
        final int r2 = q1 - q2 * 1000;
        final int q3 = q2 / 1000;
        final int v2 = DIGITS_K_32[r2];
        if (q3 == 0) {
            int v = DIGITS_K_32[q2];
            final int start = (byte) v;
            if (start == 0) {
                putShort(buf, pos, (short) (v >> 8));
                pos += 2;
            } else if (start == 1) {
                buf[pos++] = (byte) (v >> 16);
            }
            buf[pos++] = (byte) (v >> 24);
        } else {
            putInt(buf, pos, DIGITS_K_32[q2 - q3 * 1000] & 0xffffff00 | (q3 + '0'));
            pos += 4;
        }

        putShort(buf, pos, (short) (v2 >> 8));
        putInt(buf, pos + 2, v1 & 0xffffff00 | (v2 >> 24));
        return pos + 6;
    }

    private static void putShort(byte[] buf, int pos, short v) {
        UNSAFE.putShort(
                buf,
                ARRAY_BYTE_BASE_OFFSET + pos,
                BIG_ENDIAN ? Short.reverseBytes(v) : v
        );
    }

    private static void putInt(byte[] buf, int pos, int v) {
        UNSAFE.putInt(
                buf,
                ARRAY_BYTE_BASE_OFFSET + pos,
                BIG_ENDIAN ? Integer.reverseBytes(v) : v
        );
    }

-------------

PR Comment: https://git.openjdk.org/jdk/pull/20273#issuecomment-2254277745
PR Comment: https://git.openjdk.org/jdk/pull/20273#issuecomment-2254354750


More information about the core-libs-dev mailing list