RFR: 8351443: Improve robustness of StringBuilder

Wed Apr 30 16:19:45 UTC 2025

On Wed, 30 Apr 2025 14:12:36 GMT, Roger Riggs <rriggs at openjdk.org> wrote:

> Refactor AbstractStringBuilder to maintain consistency among count, coder, and value buffers while the buffer capacity is being expanded and/or inflated from Latin1 to UTF16 representations. 
> The refactoring pattern is to read and write AbstractStringBuilder fields once using locals for all intermediate values. 
> Support methods are static, designed to pass all values as arguments and return a value.
> 
> The value byte array is reallocated under 3 conditions:
> - Increasing the capacity with the same encoder
> - Increasing the capacity and inflation to change the coder from LATIN1 to UTF16
> - Inflation with the same capacity
> 
> Added StressSBTest to exercise public instance methods of StringBuilder.

StringLatin1 also provides the same putCharsAt methods as StringUTF16, which may look better in appendNull and append(boolean), like this:

class AbstractStringBuilder {
    /**
     * Appends the four characters {@code "null"} to this builder.
     *
     * <p>The {@code coder}, {@code count} and {@code value} fields are each read
     * once into locals, and {@code count} and {@code value} are written back once
     * at the end.
     *
     * @return this builder
     */
    private AbstractStringBuilder appendNull() {
        final byte currentCoder = this.coder;
        final int start = this.count;
        // NOTE(review): ensureCapacitySameCoder is presumably returning a buffer
        // with room for start + 4 chars in the same coder -- confirm against its definition.
        final byte[] buf = ensureCapacitySameCoder(this.value, currentCoder, start + 4);
        final int end = isLatin1(currentCoder)
                ? StringLatin1.putCharsAt(buf, start, 'n', 'u', 'l', 'l')
                : StringUTF16.putCharsAt(buf, start, 'n', 'u', 'l', 'l');
        // Publish the new count first, then the buffer.
        this.count = end;
        this.value = buf;
        return this;
    }

    /**
     * Appends {@code "true"} or {@code "false"} to this builder, depending on
     * the argument.
     *
     * <p>The {@code coder}, {@code count} and {@code value} fields are each read
     * once into locals, and {@code value} and {@code count} are written back once
     * at the end.
     *
     * @param b the boolean whose textual form is appended
     * @return this builder
     */
    public AbstractStringBuilder append(boolean b) {
        final byte currentCoder = this.coder;
        final int start = this.count;

        // "true" needs 4 chars, "false" needs 5.
        final byte[] buf = ensureCapacitySameCoder(this.value, currentCoder, start + (b ? 4 : 5));
        final boolean latin1 = isLatin1(currentCoder);
        final int end;
        if (b) {
            end = latin1
                    ? StringLatin1.putCharsAt(buf, start, 't', 'r', 'u', 'e')
                    : StringUTF16.putCharsAt(buf, start, 't', 'r', 'u', 'e');
        } else {
            end = latin1
                    ? StringLatin1.putCharsAt(buf, start, 'f', 'a', 'l', 's', 'e')
                    : StringUTF16.putCharsAt(buf, start, 'f', 'a', 'l', 's', 'e');
        }
        // Publish the buffer first, then the new count.
        this.value = buf;
        this.count = end;
        return this;
    }
}

StringLatin1 also provides the same putCharsAt method as StringUTF16

final class StringLatin1 {
    /**
     * Stores four characters into {@code value} as single bytes, starting at
     * index {@code i}.
     *
     * <p>Each {@code char} is narrowed with a {@code (byte)} cast, so only its
     * low 8 bits are kept.
     *
     * @param value the destination byte array
     * @param i     the index of the first byte to write
     * @param c1    the character stored at {@code i}
     * @param c2    the character stored at {@code i + 1}
     * @param c3    the character stored at {@code i + 2}
     * @param c4    the character stored at {@code i + 3}
     * @return the index just past the last byte written, {@code i + 4}
     * @throws ArrayIndexOutOfBoundsException if any of the four target indices
     *         is outside {@code value}
     */
    public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
        // This declaration was missing, leaving the returned name undefined.
        int end = i + 4;
        value[i    ] = (byte)(c1);
        value[i + 1] = (byte)(c2);
        value[i + 2] = (byte)(c3);
        value[i + 3] = (byte)(c4);
        return end;
    }

    /**
     * Stores five characters into {@code value} as single bytes, starting at
     * index {@code i}.
     *
     * <p>Each {@code char} is narrowed with a {@code (byte)} cast, so only its
     * low 8 bits are kept.
     *
     * @param value the destination byte array
     * @param i     the index of the first byte to write
     * @param c1    the character stored at {@code i}
     * @param c2    the character stored at {@code i + 1}
     * @param c3    the character stored at {@code i + 2}
     * @param c4    the character stored at {@code i + 3}
     * @param c5    the character stored at {@code i + 4}
     * @return the index just past the last byte written, {@code i + 5}
     * @throws ArrayIndexOutOfBoundsException if any of the five target indices
     *         is outside {@code value}
     */
    public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
        int end = i + 5;
        value[i    ] = (byte)(c1);
        value[i + 1] = (byte)(c2);
        value[i + 2] = (byte)(c3);
        value[i + 3] = (byte)(c4);
        value[i + 4] = (byte)(c5);
        return end;
    }
}

StringUTF16 uses explicit offsets (`i + 1`, `i + 2`, `i + 3`, `i + 4`) instead of incrementing `i` with `++`

class StringUTF16 {
    /**
     * Writes four characters into {@code value} at consecutive positions
     * {@code i} through {@code i + 3} via {@code putChar}, after validating the
     * range {@code [i, i + 4)} with {@code checkBoundsBeginEnd}.
     *
     * @param value the destination byte array
     * @param i     the position of the first character, as understood by
     *              {@code putChar} (presumably a char index rather than a byte
     *              offset -- confirm against putChar)
     * @param c1    the character written at {@code i}
     * @param c2    the character written at {@code i + 1}
     * @param c3    the character written at {@code i + 2}
     * @param c4    the character written at {@code i + 3}
     * @return the position just past the last character written, {@code i + 4}
     */
    public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
        final int limit = i + 4;
        // Validate the whole range up front, before any character is written.
        checkBoundsBeginEnd(i, limit, value);
        putChar(value, i, c1);
        putChar(value, i + 1, c2);
        putChar(value, i + 2, c3);
        putChar(value, i + 3, c4);
        assert limit == i + 4;
        return limit;
    }

    /**
     * Writes five characters into {@code value} at consecutive positions
     * {@code i} through {@code i + 4} via {@code putChar}, after validating the
     * range {@code [i, i + 5)} with {@code checkBoundsBeginEnd}.
     *
     * @param value the destination byte array
     * @param i     the position of the first character, as understood by
     *              {@code putChar} (presumably a char index rather than a byte
     *              offset -- confirm against putChar)
     * @param c1    the character written at {@code i}
     * @param c2    the character written at {@code i + 1}
     * @param c3    the character written at {@code i + 2}
     * @param c4    the character written at {@code i + 3}
     * @param c5    the character written at {@code i + 4}
     * @return the position just past the last character written, {@code i + 5}
     */
    public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
        final int limit = i + 5;
        // Validate the whole range up front, before any character is written.
        checkBoundsBeginEnd(i, limit, value);
        putChar(value, i, c1);
        putChar(value, i + 1, c2);
        putChar(value, i + 2, c3);
        putChar(value, i + 3, c4);
        putChar(value, i + 4, c5);
        assert limit == i + 5;
        return limit;
    }
}

-------------

PR Comment: https://git.openjdk.org/jdk/pull/24967#issuecomment-2842531659