RFR: 8304745: Lazily initialize byte[] in java.io.BufferedInputStream [v3]
Alan Bateman
alanb at openjdk.org
Tue Apr 4 11:54:14 UTC 2023
On Mon, 27 Mar 2023 15:24:05 GMT, Brian Burkhalter <bpb at openjdk.org> wrote:
>> I skimmed through the latest version but there are still several issues. Can you try the patch below instead? This will ensure that read, skip, reset, etc. check buf as before. It also ensures that read1 does the same threshold check as it has always done - your patch changes that in a subtle way and I think we have to be careful with behavior changes to this really old API.
>>
>> To Eirik's comment about benchmarks. This change is about memory footprint and lazily creating the internal buffer. A good example is the BIS on stdin which has an 8k buffer that is only needed if something reads from System.in. There may be a few more like this. The other case is a large read where BIS will delegate to the wrapped stream when it doesn't have any bytes buffered. It would be useful to get some data on cases where it helps (the BIS wrapping BIS only works when the enclosing BIS has the same or larger buffer than the enclosed BIS).
>>
>>
>> --- a/src/java.base/share/classes/java/io/BufferedInputStream.java
>> +++ b/src/java.base/share/classes/java/io/BufferedInputStream.java
>> @@ -57,6 +57,8 @@ public class BufferedInputStream extends FilterInputStream {
>>
>> private static final int DEFAULT_BUFFER_SIZE = 8192;
>>
>> + private static final byte[] EMPTY = new byte[0];
>> +
>> /**
>> * As this class is used early during bootstrap, it's motivated to use
>> * Unsafe.compareAndSetObject instead of AtomicReferenceFieldUpdater
>> @@ -70,6 +72,9 @@ public class BufferedInputStream extends FilterInputStream {
>> // initialized to null when BufferedInputStream is sub-classed
>> private final InternalLock lock;
>>
>> + // initial buffer size (DEFAULT_BUFFER_SIZE or size specified to constructor)
>> + private final int initialSize;
>> +
>> /**
>> * The internal buffer array where the data is stored. When necessary,
>> * it may be replaced by another array of
>> @@ -166,16 +171,42 @@ public class BufferedInputStream extends FilterInputStream {
>> }
>>
>> /**
>> - * Check to make sure that buffer has not been nulled out due to
>> - * close; if not return it;
>> + * Returns the internal buffer, optionally allocating it if empty.
>> + * @param allocateIfEmpty true to allocate if empty
>> + * @throws IOException if the stream is closed (buf is null)
>> */
>> - private byte[] getBufIfOpen() throws IOException {
>> + private byte[] getBufIfOpen(boolean allocateIfEmpty) throws IOException {
>> byte[] buffer = buf;
>> - if (buffer == null)
>> + if (allocateIfEmpty && buffer == EMPTY) {
>> + buffer = new byte[initialSize];
>> + if (!U.compareAndSetReference(this, BUF_OFFSET, EMPTY, buffer)) {
>> + // re-read buf
>> + buffer = buf;
>> + }
>> + }
>> + if (buffer == null) {
>> throw new IOException("Stream closed");
>> + }
>> return buffer;
>> }
>>
>> + /**
>> + * Returns the internal buffer, allocating it if empty.
>> + * @throws IOException if the stream is closed (buf is null)
>> + */
>> + private byte[] getBufIfOpen() throws IOException {
>> + return getBufIfOpen(true);
>> + }
>> +
>> + /**
>> + * Throws IOException if the stream is closed (buf is null).
>> + */
>> + private void ensureOpen() throws IOException {
>> + if (buf == null) {
>> + throw new IOException("Stream closed");
>> + }
>> + }
>> +
>> /**
>> * Creates a {@code BufferedInputStream}
>> * and saves its argument, the input stream
>> @@ -205,13 +236,15 @@ public class BufferedInputStream extends FilterInputStream {
>> if (size <= 0) {
>> throw new IllegalArgumentException("Buffer size <= 0");
>> }
>> - buf = new byte[size];
>> -
>> - // use monitors when BufferedInputStream is sub-classed
>> + initialSize = size;
>> if (getClass() == BufferedInputStream.class) {
>> + // use internal lock and lazily create buffer when not subclassed
>> lock = InternalLock.newLockOrNull();
>> + buf = EMPTY;
>> } else {
>> + // use monitors and eagerly create buffer when subclassed
>> lock = null;
>> + buf = new byte[size];
>> }
>> }
>>
>> @@ -307,7 +340,8 @@ public class BufferedInputStream extends FilterInputStream {
>> if there is no mark/reset activity, do not bother to copy the
>> bytes into the local buffer. In this way buffered streams will
>> cascade harmlessly. */
>> - if (len >= getBufIfOpen().length && markpos == -1) {
>> + int size = Math.max(getBufIfOpen(false).length, initialSize);
>> + if (len >= size && markpos == -1) {
>> return getInIfOpen().read(b, off, len);
>> }
>> fill();
>> @@ -374,7 +408,7 @@ public class BufferedInputStream extends FilterInputStream {
>> }
>>
>> private int implRead(byte[] b, int off, int len) throws IOException {
>> - getBufIfOpen(); // Check for closed stream
>> + ensureOpen();
>> if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
>> throw new IndexOutOfBoundsException();
>> } else if (len == 0) {
>> @@ -421,7 +455,7 @@ public class BufferedInputStream extends FilterInputStream {
>> }
>>
>> private long implSkip(long n) throws IOException {
>> - getBufIfOpen(); // Check for closed stream
>> + ensureOpen();
>> if (n <= 0) {
>> return 0;
>> }
>> @@ -544,7 +578,7 @@ public class BufferedInputStream extends FilterInputStream {
>> }
>>
>> private void implReset() throws IOException {
>> - getBufIfOpen(); // Cause exception if closed
>> + ensureOpen();
>> if (markpos < 0)
>> throw new IOException("Resetting to invalid mark");
>> pos = markpos;
>
>> Can you try the patch below instead? This will ensure that read, skip, reset, etc. check buf as before.
>
> I think that the implementation provided by @AlanBateman looks good.
The BIS changes in this PR looks okay. I can't be Reviewer for that part because I provided the current changes. @bplb tells me that he will review.
I don't have strong views on the benchmark. The main benefit of the lazy allocation is, in my view, for cases where a BIS is created and either not used, or the first usage is long after it is created. We see this with System.in but it's possible that most BIS usages involve a read soon after the BIS is created. The other scenario is the subtle optimization to bypass the internal buffer. This only works in the benchmark because readAllBytes uses a 16k buffer.
-------------
PR Comment: https://git.openjdk.org/jdk/pull/13150#issuecomment-1495839430
More information about the core-libs-dev
mailing list