RFR: 8322256: Define and document GZIPInputStream concatenated stream semantics [v5]
Jaikiran Pai
jpai at openjdk.org
Mon Jul 15 13:14:56 UTC 2024
On Mon, 15 Jul 2024 10:06:20 GMT, Jaikiran Pai <jpai at openjdk.org> wrote:
>> Archie Cobbs has updated the pull request with a new target base due to a merge or a rebase. The pull request now contains 12 commits:
>>
>> - Bump @since from 23 to 24.
>> - Merge branch 'master' into JDK-8322256
>> - Relabel "trailing garbage" as "extra bytes" to sound less accusatory.
>> - Simplify code by eliminating an impossible case.
>> - Field name change and Javadoc wording tweaks.
>> - Merge branch 'master' into JDK-8322256
>> - Javadoc wording tweaks.
>> - Merge branch 'master' into JDK-8322256
>> - Clarify exceptions: sometimes ZipException, sometimes EOFException.
>> - Merge branch 'master' into JDK-8322256
>> - ... and 2 more: https://git.openjdk.org/jdk/compare/75dc2f85...f845a75b
>
> src/java.base/share/classes/java/util/zip/GZIPInputStream.java line 153:
>
>> 151: */
>> 152: public GZIPInputStream(InputStream in, int size,
>> 153: boolean allowConcatenation, boolean ignoreExtraBytes) throws IOException {
>
> I haven't reviewed the javadoc changes. I will do that separately once we settle down on the API and implementation changes.
> As for this new constructor, I think the only new parameter we should introduce is whether or not the underlying input stream `in` is expected/allowed to have concatenated GZIP stream(s). So I think we should remove the "ignoreExtraBytes" new parameters from this constructor and. Additionally, I think the `allowConcatenation` should instead be named `allowConcatenatedGZIPStream`:
>
>
> public GZIPInputStream(InputStream in, int size, boolean allowConcatenatedGZIPStream) throws IOException {
Just to provide a more concrete input, here's a very minimal tested version of what I had in mind:
--- a/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
+++ b/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
@@ -55,6 +55,8 @@ public class GZIPInputStream extends InflaterInputStream {
private boolean closed = false;
+ private final boolean allowConcatenatedGZIPStream;
+
/**
* Check to make sure that this stream has not been closed
*/
@@ -76,14 +78,7 @@ private void ensureOpen() throws IOException {
* @throws IllegalArgumentException if {@code size <= 0}
*/
public GZIPInputStream(InputStream in, int size) throws IOException {
- super(in, createInflater(in, size), size);
- usesDefaultInflater = true;
- try {
- readHeader(in);
- } catch (IOException ioe) {
- this.inf.end();
- throw ioe;
- }
+ this(in, size, true);
}
/*
@@ -111,7 +106,28 @@ private static Inflater createInflater(InputStream in, int size) {
* @throws IOException if an I/O error has occurred
*/
public GZIPInputStream(InputStream in) throws IOException {
- this(in, 512);
+ this(in, 512, true);
+ }
+
+ /**
+ * WIP
+ * @param in the input stream
+ * @param size the input buffer size
+ * @param allowConcatenatedGZIPStream true if the input stream is allowed to contain
+ * concatenated GZIP streams. false otherwise
+ * @throws IOException if an I/O error has occurred
+ */
+ public GZIPInputStream(InputStream in, int size, boolean allowConcatenatedGZIPStream)
+ throws IOException {
+ super(in, createInflater(in, size), size);
+ this.allowConcatenatedGZIPStream = allowConcatenatedGZIPStream;
+ usesDefaultInflater = true;
+ try {
+ readHeader(in);
+ } catch (IOException ioe) {
+ this.inf.end();
+ throw ioe;
+ }
}
/**
@@ -150,10 +166,45 @@ public int read(byte[] buf, int off, int len) throws IOException {
}
int n = super.read(buf, off, len);
if (n == -1) {
- if (readTrailer())
+ // reading of deflated data is now complete, we now read the GZIP stream trailer
+ readTrailer();
+ if (!allowConcatenatedGZIPStream) {
eos = true;
- else
+ } else {
+ // This GZIPInputStream instance was created to allow potential
+ // concatenated GZIP stream, so we now try and read the next
+ // GZIP stream header, if any.
+
+ // use any un-inflated remaining bytes from the stream
+ InputStream headerIS = in;
+ int remainingUnInflated = inf.getRemaining();
+ if (remainingUnInflated > 0) {
+ headerIS = new SequenceInputStream(
+ new ByteArrayInputStream(this.buf, this.len - remainingUnInflated,
+ remainingUnInflated),
+ new FilterInputStream(in) {
+ public void close() throws IOException {}
+ });
+ }
+ int numHeaderBytes;
+ try {
+ numHeaderBytes = readHeader(headerIS); // next GZIP stream header, if any
+ } catch (EOFException _) {
+ // TODO: somehow verify that this failed when reading the
+ // first byte of the header?
+ eos = true;
+ return n; // -1
+ }
+ // reset the inflater to read the deflated content of the next GZIP stream
+ inf.reset();
+ // adjust the input to the inflater correctly past the GZIP stream header
+ if (remainingUnInflated > numHeaderBytes) {
+ inf.setInput(this.buf, this.len - remainingUnInflated + numHeaderBytes,
+ remainingUnInflated - numHeaderBytes);
+ }
+ // read the next GZIP stream's deflated data
return this.read(buf, off, len);
+ }
} else {
crc.update(buf, off, n);
}
@@ -242,12 +293,12 @@ private int readHeader(InputStream this_in) throws IOException {
* reached, false if there are more (concatenated gzip
* data set)
*/
- private boolean readTrailer() throws IOException {
+ private void readTrailer() throws IOException {
InputStream in = this.in;
- int n = inf.getRemaining();
- if (n > 0) {
+ int remainingUnInflated = inf.getRemaining();
+ if (remainingUnInflated > 0) {
in = new SequenceInputStream(
- new ByteArrayInputStream(buf, len - n, n),
+ new ByteArrayInputStream(buf, len - remainingUnInflated, remainingUnInflated),
new FilterInputStream(in) {
public void close() throws IOException {}
});
@@ -255,20 +306,15 @@ public void close() throws IOException {}
// Uses left-to-right evaluation order
if ((readUInt(in) != crc.getValue()) ||
// rfc1952; ISIZE is the input size modulo 2^32
- (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL)))
+ (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL))) {
throw new ZipException("Corrupt GZIP trailer");
-
- // try concatenated case
- int m = 8; // this.trailer
- try {
- m += readHeader(in); // next.header
- } catch (IOException ze) {
- return true; // ignore any malformed, do nothing
}
+ int numTrailerBytes = 8;
inf.reset();
- if (n > m)
- inf.setInput(buf, len - n + m, n - m);
- return false;
+ if (remainingUnInflated > numTrailerBytes) {
+ inf.setInput(buf, len - remainingUnInflated + numTrailerBytes,
+ remainingUnInflated - numTrailerBytes);
+ }
}
/*
It still has one or two unanswered questions that I haven't fully thought of, but I thought this diff might be useful while discussing these changes.
-------------
PR Review Comment: https://git.openjdk.org/jdk/pull/18385#discussion_r1677807848
More information about the core-libs-dev
mailing list