RFR: JDK-8222187: java.util.Base64.Decoder stream adds unexpected null bytes at the end
Raffaello Giulietti
raffaello.giulietti at gmail.com
Fri Jul 3 15:48:12 UTC 2020
Hello,
after Roger's notes, which escaped my attention before, I've withdrawn
all the changes but for DecInputStream, *except* that I couldn't resist
to simplify the maths in encodedOutLength(), while still using
xxxExact() arithmetic.
Sorry for the confusion
Raffaello
> Hi Raffaello,
>
> There is way more code changed here than is needed to fix the bug.
> General enhancement should be separated from bug fixes.
> It makes it easier to review to see the bug was fixed
> and easier to separately review other code to see that there are no
> unexpected changes.
>
> If some of the changes are motivated by expected performance improvements,
> there should be JMH tests comparing the before and after.
> The change to use byte arrays seems useful, but even using char[]
> there is little danger of cache thrashing.
>
> If using the code using xxxExact was correct, don't change it.
> Those methods are intrinsified and perform as well or better than using
> long.
> Usually, it is better to leave code alone and not risk breaking it.
>
> Special care needs taken when changing a method that is intrinsified.
> The optimized version may use fields of the object and stop
> working if they are changed.
>
> In the test, the range of buffer sizes tests seems to waste a lot
> of cycles on sizes greater than the encoded size of the input.
>
> Regards, Roger
---------------------
# HG changeset patch
# User lello
# Date 1593790152 -7200
# Fri Jul 03 17:29:12 2020 +0200
# Node ID 73370832de1d2bf3b450930f5cb2467e10528c69
# Parent a7c0307232406c7b0c1a4b8c2de111077848203d
8222187: java.util.Base64.Decoder stream adds unexpected null bytes at
the end
Reviewed-by: TBD
Contributed-by: Raffaello Giulietti <raffaello.giulietti at gmail.com>
diff --git a/src/java.base/share/classes/java/util/Base64.java
b/src/java.base/share/classes/java/util/Base64.java
--- a/src/java.base/share/classes/java/util/Base64.java
+++ b/src/java.base/share/classes/java/util/Base64.java
@@ -255,14 +255,11 @@
*
*/
private final int encodedOutLength(int srclen, boolean
throwOOME) {
- int len = 0;
+ int len;
try {
- if (doPadding) {
- len = Math.multiplyExact(4, (Math.addExact(srclen,
2) / 3));
- } else {
- int n = srclen % 3;
- len = Math.addExact(Math.multiplyExact(4, (srclen /
3)), (n == 0 ? 0 : n + 1));
- }
+ len = doPadding
+ ? Math.multiplyExact(4, (Math.addExact(srclen,
2) / 3))
+ : Math.addExact((Math.addExact(srclen, 2) / 3),
srclen);
if (linemax > 0) { // line
separators
len = Math.addExact(len, (len - 1) / linemax *
newline.length);
}
@@ -961,14 +958,15 @@
private final InputStream is;
private final boolean isMIME;
- private final int[] base64; // base64 -> byte mapping
- private int bits = 0; // 24-bit buffer for decoding
- private int nextin = 18; // next available "off" in
"bits" for input;
- // -> 18, 12, 6, 0
- private int nextout = -8; // next available "off" in
"bits" for output;
- // -> 8, 0, -8 (no byte for
output)
- private boolean eof = false;
- private boolean closed = false;
+ private final int[] base64; // mapping from alphabet to values
+ private int bits; // 24 bit buffer for decoding
+ private int wpos; // writing bit pos inside bits
+ // one of 24 (left, msb), 18, 12, 6, 0
+ private int rpos; // reading bit pos inside bits
+ // one of 24 (left, msb), 16, 8, 0
+ private boolean eos;
+ private boolean closed;
+ private byte[] onebuf = new byte[1];
DecInputStream(InputStream is, int[] base64, boolean isMIME) {
this.is = is;
@@ -976,114 +974,158 @@
this.isMIME = isMIME;
}
- private byte[] sbBuf = new byte[1];
-
@Override
public int read() throws IOException {
- return read(sbBuf, 0, 1) == -1 ? -1 : sbBuf[0] & 0xff;
+ return read(onebuf, 0, 1) >= 0 ? onebuf[0] & 0xff : -1;
+ }
+
+ private int leftovers(byte[] b, int off, int pos, int limit) {
+ eos = true;
+ /*
+ We use a loop here, as this code is executed only once.
+ Unrolling the loop would probably not contribute much here.
+ */
+ while (rpos - 8 >= wpos && pos != limit) {
+ b[pos++] = (byte) (bits >> (rpos -= 8));
+ }
+ return pos - off != 0 || rpos - 8 >= wpos ? pos - off : -1;
}
- private int eof(byte[] b, int off, int len, int oldOff)
- throws IOException
- {
- eof = true;
- if (nextin != 18) {
- if (nextin == 12)
- throw new IOException("Base64 stream has one
un-decoded dangling byte.");
- // treat ending xx/xxx without padding character legal.
- // same logic as v == '=' below
- b[off++] = (byte)(bits >> (16));
- if (nextin == 0) { // only one padding byte
- if (len == 1) { // no enough output space
- bits >>= 8; // shift to lowest byte
- nextout = 0;
- } else {
- b[off++] = (byte) (bits >> 8);
- }
- }
+ private int eos(byte[] b, int off, int pos, int limit) throws
IOException {
+ /*
+ wpos == 18: x dangling single x, invalid unit
+ accept ending xx or xxx without padding characters
+ */
+ if (wpos == 18) {
+ throw new IOException("Base64 stream has one un-decoded
dangling byte");
}
- return off == oldOff ? -1 : off - oldOff;
+ rpos = 24;
+ return leftovers(b, off, pos, limit);
}
- private int padding(byte[] b, int off, int len, int oldOff)
- throws IOException
- {
- // = shiftto==18 unnecessary padding
- // x= shiftto==12 dangling x, invalid unit
- // xx= shiftto==6 && missing last '='
- // xx=y or last is not '='
- if (nextin == 18 || nextin == 12 ||
- nextin == 6 && is.read() != '=') {
- throw new IOException("Illegal base64 ending sequence:"
+ nextin);
+ private int padding(byte[] b, int off, int pos, int limit)
throws IOException {
+ /*
+ wpos == 24: = (unnecessary padding)
+ wpos == 18: x= (dangling single x, invalid unit)
+ wpos == 12 and missing last '=': xx= (invalid padding)
+ wpos == 12 and last is not '=': xx=x (invalid padding)
+ */
+ if (wpos >= 18 || wpos == 12 && is.read() != '=') {
+ throw new IOException("Illegal base64 ending sequence");
}
- b[off++] = (byte)(bits >> (16));
- if (nextin == 0) { // only one padding byte
- if (len == 1) { // no enough output space
- bits >>= 8; // shift to lowest byte
- nextout = 0;
- } else {
- b[off++] = (byte) (bits >> 8);
- }
- }
- eof = true;
- return off - oldOff;
+ rpos = 24;
+ return leftovers(b, off, pos, limit);
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
- if (closed)
+ if (closed) {
throw new IOException("Stream is closed");
- if (eof && nextout < 0) // eof and no leftover
- return -1;
- if (off < 0 || len < 0 || len > b.length - off)
- throw new IndexOutOfBoundsException();
- int oldOff = off;
- while (nextout >= 0) { // leftover output byte(s) in
bits buf
- if (len == 0)
- return off - oldOff;
- b[off++] = (byte)(bits >> nextout);
- len--;
- nextout -= 8;
+ }
+ Objects.checkFromIndexSize(off, len, b.length);
+ if (len == 0) {
+ return 0;
}
- bits = 0;
- while (len > 0) {
- int v = is.read();
- if (v == -1) {
- return eof(b, off, len, oldOff);
+
+ /*
+ Rather than keeping 2 running vars (e.g., off and len), we only
+ keep one (pos), while definitely fixing the boundaries of
the range
+ [off, limit).
+
+ Note that limit can overflow to Integer.MIN_VALUE. However,
as long
+ as comparisons with pos are done as coded, there's no harm.
+
+ In addition, limit - off (= len) is used from here on, so the
+ location for len can be reallocated to other vars (e.g.,
limit).
+ */
+ int pos = off;
+ int limit = off + len;
+ if (eos) {
+ return leftovers(b, off, pos, limit);
+ }
+
+ /*
+ leftovers from previous invocation; here, wpos = 0.
+ There can be at most 2 leftover bytes (rpos <= 16).
+ Further, the buffer b has at least one free place.
+
+ The logic could be coded as a loop, (as in method leftovers())
+ but the explicit "unrolling" makes it possible to generate
better
+ byte extraction code.
+ */
+ if (rpos == 16) {
+ b[pos++] = (byte) (bits >> 8);
+ rpos = 8;
+ if (pos == limit) {
+ return limit - off;
}
- if ((v = base64[v]) < 0) {
- if (v == -2) { // padding byte(s)
- return padding(b, off, len, oldOff);
- }
- if (v == -1) {
- if (!isMIME)
- throw new IOException("Illegal base64
character " +
- Integer.toString(v, 16));
- continue; // skip if for rfc2045
- }
- // neve be here
- }
- bits |= (v << nextin);
- if (nextin == 0) {
- nextin = 18; // clear for next in
- b[off++] = (byte)(bits >> 16);
- if (len == 1) {
- nextout = 8; // 2 bytes left in bits
- break;
- }
- b[off++] = (byte)(bits >> 8);
- if (len == 2) {
- nextout = 0; // 1 byte left in bits
- break;
- }
- b[off++] = (byte)bits;
- len -= 3;
- bits = 0;
- } else {
- nextin -= 6;
+ }
+ if (rpos == 8) {
+ b[pos++] = (byte) bits;
+ rpos = 0;
+ if (pos == limit) {
+ return limit - off;
}
}
- return off - oldOff;
+
+ bits = 0;
+ wpos = 24;
+ for (;;) {
+ // Here, pos != limit
+ int i = is.read();
+ if (i < 0) {
+ return eos(b, off, pos, limit);
+ }
+ int v = base64[i];
+ if (v < 0) {
+ /*
+ i not in alphabet, thus
+ v = -2: i is '=', the padding
+ v = -1: i is something else, e.g., CR or LF
+ */
+ if (v == -1) {
+ if (isMIME) {
+ continue;
+ }
+ throw new IOException("Illegal base64 byte 0x" +
+ Integer.toHexString(i));
+ }
+ return padding(b, off, pos, limit);
+ }
+ bits |= (v << (wpos -= 6));
+ if (wpos != 0) {
+ continue;
+ }
+ if (limit - pos >= 3) {
+ // frequently taken fast path, no need to track rpos
+ b[pos++] = (byte) (bits >> 16);
+ b[pos++] = (byte) (bits >> 8);
+ b[pos++] = (byte) bits;
+ bits = 0;
+ wpos = 24;
+ if (pos == limit) {
+ return limit - off;
+ }
+ continue;
+ }
+ /*
+ Here, buffer b has either 1 or 2 free places, that is,
+ limit - pos = 1 or limit - pos = 2.
+
+ As above, this could be coded as a loop. But since the
+ shift lengths are explicit multiples of 8, better code
can be
+ probably generated.
+ */
+ b[pos++] = (byte) (bits >> 16);
+ if (pos == limit) {
+ rpos = 16;
+ return limit - off;
+ }
+ b[pos++] = (byte) (bits >> 8);
+ // Here, pos = limit, no need for an if.
+ rpos = 8;
+ return limit - off;
+ }
}
@Override
diff --git a/test/jdk/java/util/Base64/TestBase64.java
b/test/jdk/java/util/Base64/TestBase64.java
--- a/test/jdk/java/util/Base64/TestBase64.java
+++ b/test/jdk/java/util/Base64/TestBase64.java
@@ -144,6 +144,10 @@
testDecoderKeepsAbstinence(Base64.getDecoder());
testDecoderKeepsAbstinence(Base64.getUrlDecoder());
testDecoderKeepsAbstinence(Base64.getMimeDecoder());
+
+ // tests patch addressing JDK-8222187
+ // https://bugs.openjdk.java.net/browse/JDK-8222187
+ testJDK_8222187();
}
private static void test(Base64.Encoder enc, Base64.Decoder dec,
@@ -607,4 +611,26 @@
}
}
}
+
+ private static void testJDK_8222187() throws Throwable {
+ byte[] orig = "12345678".getBytes("US-ASCII");
+ byte[] encoded = Base64.getEncoder().encode(orig);
+ // decode using different buffer sizes
+ for (int bufferSize = 1; bufferSize <= encoded.length + 1;
bufferSize++) {
+ try (
+ InputStream in = Base64.getDecoder().wrap(
+ new ByteArrayInputStream(encoded));
+ ByteArrayOutputStream baos = new
ByteArrayOutputStream();
+ ) {
+ byte[] buffer = new byte[bufferSize];
+ int read;
+ while ((read = in.read(buffer, 0, bufferSize)) >= 0) {
+ baos.write(buffer, 0, read);
+ }
+ // compare result, output info if lengths do not match
+ byte[] decoded = baos.toByteArray();
+ checkEqual(decoded, orig, "Base64 stream decoding
failed!");
+ }
+ }
+ }
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: JDK-8222187.patch
Type: text/x-patch
Size: 14294 bytes
Desc: not available
URL: <https://mail.openjdk.java.net/pipermail/core-libs-dev/attachments/20200703/2bf1ee1d/JDK-8222187-0001.patch>
More information about the core-libs-dev
mailing list