RFR: JDK-8222187: java.util.Base64.Decoder stream adds unexpected null bytes at the end

Roger Riggs Roger.Riggs at oracle.com
Wed Jul 15 20:21:08 UTC 2020


Hi Raffaello,

Base64.java:
2: Please update 2nd copyright year to 2020

leftovers():
996: "&" the shortcutting && is more typical in a condition.
997: the -= is buried in an expression and a reader might miss it.


1053:  "can be reallocated to other vars":  not a useful comment, 
reflecting on only one possible implementation that is out of the 
control of the developer.
I'd almost rather see 'len' than 'limit - off'; it might be informative 
to the reader if 'limit' was declared final. (1056:)

TestBase54.java:

2: Update 2nd copyright year to 2020

27:  Please add the bug number to the @bug line.

Style-wise, I would remove the blank lines at the beginning of blocks.  
(1095, 1115)

Thanks, Roger


On 7/14/20 11:47 AM, Raffaello Giulietti wrote:
> Hi Roger,
>
> here's the latest version of the patch.
>
> I did:
> * Withdraw the simplification in encodedOutLength(), as it is indeed 
> out of scope for this bug fix.
> * Restore field names in DecInputStream except for nextin (now wpos) 
> and nextout (now rpos) because they have slightly different semantics. 
> That's just for clarity. I would have nothing against reusing the old 
> names for the proposed purpose.
> * Switch back to the original no-arg read() implementation.
> * Adjust comment continuation lines.
> * Preserve the proposed logic of read(byte[], int, int) and the 
> supporting methods.
>
> Others needed changes are:
> * Correct the copyright years: that's something better left to Oracle.
> * Remove the apparently useless import at L33. I could build and run 
> without it.
>
>
> Greetings
> Raffaello
>
> ----
>
> # HG changeset patch
> # User lello
> # Date 1594741356 -7200
> #      Tue Jul 14 17:42:36 2020 +0200
> # Node ID 2bebe2aced4c3fa3b42b3c6ee445f9b7b0d20b5d
> # Parent  a5649ebf94193770ca763391dd63807059d333b4
> 8222187: java.util.Base64.Decoder stream adds unexpected null bytes at 
> the end
> Reviewed-by: TBD
> Contributed-by: Raffaello Giulietti <raffaello.giulietti at gmail.com>
>
> diff --git a/src/java.base/share/classes/java/util/Base64.java 
> b/src/java.base/share/classes/java/util/Base64.java
> --- a/src/java.base/share/classes/java/util/Base64.java
> +++ b/src/java.base/share/classes/java/util/Base64.java
> @@ -961,12 +961,15 @@
>
>          private final InputStream is;
>          private final boolean isMIME;
> -        private final int[] base64;      // base64 -> byte mapping
> -        private int bits = 0;            // 24-bit buffer for decoding
> -        private int nextin = 18;         // next available "off" in 
> "bits" for input;
> -                                         // -> 18, 12, 6, 0
> -        private int nextout = -8;        // next available "off" in 
> "bits" for output;
> -                                         // -> 8, 0, -8 (no byte for 
> output)
> +        private final int[] base64;     // base64 -> byte mapping
> +        private int bits = 0;           // 24-bit buffer for decoding
> +
> +        /* writing bit pos inside bits; one of 24 (left, msb), 18, 
> 12, 6, 0 */
> +        private int wpos = 0;
> +
> +        /* reading bit pos inside bits: one of 24 (left, msb), 16, 8, 
> 0 */
> +        private int rpos = 0;
> +
>          private boolean eof = false;
>          private boolean closed = false;
>
> @@ -983,107 +986,156 @@
>              return read(sbBuf, 0, 1) == -1 ? -1 : sbBuf[0] & 0xff;
>          }
>
> -        private int eof(byte[] b, int off, int len, int oldOff)
> -            throws IOException
> -        {
> +        private int leftovers(byte[] b, int off, int pos, int limit) {
>              eof = true;
> -            if (nextin != 18) {
> -                if (nextin == 12)
> -                    throw new IOException("Base64 stream has one 
> un-decoded dangling byte.");
> -                // treat ending xx/xxx without padding character legal.
> -                // same logic as v == '=' below
> -                b[off++] = (byte)(bits >> (16));
> -                if (nextin == 0) {           // only one padding byte
> -                    if (len == 1) {          // no enough output space
> -                        bits >>= 8;          // shift to lowest byte
> -                        nextout = 0;
> -                    } else {
> -                        b[off++] = (byte) (bits >>  8);
> -                    }
> -                }
> +
> +            /*
> +             * We use a loop here, as this method is executed only a 
> few times.
> +             * Unrolling the loop would probably not contribute much 
> here.
> +             */
> +            while (rpos - 8 >= wpos & pos != limit) {
> +                b[pos++] = (byte) (bits >> (rpos -= 8));
>              }
> -            return off == oldOff ? -1 : off - oldOff;
> +            return pos - off != 0 || rpos - 8 >= wpos ? pos - off : -1;
>          }
>
> -        private int padding(byte[] b, int off, int len, int oldOff)
> -            throws IOException
> -        {
> -            // =     shiftto==18 unnecessary padding
> -            // x=    shiftto==12 dangling x, invalid unit
> -            // xx=   shiftto==6 && missing last '='
> -            // xx=y  or last is not '='
> -            if (nextin == 18 || nextin == 12 ||
> -                nextin == 6 && is.read() != '=') {
> -                throw new IOException("Illegal base64 ending 
> sequence:" + nextin);
> +        private int eof(byte[] b, int off, int pos, int limit) throws 
> IOException {
> +            /*
> +             * pos != limit
> +             *
> +             * wpos == 18: x     dangling single x, invalid unit
> +             * accept ending xx or xxx without padding characters
> +             */
> +            if (wpos == 18) {
> +                throw new IOException("Base64 stream has one 
> un-decoded dangling byte.");
>              }
> -            b[off++] = (byte)(bits >> (16));
> -            if (nextin == 0) {           // only one padding byte
> -                if (len == 1) {          // no enough output space
> -                    bits >>= 8;          // shift to lowest byte
> -                    nextout = 0;
> -                } else {
> -                    b[off++] = (byte) (bits >>  8);
> -                }
> +            rpos = 24;
> +            return leftovers(b, off, pos, limit);
> +        }
> +
> +        private int padding(byte[] b, int off, int pos, int limit) 
> throws IOException {
> +            /*
> +             * pos != limit
> +             *
> +             * wpos == 24: =    (unnecessary padding)
> +             * wpos == 18: x=   (dangling single x, invalid unit)
> +             * wpos == 12 and missing last '=': xx=  (invalid padding)
> +             * wpos == 12 and last is not '=': xx=x (invalid padding)
> +             */
> +            if (wpos >= 18 || wpos == 12 && is.read() != '=') {
> +                throw new IOException("Illegal base64 ending 
> sequence:" + wpos);
>              }
> -            eof = true;
> -            return off - oldOff;
> +            rpos = 24;
> +            return leftovers(b, off, pos, limit);
>          }
>
>          @Override
>          public int read(byte[] b, int off, int len) throws IOException {
> -            if (closed)
> +            if (closed) {
>                  throw new IOException("Stream is closed");
> -            if (eof && nextout < 0)    // eof and no leftover
> -                return -1;
> -            if (off < 0 || len < 0 || len > b.length - off)
> -                throw new IndexOutOfBoundsException();
> -            int oldOff = off;
> -            while (nextout >= 0) {       // leftover output byte(s) 
> in bits buf
> -                if (len == 0)
> -                    return off - oldOff;
> -                b[off++] = (byte)(bits >> nextout);
> -                len--;
> -                nextout -= 8;
> +            }
> +            Objects.checkFromIndexSize(off, len, b.length);
> +            if (len == 0) {
> +                return 0;
>              }
> -            bits = 0;
> -            while (len > 0) {
> -                int v = is.read();
> -                if (v == -1) {
> -                    return eof(b, off, len, oldOff);
> +
> +            /*
> +             * Rather than keeping 2 running vars (e.g., off and len),
> +             * we only keep one (pos), while definitely fixing the 
> boundaries
> +             * of the range [off, limit).
> +             * More specifically, each use of pos as an index in b, 
> it meets
> +             *      pos - off >= 0 & limit - pos > 0
> +             *
> +             * Note that limit can overflow to Integer.MIN_VALUE. 
> However,
> +             * as long as comparisons with pos are as coded, there's 
> no harm.
> +             *
> +             * In addition, limit - off (= len) is used from here on, 
> so the
> +             * space for len can be reallocated to other vars (e.g., 
> limit).
> +             */
> +            int pos = off;
> +            int limit = off + len;
> +            if (eof) {
> +                return leftovers(b, off, pos, limit);
> +            }
> +
> +            /*
> +             * Leftovers from previous invocation; here, wpos = 0.
> +             * There can be at most 2 leftover bytes (rpos <= 16).
> +             * Further, b has at least one free place.
> +             *
> +             * The logic could be coded as a loop, (as in method 
> leftovers())
> +             * but the explicit "unrolling" makes it possible to 
> generate
> +             * better byte extraction code.
> +             */
> +            if (rpos == 16) {
> +                b[pos++] = (byte) (bits >> 8);
> +                rpos = 8;
> +                if (pos == limit) {
> +                    return limit - off;
>                  }
> -                if ((v = base64[v]) < 0) {
> -                    if (v == -2) {       // padding byte(s)
> -                        return padding(b, off, len, oldOff);
> -                    }
> -                    if (v == -1) {
> -                        if (!isMIME)
> -                            throw new IOException("Illegal base64 
> character " +
> -                                Integer.toString(v, 16));
> -                        continue;        // skip if for rfc2045
> -                    }
> -                    // neve be here
> -                }
> -                bits |= (v << nextin);
> -                if (nextin == 0) {
> -                    nextin = 18;         // clear for next in
> -                    b[off++] = (byte)(bits >> 16);
> -                    if (len == 1) {
> -                        nextout = 8;    // 2 bytes left in bits
> -                        break;
> -                    }
> -                    b[off++] = (byte)(bits >> 8);
> -                    if (len == 2) {
> -                        nextout = 0;    // 1 byte left in bits
> -                        break;
> -                    }
> -                    b[off++] = (byte)bits;
> -                    len -= 3;
> -                    bits = 0;
> -                } else {
> -                    nextin -= 6;
> +            }
> +            if (rpos == 8) {
> +                b[pos++] = (byte) bits;
> +                rpos = 0;
> +                if (pos == limit) {
> +                    return limit - off;
>                  }
>              }
> -            return off - oldOff;
> +
> +            bits = 0;
> +            wpos = 24;
> +            for (;;) {
> +                /* pos != limit & rpos == 0 */
> +                int i = is.read();
> +                if (i < 0) {
> +                    return eof(b, off, pos, limit);
> +                }
> +                int v = base64[i];
> +                if (v < 0) {
> +
> +                    /*
> +                     * i not in alphabet, thus
> +                     *      v == -2: i is '=', the padding
> +                     *      v == -1: i is something else, typically 
> CR or LF
> +                     */
> +                    if (v == -1) {
> +                        if (isMIME) {
> +                            continue;
> +                        }
> +                        throw new IOException("Illegal base64 
> character 0x" +
> +                                Integer.toHexString(i));
> +                    }
> +                    return padding(b, off, pos, limit);
> +                }
> +                bits |= (v << (wpos -= 6));
> +                if (wpos != 0) {
> +                    continue;
> +                }
> +                if (limit - pos >= 3) {
> +
> +                    /* frequently taken fast path, no need to track 
> rpos */
> +                    b[pos++] = (byte) (bits >> 16);
> +                    b[pos++] = (byte) (bits >> 8);
> +                    b[pos++] = (byte) bits;
> +                    bits = 0;
> +                    wpos = 24;
> +                    if (pos == limit) {
> +                        return limit - off;
> +                    }
> +                    continue;
> +                }
> +
> +                /* b has either 1 or 2 free places */
> +                b[pos++] = (byte) (bits >> 16);
> +                if (pos == limit) {
> +                    rpos = 16;
> +                    return limit - off;
> +                }
> +                b[pos++] = (byte) (bits >> 8);
> +                /* pos == limit, no need for an if */
> +                rpos = 8;
> +                return limit - off;
> +            }
>          }
>
>          @Override
> diff --git a/test/jdk/java/util/Base64/TestBase64.java 
> b/test/jdk/java/util/Base64/TestBase64.java
> --- a/test/jdk/java/util/Base64/TestBase64.java
> +++ b/test/jdk/java/util/Base64/TestBase64.java
> @@ -144,6 +144,10 @@
>          testDecoderKeepsAbstinence(Base64.getDecoder());
>          testDecoderKeepsAbstinence(Base64.getUrlDecoder());
>          testDecoderKeepsAbstinence(Base64.getMimeDecoder());
> +
> +        // tests patch addressing JDK-8222187
> +        // https://bugs.openjdk.java.net/browse/JDK-8222187
> +        testJDK_8222187();
>      }
>
>      private static void test(Base64.Encoder enc, Base64.Decoder dec,
> @@ -607,4 +611,26 @@
>              }
>          }
>      }
> +
> +    private static void testJDK_8222187() throws Throwable {
> +        byte[] orig = "12345678".getBytes("US-ASCII");
> +        byte[] encoded = Base64.getEncoder().encode(orig);
> +        // decode using different buffer sizes, up to a longer one 
> than needed
> +        for (int bufferSize = 1; bufferSize <= encoded.length + 1; 
> bufferSize++) {
> +            try (
> +                    InputStream in = Base64.getDecoder().wrap(
> +                            new ByteArrayInputStream(encoded));
> +                    ByteArrayOutputStream baos = new 
> ByteArrayOutputStream();
> +            ) {
> +                byte[] buffer = new byte[bufferSize];
> +                int read;
> +                while ((read = in.read(buffer, 0, bufferSize)) >= 0) {
> +                    baos.write(buffer, 0, read);
> +                }
> +                // compare result, output info if lengths do not match
> +                byte[] decoded = baos.toByteArray();
> +                checkEqual(decoded, orig, "Base64 stream decoding 
> failed!");
> +            }
> +        }
> +    }
>  }
>



More information about the core-libs-dev mailing list