JDK-8242959 residual cleanup: Move encoding checks into ZipCoder

Eirik Bjørsnøs eirbjo at gmail.com
Sat Jan 16 09:53:29 UTC 2021


Hi,

The following is purely a code cleanup/refactoring proposal for ZipFile
internals:

ZipFile.Source.initCEN currently checks that each entry name is encoded
using bytes valid in the entry's encoding:

if (zc.isUTF8() || (flag & USE_UTF8) != 0) {
          checkUTF8(cen, pos + CENHDR, nlen);
} else {
           checkEncoding(zc, cen, pos + CENHDR, nlen);
}

I find this unnecessarily complicated for a couple of reasons:

1: The if repeats flag checking logic which already exists in zipCoderForPos
2: checkUTF and checkEncoding are encoding-specific methods and as such
would have a better home in ZipCoder

As a cleanup, I propose that we move checkEncoding into ZipCoder and make
checkUTF an override of checkEncoding in UTF8ZipCoder.

Here's a patch which implements the proposal. Sponsors are welcome :-)

diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java
b/src/java.base/share/classes/java/util/zip/ZipCoder.java
index 13012aab0f6..cb1ab256891 100644
--- a/src/java.base/share/classes/java/util/zip/ZipCoder.java
+++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java
@@ -54,6 +54,14 @@ class ZipCoder {
         return new ZipCoder(charset);
     }

+    void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
+        try {
+            toString(a, pos, nlen);
+        } catch(Exception e) {
+            throw new ZipException("invalid CEN header (bad entry name)");
+        }
+    }
+
     String toString(byte[] ba, int off, int length) {
         try {
             return decoder().decode(ByteBuffer.wrap(ba, off,
length)).toString();
@@ -203,6 +211,25 @@ class ZipCoder {
             return true;
         }

+        @Override
+        void checkEncoding(byte[] a, int pos, int len) throws ZipException
{
+            try {
+                int end = pos + len;
+                while (pos < end) {
+                    // ASCII fast-path: When checking that a range of
bytes is
+                    // valid UTF-8, we can avoid some allocation by
skipping
+                    // past bytes in the 0-127 range
+                    if (a[pos] < 0) {
+                        ZipCoder.toStringUTF8(a, pos, end - pos);
+                        break;
+                    }
+                    pos++;
+                }
+            } catch(Exception e) {
+                throw new ZipException("invalid CEN header (bad entry
name)");
+            }
+        }
+
         @Override
         String toString(byte[] ba, int off, int length) {
             return JLA.newStringUTF8NoRepl(ba, off, length);
diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 4c53855abfe..07edc06999b 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -1309,31 +1309,6 @@ public class ZipFile implements ZipConstants,
Closeable {
             }
         }

-        private static final void checkUTF8(byte[] a, int pos, int len)
throws ZipException {
-            try {
-                int end = pos + len;
-                while (pos < end) {
-                    // ASCII fast-path: When checking that a range of
bytes is
-                    // valid UTF-8, we can avoid some allocation by
skipping
-                    // past bytes in the 0-127 range
-                    if (a[pos] < 0) {
-                        ZipCoder.toStringUTF8(a, pos, end - pos);
-                        break;
-                    }
-                    pos++;
-                }
-            } catch(Exception e) {
-                zerror("invalid CEN header (bad entry name)");
-            }
-        }
-
-        private final void checkEncoding(ZipCoder zc, byte[] a, int pos,
int nlen) throws ZipException {
-            try {
-                zc.toString(a, pos, nlen);
-            } catch(Exception e) {
-                zerror("invalid CEN header (bad entry name)");
-            }
-        }

         private static class End {
             int  centot;     // 4 bytes
@@ -1520,13 +1495,10 @@ public class ZipFile implements ZipConstants,
Closeable {
                     zerror("invalid CEN header (bad compression method: "
+ method + ")");
                 if (entryPos + nlen > limit)
                     zerror("invalid CEN header (bad header size)");
-                if (zc.isUTF8() || (flag & USE_UTF8) != 0) {
-                    checkUTF8(cen, pos + CENHDR, nlen);
-                } else {
-                    checkEncoding(zc, cen, pos + CENHDR, nlen);
-                }
+                ZipCoder zcp = zipCoderForPos(pos);
+                zcp.checkEncoding(cen, pos + CENHDR, nlen);
                 // Record the CEN offset and the name hash in our hash
cell.
-                hash = zipCoderForPos(pos).normalizedHash(cen, entryPos,
nlen);
+                hash = zcp.normalizedHash(cen, entryPos, nlen);
                 hsh = (hash & 0x7fffffff) % tablelen;
                 next = table[hsh];
                 table[hsh] = idx;


More information about the core-libs-dev mailing list