Improving ZipFile.getEntryPos double loop queries

Eirik Bjørsnøs eirbjo at gmail.com
Thu Apr 16 07:35:50 UTC 2020


Hi,

ZipEntry.getEntryPos currently has a double loop which retries search after
a failed lookup by appending a '/' to the name/hash. This means that any
miss needs to query the hash table twice.

The following patch updates hashN to truncate any '/' at the end of an
entry name. This ensures that the hash of META-INF and META-INF/ will
always collide and the double query loop is no longer needed.

 I'm seeing ~35% time saving on misses and a slight improvement on hits.
Removing the double loop also improves readability.

diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 274e8788d1..fd6a4e276c 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -1266,6 +1266,9 @@ public class ZipFile implements ZipConstants,
Closeable {
         }

         private static final int hashN(byte[] a, int off, int len) {
+            if(a[off + len -1] == '/') {
+                len--;
+            }
             int h = 1;
             while (len-- > 0) {
                 h = 31 * h + a[off++];
@@ -1493,48 +1496,31 @@ public class ZipFile implements ZipConstants,
Closeable {
             int hsh = hashN(name, 0, name.length);
             int idx = table[(hsh & 0x7fffffff) % tablelen];
             /*
-             * This while loop is an optimization where a double lookup
-             * for name and name+/ is being performed. The name char
-             * array has enough room at the end to try again with a
-             * slash appended if the first table lookup does not succeed.
+             * Search down the target hash chain for a entry whose
+             * 32 bit hash matches the hashed name.
              */
-            while (true) {
-                /*
-                 * Search down the target hash chain for a entry whose
-                 * 32 bit hash matches the hashed name.
-                 */
-                while (idx != ZIP_ENDCHAIN) {
-                    if (getEntryHash(idx) == hsh) {
-                        // The CEN name must match the specfied one
-                        int pos = getEntryPos(idx);
-                        if (name.length == CENNAM(cen, pos)) {
-                            boolean matched = true;
-                            int nameoff = pos + CENHDR;
-                            for (int i = 0; i < name.length; i++) {
-                                if (name[i] != cen[nameoff++]) {
-                                    matched = false;
-                                    break;
-                                }
-                            }
-                            if (matched) {
-                                return pos;
+            while (idx != ZIP_ENDCHAIN) {
+                if (getEntryHash(idx) == hsh) {
+                    // The CEN name must match the specfied one
+                    int pos = getEntryPos(idx);
+                    final int nlen = CENNAM(cen, pos);
+                    if (name.length == nlen || (addSlash && name.length ==
nlen - 1 && cen[pos + CENHDR + nlen - 1] == '/')) {
+                        boolean matched = true;
+                        int nameoff = pos + CENHDR;
+                        for (int i = 0; i < name.length; i++) {
+                            if (name[i] != cen[nameoff++]) {
+                                matched = false;
+                                break;
                             }
-                         }
+                        }
+                        if (matched) {
+                            return pos;
+                        }
                     }
-                    idx = getEntryNext(idx);
-                }
-                /* If not addSlash, or slash is already there, we are done
*/
-                if (!addSlash  || name.length == 0 || name[name.length -
1] == '/') {
-                     return -1;
                 }
-                /* Add slash and try once more */
-                name = Arrays.copyOf(name, name.length + 1);
-                name[name.length - 1] = '/';
-                hsh = hash_append(hsh, (byte)'/');
-                //idx = table[hsh % tablelen];
-                idx = table[(hsh & 0x7fffffff) % tablelen];
-                addSlash = false;
+                idx = getEntryNext(idx);
             }
+            return -1;
         }

         /**


More information about the core-libs-dev mailing list