Improving ZipFile.getEntryPos double loop queries
Eirik Bjørsnøs
eirbjo at gmail.com
Thu Apr 16 07:35:50 UTC 2020
Hi,
ZipEntry.getEntryPos currently has a double loop which retries search after
a failed lookup by appending a '/' to the name/hash. This means that any
miss needs to query the hash table twice.
The following patch updates hashN to truncate any '/' at the end of an
entry name. This ensures that the hash of META-INF and META-INF/ will
always collide and the double query loop is no longer needed.
I'm seeing ~35% time saving on misses and a slight improvement on hits.
Removing the double loop also improves readability.
diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 274e8788d1..fd6a4e276c 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -1266,6 +1266,9 @@ public class ZipFile implements ZipConstants,
Closeable {
}
private static final int hashN(byte[] a, int off, int len) {
+ if(a[off + len -1] == '/') {
+ len--;
+ }
int h = 1;
while (len-- > 0) {
h = 31 * h + a[off++];
@@ -1493,48 +1496,31 @@ public class ZipFile implements ZipConstants,
Closeable {
int hsh = hashN(name, 0, name.length);
int idx = table[(hsh & 0x7fffffff) % tablelen];
/*
- * This while loop is an optimization where a double lookup
- * for name and name+/ is being performed. The name char
- * array has enough room at the end to try again with a
- * slash appended if the first table lookup does not succeed.
+ * Search down the target hash chain for a entry whose
+ * 32 bit hash matches the hashed name.
*/
- while (true) {
- /*
- * Search down the target hash chain for a entry whose
- * 32 bit hash matches the hashed name.
- */
- while (idx != ZIP_ENDCHAIN) {
- if (getEntryHash(idx) == hsh) {
- // The CEN name must match the specfied one
- int pos = getEntryPos(idx);
- if (name.length == CENNAM(cen, pos)) {
- boolean matched = true;
- int nameoff = pos + CENHDR;
- for (int i = 0; i < name.length; i++) {
- if (name[i] != cen[nameoff++]) {
- matched = false;
- break;
- }
- }
- if (matched) {
- return pos;
+ while (idx != ZIP_ENDCHAIN) {
+ if (getEntryHash(idx) == hsh) {
+ // The CEN name must match the specfied one
+ int pos = getEntryPos(idx);
+ final int nlen = CENNAM(cen, pos);
+ if (name.length == nlen || (addSlash && name.length ==
nlen - 1 && cen[pos + CENHDR + nlen - 1] == '/')) {
+ boolean matched = true;
+ int nameoff = pos + CENHDR;
+ for (int i = 0; i < name.length; i++) {
+ if (name[i] != cen[nameoff++]) {
+ matched = false;
+ break;
}
- }
+ }
+ if (matched) {
+ return pos;
+ }
}
- idx = getEntryNext(idx);
- }
- /* If not addSlash, or slash is already there, we are done
*/
- if (!addSlash || name.length == 0 || name[name.length -
1] == '/') {
- return -1;
}
- /* Add slash and try once more */
- name = Arrays.copyOf(name, name.length + 1);
- name[name.length - 1] = '/';
- hsh = hash_append(hsh, (byte)'/');
- //idx = table[hsh % tablelen];
- idx = table[(hsh & 0x7fffffff) % tablelen];
- addSlash = false;
+ idx = getEntryNext(idx);
}
+ return -1;
}
/**
More information about the core-libs-dev
mailing list