JarFile.getVersionedEntry scalability with new release cadence

Eirik Bjørsnøs eirbjo at gmail.com
Sun Apr 12 11:26:11 UTC 2020


Claes,

Since the ZIP index is already scanned for META-INF/ names in
ZipFile.Source.initCEN, it might make sense to move version scanning there.
This would also allow identifying version strings at the byte array level.

Here's a patch which implements this:

diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
b/src/java.base/share/classes/java/util/jar/JarFile.java
index 1ec0f5bdae..988cc837f0 100644
--- a/src/java.base/share/classes/java/util/jar/JarFile.java
+++ b/src/java.base/share/classes/java/util/jar/JarFile.java
@@ -49,6 +49,7 @@ import java.util.Locale;
 import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.function.Function;
+import java.util.stream.IntStream;
 import java.util.stream.Stream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipException;
@@ -161,7 +162,7 @@ public class JarFile extends ZipFile {
     private final Runtime.Version version;  // current version
     private final int versionFeature;       // version.feature()
     private boolean isMultiRelease;         // is jar multi-release?
-
+    private int[] versions;                 // which versions does the jar
contain
     // indicates if Class-Path attribute present
     private boolean hasClassPathAttribute;
     // true if manifest checked for special attributes
@@ -599,12 +600,13 @@ public class JarFile extends ZipFile {
     }

     private JarEntry getVersionedEntry(String name, JarEntry je) {
-        if (BASE_VERSION_FEATURE < versionFeature) {
+        int[] versions = this.versions;
+        if (BASE_VERSION_FEATURE < versionFeature && versions != null &&
versions.length > 0) {
             if (!name.startsWith(META_INF)) {
                 // search for versioned entry
-                int v = versionFeature;
-                while (v > BASE_VERSION_FEATURE) {
-                    JarFileEntry vje = getEntry0(META_INF_VERSIONS + v +
"/" + name);
+                int v = versions.length - 1;
+                while (v >= 0) {
+                    JarFileEntry vje = getEntry0(META_INF_VERSIONS +
versions[v] + "/" + name);
                     if (vje != null) {
                         return vje.withBasename(name);
                     }
@@ -1016,9 +1018,16 @@ public class JarFile extends ZipFile {
                         byte[] lbuf = new byte[512];
                         Attributes attr = new Attributes();
                         attr.read(new Manifest.FastInputStream(
-                            new ByteArrayInputStream(b)), lbuf);
-                        isMultiRelease = Boolean.parseBoolean(
-                            attr.getValue(Attributes.Name.MULTI_RELEASE));
+                                new ByteArrayInputStream(b)), lbuf);
+                        if(Boolean.parseBoolean(
+
 attr.getValue(Attributes.Name.MULTI_RELEASE))) {
+                            isMultiRelease = true;
+                            versions =
IntStream.of(JUZFA.getMetaInfVersions(this))
+                                    .filter(v -> v >= BASE_VERSION_FEATURE
&& v <= versionFeature)
+                                    .sorted()
+                                    .toArray();
+                        }
+
                     }
                 }
             }
diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 274e8788d1..25eeb57555 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -48,6 +48,7 @@ import java.util.Iterator;
 import java.util.Objects;
 import java.util.NoSuchElementException;
 import java.util.Set;
+import java.util.HashSet;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.WeakHashMap;
@@ -1063,6 +1064,11 @@ public class ZipFile implements ZipConstants,
Closeable {
                     return zip.getMetaInfEntryNames();
                 }
                 @Override
+                public int[] getMetaInfVersions(ZipFile zip) {
+                    final int[] metaVersions = zip.res.zsrc.metaVersions;
+                    return metaVersions == null ? new int[0] :
metaVersions;
+                }
+                @Override
                 public JarEntry getEntry(ZipFile zip, String name,
                     Function<String, JarEntry> func) {
                     return (JarEntry)zip.getEntry(name, func);
@@ -1097,6 +1103,7 @@ public class ZipFile implements ZipConstants,
Closeable {
         private byte[] comment;              // zip file comment
                                              // list of meta entries in
META-INF dir
         private int[] metanames;
+        private int[] metaVersions;          // list of unique versions
found in META-INF/versions/
         private final boolean startsWithLoc; // true, if zip file starts
with LOCSIG (usually true)

         // A Hashmap for all entries.
@@ -1424,6 +1431,8 @@ public class ZipFile implements ZipConstants,
Closeable {

             // list for all meta entries
             ArrayList<Integer> metanamesList = null;
+            // Set of all version numbers seen in META-INF/versions/
+            Set<Integer> metaVersionsSet = null;

             // Iterate through the entries in the central directory
             int i = 0;
@@ -1461,6 +1470,12 @@ public class ZipFile implements ZipConstants,
Closeable {
                     if (metanamesList == null)
                         metanamesList = new ArrayList<>(4);
                     metanamesList.add(pos);
+                    int version = getMetaVersion(cen, pos + CENHDR + 9,
nlen);
+                    if(version != -1) {
+                        if(metaVersionsSet == null)
+                            metaVersionsSet = new HashSet<>();
+                        metaVersionsSet.add(version);
+                    }
                 }
                 // skip ext and comment
                 pos += (CENHDR + nlen + elen + clen);
@@ -1473,6 +1488,13 @@ public class ZipFile implements ZipConstants,
Closeable {
                     metanames[j] = metanamesList.get(j);
                 }
             }
+            if(metaVersionsSet != null) {
+                metaVersions = new int[metaVersionsSet.size()];
+                int c = 0;
+                for (Integer version : metaVersionsSet) {
+                    metaVersions[c++] = version;
+                }
+            }
             if (pos + ENDHDR != cen.length) {
                 zerror("invalid CEN header (bad header size)");
             }
@@ -1556,6 +1578,49 @@ public class ZipFile implements ZipConstants,
Closeable {
                 && (name[off]         ) == '/';
         }

+        /*
+         * If bytes represents a non-directory name beginning
+         * with "versions/", and continuing with an integer (version)
+         * followed by a '/', then return the parsed version integer.
+         * Otherwise, return -1
+         */
+        private static int getMetaVersion(byte[] name, int off, int len) {
+            int nend = off+len;
+            if(! (len > 9                        // "versions/".length()
+                    && name[off + len - 1] != '/'  // non-directory
+                    && (name[off++] | 0x20) == 'v'
+                    && (name[off++] | 0x20) == 'e'
+                    && (name[off++] | 0x20) == 'r'
+                    && (name[off++] | 0x20) == 's'
+                    && (name[off++] | 0x20) == 'i'
+                    && (name[off++] | 0x20) == 'o'
+                    && (name[off++] | 0x20) == 'n'
+                    && (name[off++] | 0x20) == 's'
+                    && (name[off++]         ) == '/')) {
+                return -1;
+            }
+            int vstart = off;
+            for(int i = vstart; i < nend; i++) {
+                final byte c = name[i];
+                if(c != '/' && (c < '0' || c > '9')) {
+                    return -1;
+                }
+                if(c == '/') {
+                    int vlen = i - vstart;
+
+                    if(vlen < 1) {
+                        return -1;
+                    }
+                    try {
+                        return Integer.parseInt(new String(name, vstart,
vlen, UTF_8.INSTANCE));
+                    } catch (NumberFormatException e) {
+                        return -1;
+                    }
+                }
+            }
+            return -1;
+        }
+
         /**
          * Returns the number of CEN headers in a central directory.
          * Will not throw, even if the zip file is corrupt.
diff --git
a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
index 10187e2f50..0d808b9286 100644
---
a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
+++
b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
@@ -35,6 +35,7 @@ import java.util.zip.ZipFile;
 public interface JavaUtilZipFileAccess {
     public boolean startsWithLocHeader(ZipFile zip);
     public String[] getMetaInfEntryNames(ZipFile zip);
+    public int[] getMetaInfVersions(ZipFile zip);
     public JarEntry getEntry(ZipFile zip, String name, Function<String,
JarEntry> func);
     public Enumeration<JarEntry> entries(ZipFile zip, Function<String,
JarEntry> func);
     public Stream<JarEntry> stream(ZipFile zip, Function<String, JarEntry>
func);




On Sun, Apr 12, 2020 at 10:50 AM Eirik Bjørsnøs <eirbjo at gmail.com> wrote:

> I think you could tune away a significant part of that up front cost by
>> using JUZFA.entryNameStream(this) instead of
>> this.stream().map(ZipEntry::getName). This will avoid expanding each
>> entry into a JarEntry internally. Perhaps this gets the up-front
>> overhead down to more acceptable levels..?
>>
>
> I found JUZFA.getMetaInfEntryNames which made the up front scanning
> cost evaporate.
>
> Should be fast for other jars as well, at least when META-INF/ is sparsely
> populated.
>
> Here's an updated patch:
>
> diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
> b/src/java.base/share/classes/java/util/jar/JarFile.java
> index 1ec0f5bdae..95472604c0 100644
> --- a/src/java.base/share/classes/java/util/jar/JarFile.java
> +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
> @@ -161,7 +161,7 @@ public class JarFile extends ZipFile {
>      private final Runtime.Version version;  // current version
>      private final int versionFeature;       // version.feature()
>      private boolean isMultiRelease;         // is jar multi-release?
> -
> +    private int[] versions;                 // which versions does the
> jar contain
>      // indicates if Class-Path attribute present
>      private boolean hasClassPathAttribute;
>      // true if manifest checked for special attributes
> @@ -599,12 +599,13 @@ public class JarFile extends ZipFile {
>      }
>
>      private JarEntry getVersionedEntry(String name, JarEntry je) {
> -        if (BASE_VERSION_FEATURE < versionFeature) {
> +        int[] versions = this.versions;
> +        if (BASE_VERSION_FEATURE < versionFeature && versions != null &&
> versions.length > 0) {
>              if (!name.startsWith(META_INF)) {
>                  // search for versioned entry
> -                int v = versionFeature;
> -                while (v > BASE_VERSION_FEATURE) {
> -                    JarFileEntry vje = getEntry0(META_INF_VERSIONS + v +
> "/" + name);
> +                int v = versions.length - 1;
> +                while (v >= 0) {
> +                    JarFileEntry vje = getEntry0(META_INF_VERSIONS +
> versions[v] + "/" + name);
>                      if (vje != null) {
>                          return vje.withBasename(name);
>                      }
> @@ -1016,9 +1017,18 @@ public class JarFile extends ZipFile {
>                          byte[] lbuf = new byte[512];
>                          Attributes attr = new Attributes();
>                          attr.read(new Manifest.FastInputStream(
> -                            new ByteArrayInputStream(b)), lbuf);
> -                        isMultiRelease = Boolean.parseBoolean(
> -                            attr.getValue(Attributes.Name.MULTI_RELEASE));
> +                                new ByteArrayInputStream(b)), lbuf);
> +                        if(Boolean.parseBoolean(
> +
>  attr.getValue(Attributes.Name.MULTI_RELEASE))) {
> +                            isMultiRelease = true;
> +                            versions =
> Stream.of(JUZFA.getMetaInfEntryNames(this))
> +                                    .mapToInt(this::parseVersion)
> +                                    .filter(v -> v != -1 && v >=
> BASE_VERSION_FEATURE && v <= versionFeature)
> +                                    .distinct()
> +                                    .sorted()
> +                                    .toArray();
> +                        }
> +
>                      }
>                  }
>              }
> @@ -1026,6 +1036,27 @@ public class JarFile extends ZipFile {
>          }
>      }
>
> +    /**
> +     * If {@code entryName} is a a versioned entry, parse and return the
> version as an integer, otherwise return -1
> +     */
> +    private int parseVersion(String entryName) {
> +        if(!entryName.startsWith(META_INF_VERSIONS)) {
> +            return -1;
> +        }
> +
> +        int separator = entryName.indexOf("/",
> META_INF_VERSIONS.length());
> +
> +        if(separator == -1) {
> +            return -1;
> +        }
> +
> +        try {
> +            return Integer.parseInt(entryName,
> META_INF_VERSIONS.length(), separator, 10);
> +        } catch (NumberFormatException e) {
> +            return -1;
> +        }
> +    }
> +
>      synchronized void ensureInitialization() {
>          try {
>              maybeInstantiateVerifier();
>
> Eirik.
>
>


More information about the core-libs-dev mailing list