JarFile.getVersionedEntry scalability with new release cadence

Claes Redestad claes.redestad at oracle.com
Sun Apr 12 14:22:36 UTC 2020


Hi Eirik,

I think this is shaping up nicely.

If we ensure the array returned from getMetaInfVersions is already
sorted (by e.g. using a TreeMap during parse), we could remove the
stream expression. And if we check bounds in getVersionedEntry we don't
need the field in JarFile at all.

In initCEN I think you need to subtract 9 from nlen:

getMetaVersion(cen, pos + CENHDR + 9, nlen - 9)

I suspect this meant that most entries that would have been a
valid entry were filtered out, so your patch might have ended
up with an empty versions array in your tests.

And in getMetaVersion I think we can avoid creating Strings and parse
the version inline. By using 0 as the "not a version" value we can
simplify it further:

             int version = 0;
             for (int i = vstart; i < nend; i++) {
                 final byte c = name[i];
                 if (c == '/') {
                     return version;
                 }
                 if (c < '0' || c > '9') {
                     return 0;
                 }
                 version = version * 10 + c - '0';
                 // Check for overflow
                 if (version < 0) {
                     return 0;
                 }
             }

(Maybe we need to reject leading zeros..)

I've uploaded a version here for your convenience - let me know if
you're happy with the suggestions/edits:

http://cr.openjdk.java.net/~redestad/scratch/eirik_mr_versions.00/

Thanks!

/Claes

On 2020-04-12 13:26, Eirik Bjørsnøs wrote:
> 
> Claes,
> 
> Since the ZIP index is already scanned for META-INF/ names in 
> ZipFile.Source.initCEN, it might make sense to move version scanning 
> there. This would also allow identifying version strings at the byte 
> array level.
> 
> Here's a patch which implements this:
> 
> diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java 
> b/src/java.base/share/classes/java/util/jar/JarFile.java
> index 1ec0f5bdae..988cc837f0 100644
> --- a/src/java.base/share/classes/java/util/jar/JarFile.java
> +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
> @@ -49,6 +49,7 @@ import java.util.Locale;
>   import java.util.NoSuchElementException;
>   import java.util.Objects;
>   import java.util.function.Function;
> +import java.util.stream.IntStream;
>   import java.util.stream.Stream;
>   import java.util.zip.ZipEntry;
>   import java.util.zip.ZipException;
> @@ -161,7 +162,7 @@ public class JarFile extends ZipFile {
>       private final Runtime.Version version;  // current version
>       private final int versionFeature;       // version.feature()
>       private boolean isMultiRelease;         // is jar multi-release?
> -
> +    private int[] versions;                 // which versions does the 
> jar contain
>       // indicates if Class-Path attribute present
>       private boolean hasClassPathAttribute;
>       // true if manifest checked for special attributes
> @@ -599,12 +600,13 @@ public class JarFile extends ZipFile {
>       }
> 
>       private JarEntry getVersionedEntry(String name, JarEntry je) {
> -        if (BASE_VERSION_FEATURE < versionFeature) {
> +        int[] versions = this.versions;
> +        if (BASE_VERSION_FEATURE < versionFeature && versions != null 
> && versions.length > 0) {
>               if (!name.startsWith(META_INF)) {
>                   // search for versioned entry
> -                int v = versionFeature;
> -                while (v > BASE_VERSION_FEATURE) {
> -                    JarFileEntry vje = getEntry0(META_INF_VERSIONS + v 
> + "/" + name);
> +                int v = versions.length - 1;
> +                while (v >= 0) {
> +                    JarFileEntry vje = getEntry0(META_INF_VERSIONS + 
> versions[v] + "/" + name);
>                       if (vje != null) {
>                           return vje.withBasename(name);
>                       }
> @@ -1016,9 +1018,16 @@ public class JarFile extends ZipFile {
>                           byte[] lbuf = new byte[512];
>                           Attributes attr = new Attributes();
>                           attr.read(new Manifest.FastInputStream(
> -                            new ByteArrayInputStream(b)), lbuf);
> -                        isMultiRelease = Boolean.parseBoolean(
> -                            attr.getValue(Attributes.Name.MULTI_RELEASE));
> +                                new ByteArrayInputStream(b)), lbuf);
> +                        if(Boolean.parseBoolean(
> +                               
>   attr.getValue(Attributes.Name.MULTI_RELEASE))) {
> +                            isMultiRelease = true;
> +                            versions = 
> IntStream.of(JUZFA.getMetaInfVersions(this))
> +                                    .filter(v -> v >= 
> BASE_VERSION_FEATURE && v <= versionFeature)
> +                                    .sorted()
> +                                    .toArray();
> +                        }
> +
>                       }
>                   }
>               }
> diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java 
> b/src/java.base/share/classes/java/util/zip/ZipFile.java
> index 274e8788d1..25eeb57555 100644
> --- a/src/java.base/share/classes/java/util/zip/ZipFile.java
> +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
> @@ -48,6 +48,7 @@ import java.util.Iterator;
>   import java.util.Objects;
>   import java.util.NoSuchElementException;
>   import java.util.Set;
> +import java.util.HashSet;
>   import java.util.Spliterator;
>   import java.util.Spliterators;
>   import java.util.WeakHashMap;
> @@ -1063,6 +1064,11 @@ public class ZipFile implements ZipConstants, 
> Closeable {
>                       return zip.getMetaInfEntryNames();
>                   }
>                   @Override
> +                public int[] getMetaInfVersions(ZipFile zip) {
> +                    final int[] metaVersions = zip.res.zsrc.metaVersions;
> +                    return metaVersions == null ? new int[0] : 
> metaVersions;
> +                }
> +                @Override
>                   public JarEntry getEntry(ZipFile zip, String name,
>                       Function<String, JarEntry> func) {
>                       return (JarEntry)zip.getEntry(name, func);
> @@ -1097,6 +1103,7 @@ public class ZipFile implements ZipConstants, 
> Closeable {
>           private byte[] comment;              // zip file comment
>                                                // list of meta entries 
> in META-INF dir
>           private int[] metanames;
> +        private int[] metaVersions;          // list of unique versions 
> found in META-INF/versions/
>           private final boolean startsWithLoc; // true, if zip file 
> starts with LOCSIG (usually true)
> 
>           // A Hashmap for all entries.
> @@ -1424,6 +1431,8 @@ public class ZipFile implements ZipConstants, 
> Closeable {
> 
>               // list for all meta entries
>               ArrayList<Integer> metanamesList = null;
> +            // Set of all version numbers seen in META-INF/versions/
> +            Set<Integer> metaVersionsSet = null;
> 
>               // Iterate through the entries in the central directory
>               int i = 0;
> @@ -1461,6 +1470,12 @@ public class ZipFile implements ZipConstants, 
> Closeable {
>                       if (metanamesList == null)
>                           metanamesList = new ArrayList<>(4);
>                       metanamesList.add(pos);
> +                    int version = getMetaVersion(cen, pos + CENHDR + 9, 
> nlen) > +                    if(version != -1) {
> +                        if(metaVersionsSet == null)
> +                            metaVersionsSet = new HashSet<>();
> +                        metaVersionsSet.add(version);
> +                    }
>                   }
>                   // skip ext and comment
>                   pos += (CENHDR + nlen + elen + clen);
> @@ -1473,6 +1488,13 @@ public class ZipFile implements ZipConstants, 
> Closeable {
>                       metanames[j] = metanamesList.get(j);
>                   }
>               }
> +            if(metaVersionsSet != null) {
> +                metaVersions = new int[metaVersionsSet.size()];
> +                int c = 0;
> +                for (Integer version : metaVersionsSet) {
> +                    metaVersions[c++] = version;
> +                }
> +            }
>               if (pos + ENDHDR != cen.length) {
>                   zerror("invalid CEN header (bad header size)");
>               }
> @@ -1556,6 +1578,49 @@ public class ZipFile implements ZipConstants, 
> Closeable {
>                   && (name[off]         ) == '/';
>           }
> 
> +        /*
> +         * If bytes represents a non-directory name beginning
> +         * with "versions/", and continuing with an integer (version)
> +         * followed by a '/', then return the parsed version integer.
> +         * Otherwise, return -1
> +         */
> +        private static int getMetaVersion(byte[] name, int off, int len) {
> +            int nend = off+len;
> +            if(! (len > 9                        // "versions/".length()
> +                    && name[off + len - 1] != '/'  // non-directory
> +                    && (name[off++] | 0x20) == 'v'
> +                    && (name[off++] | 0x20) == 'e'
> +                    && (name[off++] | 0x20) == 'r'
> +                    && (name[off++] | 0x20) == 's'
> +                    && (name[off++] | 0x20) == 'i'
> +                    && (name[off++] | 0x20) == 'o'
> +                    && (name[off++] | 0x20) == 'n'
> +                    && (name[off++] | 0x20) == 's'
> +                    && (name[off++]         ) == '/')) {
> +                return -1;
> +            }
> +            int vstart = off;
> +            for(int i = vstart; i < nend; i++) {
> +                final byte c = name[i];
> +                if(c != '/' && (c < '0' || c > '9')) {
> +                    return -1;
> +                }
> +                if(c == '/') {
> +                    int vlen = i - vstart;
> +
> +                    if(vlen < 1) {
> +                        return -1;
> +                    }
> +                    try {
> +                        return Integer.parseInt(new String(name, 
> vstart, vlen, UTF_8.INSTANCE));
> +                    } catch (NumberFormatException e) {
> +                        return -1;
> +                    }
> +                }
> +            }
> +            return -1;
> +        }
> +
>           /**
>            * Returns the number of CEN headers in a central directory.
>            * Will not throw, even if the zip file is corrupt.
> diff --git 
> a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java 
> b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> index 10187e2f50..0d808b9286 100644
> --- 
> a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> +++ 
> b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> @@ -35,6 +35,7 @@ import java.util.zip.ZipFile;
>   public interface JavaUtilZipFileAccess {
>       public boolean startsWithLocHeader(ZipFile zip);
>       public String[] getMetaInfEntryNames(ZipFile zip);
> +    public int[] getMetaInfVersions(ZipFile zip);
>       public JarEntry getEntry(ZipFile zip, String name, 
> Function<String, JarEntry> func);
>       public Enumeration<JarEntry> entries(ZipFile zip, Function<String, 
> JarEntry> func);
>       public Stream<JarEntry> stream(ZipFile zip, Function<String, 
> JarEntry> func);
> 
> 
> 
> 
> On Sun, Apr 12, 2020 at 10:50 AM Eirik Bjørsnøs <eirbjo at gmail.com 
> <mailto:eirbjo at gmail.com>> wrote:
> 
>         I think you could tune away a significant part of that up front
>         cost by
>         using JUZFA.entryNameStream(this) instead of
>         this.stream().map(ZipEntry::getName). This will avoid expanding each
>         entry into a JarEntry internally. Perhaps this gets the up-front
>         overhead down to more acceptable levels..?
> 
> 
>     I found JUZFA.getMetaInfEntryNames which made the up front scanning
>     cost evaporate.
> 
>     Should be fast for other jars as well, at least when META-INF/ is
>     sparsely populated.
> 
>     Here's an updated patch:
> 
>     diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
>     b/src/java.base/share/classes/java/util/jar/JarFile.java
>     index 1ec0f5bdae..95472604c0 100644
>     --- a/src/java.base/share/classes/java/util/jar/JarFile.java
>     +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
>     @@ -161,7 +161,7 @@ public class JarFile extends ZipFile {
>           private final Runtime.Version version;  // current version
>           private final int versionFeature;       // version.feature()
>           private boolean isMultiRelease;         // is jar multi-release?
>     -
>     +    private int[] versions;                 // which versions does
>     the jar contain
>           // indicates if Class-Path attribute present
>           private boolean hasClassPathAttribute;
>           // true if manifest checked for special attributes
>     @@ -599,12 +599,13 @@ public class JarFile extends ZipFile {
>           }
> 
>           private JarEntry getVersionedEntry(String name, JarEntry je) {
>     -        if (BASE_VERSION_FEATURE < versionFeature) {
>     +        int[] versions = this.versions;
>     +        if (BASE_VERSION_FEATURE < versionFeature && versions !=
>     null && versions.length > 0) {
>                   if (!name.startsWith(META_INF)) {
>                       // search for versioned entry
>     -                int v = versionFeature;
>     -                while (v > BASE_VERSION_FEATURE) {
>     -                    JarFileEntry vje = getEntry0(META_INF_VERSIONS
>     + v + "/" + name);
>     +                int v = versions.length - 1;
>     +                while (v >= 0) {
>     +                    JarFileEntry vje = getEntry0(META_INF_VERSIONS
>     + versions[v] + "/" + name);
>                           if (vje != null) {
>                               return vje.withBasename(name);
>                           }
>     @@ -1016,9 +1017,18 @@ public class JarFile extends ZipFile {
>                               byte[] lbuf = new byte[512];
>                               Attributes attr = new Attributes();
>                               attr.read(new Manifest.FastInputStream(
>     -                            new ByteArrayInputStream(b)), lbuf);
>     -                        isMultiRelease = Boolean.parseBoolean(
>     -                          
>       attr.getValue(Attributes.Name.MULTI_RELEASE));
>     +                                new ByteArrayInputStream(b)), lbuf);
>     +                        if(Boolean.parseBoolean(
>     +                              
>       attr.getValue(Attributes.Name.MULTI_RELEASE))) {
>     +                            isMultiRelease = true;
>     +                            versions =
>     Stream.of(JUZFA.getMetaInfEntryNames(this))
>     +                                    .mapToInt(this::parseVersion)
>     +                                    .filter(v -> v != -1 && v >=
>     BASE_VERSION_FEATURE && v <= versionFeature)
>     +                                    .distinct()
>     +                                    .sorted()
>     +                                    .toArray();
>     +                        }
>     +
>                           }
>                       }
>                   }
>     @@ -1026,6 +1036,27 @@ public class JarFile extends ZipFile {
>               }
>           }
> 
>     +    /**
>     +     * If {@code entryName} is a a versioned entry, parse and
>     return the version as an integer, otherwise return -1
>     +     */
>     +    private int parseVersion(String entryName) {
>     +        if(!entryName.startsWith(META_INF_VERSIONS)) {
>     +            return -1;
>     +        }
>     +
>     +        int separator = entryName.indexOf("/",
>     META_INF_VERSIONS.length());
>     +
>     +        if(separator == -1) {
>     +            return -1;
>     +        }
>     +
>     +        try {
>     +            return Integer.parseInt(entryName,
>     META_INF_VERSIONS.length(), separator, 10);
>     +        } catch (NumberFormatException e) {
>     +            return -1;
>     +        }
>     +    }
>     +
>           synchronized void ensureInitialization() {
>               try {
>                   maybeInstantiateVerifier();
> 
>     Eirik.
> 


More information about the core-libs-dev mailing list