JarFile.getVersionedEntry scalability with new release cadence
Claes Redestad
claes.redestad at oracle.com
Sun Apr 12 14:22:36 UTC 2020
Hi Eirik,
I think this is shaping up nicely.
If we ensure the array returned from getMetaInfVersions is already
sorted (by e.g. using a TreeMap during parse), we could remove the
stream expression. And if we check bounds in getVersionedEntry we don't
need the field in JarFile at all.
In initCEN I think you need to subtract 9 from nlen:
getMetaVersion(cen, pos + CENHDR + 9, nlen - 9)
I suspect this meant that most entries that would have been a
valid entry were filtered out, so your patch might have ended
up with an empty versions array in your tests.
And in getMetaVersion I think we can avoid creating Strings and parse
the version inline. By using 0 as the "not a version" value we can
simplify it further:
int version = 0;
for (int i = vstart; i < nend; i++) {
final byte c = name[i];
if (c == '/') {
return version;
}
if (c < '0' || c > '9') {
return 0;
}
version = version * 10 + c - '0';
// Check for overflow
if (version < 0) {
return 0;
}
}
(Maybe we need to reject leading zeros..)
I've uploaded a version here for your convenience - let me know if
you're happy with the suggestions/edits:
http://cr.openjdk.java.net/~redestad/scratch/eirik_mr_versions.00/
Thanks!
/Claes
On 2020-04-12 13:26, Eirik Bjørsnøs wrote:
>
> Claes,
>
> Since the ZIP index is already scanned for META-INF/ names in
> ZipFile.Source.initCEN, it might make sense to move version scanning
> there. This would also allow identifying version strings at the byte
> array level.
>
> Here's a patch which implements this:
>
> diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
> b/src/java.base/share/classes/java/util/jar/JarFile.java
> index 1ec0f5bdae..988cc837f0 100644
> --- a/src/java.base/share/classes/java/util/jar/JarFile.java
> +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
> @@ -49,6 +49,7 @@ import java.util.Locale;
> import java.util.NoSuchElementException;
> import java.util.Objects;
> import java.util.function.Function;
> +import java.util.stream.IntStream;
> import java.util.stream.Stream;
> import java.util.zip.ZipEntry;
> import java.util.zip.ZipException;
> @@ -161,7 +162,7 @@ public class JarFile extends ZipFile {
> private final Runtime.Version version; // current version
> private final int versionFeature; // version.feature()
> private boolean isMultiRelease; // is jar multi-release?
> -
> + private int[] versions; // which versions does the
> jar contain
> // indicates if Class-Path attribute present
> private boolean hasClassPathAttribute;
> // true if manifest checked for special attributes
> @@ -599,12 +600,13 @@ public class JarFile extends ZipFile {
> }
>
> private JarEntry getVersionedEntry(String name, JarEntry je) {
> - if (BASE_VERSION_FEATURE < versionFeature) {
> + int[] versions = this.versions;
> + if (BASE_VERSION_FEATURE < versionFeature && versions != null
> && versions.length > 0) {
> if (!name.startsWith(META_INF)) {
> // search for versioned entry
> - int v = versionFeature;
> - while (v > BASE_VERSION_FEATURE) {
> - JarFileEntry vje = getEntry0(META_INF_VERSIONS + v
> + "/" + name);
> + int v = versions.length - 1;
> + while (v >= 0) {
> + JarFileEntry vje = getEntry0(META_INF_VERSIONS +
> versions[v] + "/" + name);
> if (vje != null) {
> return vje.withBasename(name);
> }
> @@ -1016,9 +1018,16 @@ public class JarFile extends ZipFile {
> byte[] lbuf = new byte[512];
> Attributes attr = new Attributes();
> attr.read(new Manifest.FastInputStream(
> - new ByteArrayInputStream(b)), lbuf);
> - isMultiRelease = Boolean.parseBoolean(
> - attr.getValue(Attributes.Name.MULTI_RELEASE));
> + new ByteArrayInputStream(b)), lbuf);
> + if(Boolean.parseBoolean(
> +
> attr.getValue(Attributes.Name.MULTI_RELEASE))) {
> + isMultiRelease = true;
> + versions =
> IntStream.of(JUZFA.getMetaInfVersions(this))
> + .filter(v -> v >=
> BASE_VERSION_FEATURE && v <= versionFeature)
> + .sorted()
> + .toArray();
> + }
> +
> }
> }
> }
> diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
> b/src/java.base/share/classes/java/util/zip/ZipFile.java
> index 274e8788d1..25eeb57555 100644
> --- a/src/java.base/share/classes/java/util/zip/ZipFile.java
> +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
> @@ -48,6 +48,7 @@ import java.util.Iterator;
> import java.util.Objects;
> import java.util.NoSuchElementException;
> import java.util.Set;
> +import java.util.HashSet;
> import java.util.Spliterator;
> import java.util.Spliterators;
> import java.util.WeakHashMap;
> @@ -1063,6 +1064,11 @@ public class ZipFile implements ZipConstants,
> Closeable {
> return zip.getMetaInfEntryNames();
> }
> @Override
> + public int[] getMetaInfVersions(ZipFile zip) {
> + final int[] metaVersions = zip.res.zsrc.metaVersions;
> + return metaVersions == null ? new int[0] :
> metaVersions;
> + }
> + @Override
> public JarEntry getEntry(ZipFile zip, String name,
> Function<String, JarEntry> func) {
> return (JarEntry)zip.getEntry(name, func);
> @@ -1097,6 +1103,7 @@ public class ZipFile implements ZipConstants,
> Closeable {
> private byte[] comment; // zip file comment
> // list of meta entries
> in META-INF dir
> private int[] metanames;
> + private int[] metaVersions; // list of unique versions
> found in META-INF/versions/
> private final boolean startsWithLoc; // true, if zip file
> starts with LOCSIG (usually true)
>
> // A Hashmap for all entries.
> @@ -1424,6 +1431,8 @@ public class ZipFile implements ZipConstants,
> Closeable {
>
> // list for all meta entries
> ArrayList<Integer> metanamesList = null;
> + // Set of all version numbers seen in META-INF/versions/
> + Set<Integer> metaVersionsSet = null;
>
> // Iterate through the entries in the central directory
> int i = 0;
> @@ -1461,6 +1470,12 @@ public class ZipFile implements ZipConstants,
> Closeable {
> if (metanamesList == null)
> metanamesList = new ArrayList<>(4);
> metanamesList.add(pos);
> + int version = getMetaVersion(cen, pos + CENHDR + 9,
> nlen) > + if(version != -1) {
> + if(metaVersionsSet == null)
> + metaVersionsSet = new HashSet<>();
> + metaVersionsSet.add(version);
> + }
> }
> // skip ext and comment
> pos += (CENHDR + nlen + elen + clen);
> @@ -1473,6 +1488,13 @@ public class ZipFile implements ZipConstants,
> Closeable {
> metanames[j] = metanamesList.get(j);
> }
> }
> + if(metaVersionsSet != null) {
> + metaVersions = new int[metaVersionsSet.size()];
> + int c = 0;
> + for (Integer version : metaVersionsSet) {
> + metaVersions[c++] = version;
> + }
> + }
> if (pos + ENDHDR != cen.length) {
> zerror("invalid CEN header (bad header size)");
> }
> @@ -1556,6 +1578,49 @@ public class ZipFile implements ZipConstants,
> Closeable {
> && (name[off] ) == '/';
> }
>
> + /*
> + * If bytes represents a non-directory name beginning
> + * with "versions/", and continuing with an integer (version)
> + * followed by a '/', then return the parsed version integer.
> + * Otherwise, return -1
> + */
> + private static int getMetaVersion(byte[] name, int off, int len) {
> + int nend = off+len;
> + if(! (len > 9 // "versions/".length()
> + && name[off + len - 1] != '/' // non-directory
> + && (name[off++] | 0x20) == 'v'
> + && (name[off++] | 0x20) == 'e'
> + && (name[off++] | 0x20) == 'r'
> + && (name[off++] | 0x20) == 's'
> + && (name[off++] | 0x20) == 'i'
> + && (name[off++] | 0x20) == 'o'
> + && (name[off++] | 0x20) == 'n'
> + && (name[off++] | 0x20) == 's'
> + && (name[off++] ) == '/')) {
> + return -1;
> + }
> + int vstart = off;
> + for(int i = vstart; i < nend; i++) {
> + final byte c = name[i];
> + if(c != '/' && (c < '0' || c > '9')) {
> + return -1;
> + }
> + if(c == '/') {
> + int vlen = i - vstart;
> +
> + if(vlen < 1) {
> + return -1;
> + }
> + try {
> + return Integer.parseInt(new String(name,
> vstart, vlen, UTF_8.INSTANCE));
> + } catch (NumberFormatException e) {
> + return -1;
> + }
> + }
> + }
> + return -1;
> + }
> +
> /**
> * Returns the number of CEN headers in a central directory.
> * Will not throw, even if the zip file is corrupt.
> diff --git
> a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> index 10187e2f50..0d808b9286 100644
> ---
> a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> +++
> b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
> @@ -35,6 +35,7 @@ import java.util.zip.ZipFile;
> public interface JavaUtilZipFileAccess {
> public boolean startsWithLocHeader(ZipFile zip);
> public String[] getMetaInfEntryNames(ZipFile zip);
> + public int[] getMetaInfVersions(ZipFile zip);
> public JarEntry getEntry(ZipFile zip, String name,
> Function<String, JarEntry> func);
> public Enumeration<JarEntry> entries(ZipFile zip, Function<String,
> JarEntry> func);
> public Stream<JarEntry> stream(ZipFile zip, Function<String,
> JarEntry> func);
>
>
>
>
> On Sun, Apr 12, 2020 at 10:50 AM Eirik Bjørsnøs <eirbjo at gmail.com
> <mailto:eirbjo at gmail.com>> wrote:
>
> I think you could tune away a significant part of that up front
> cost by
> using JUZFA.entryNameStream(this) instead of
> this.stream().map(ZipEntry::getName). This will avoid expanding each
> entry into a JarEntry internally. Perhaps this gets the up-front
> overhead down to more acceptable levels..?
>
>
> I found JUZFA.getMetaInfEntryNames which made the up front scanning
> cost evaporate.
>
> Should be fast for other jars as well, at least when META-INF/ is
> sparsely populated.
>
> Here's an updated patch:
>
> diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
> b/src/java.base/share/classes/java/util/jar/JarFile.java
> index 1ec0f5bdae..95472604c0 100644
> --- a/src/java.base/share/classes/java/util/jar/JarFile.java
> +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
> @@ -161,7 +161,7 @@ public class JarFile extends ZipFile {
> private final Runtime.Version version; // current version
> private final int versionFeature; // version.feature()
> private boolean isMultiRelease; // is jar multi-release?
> -
> + private int[] versions; // which versions does
> the jar contain
> // indicates if Class-Path attribute present
> private boolean hasClassPathAttribute;
> // true if manifest checked for special attributes
> @@ -599,12 +599,13 @@ public class JarFile extends ZipFile {
> }
>
> private JarEntry getVersionedEntry(String name, JarEntry je) {
> - if (BASE_VERSION_FEATURE < versionFeature) {
> + int[] versions = this.versions;
> + if (BASE_VERSION_FEATURE < versionFeature && versions !=
> null && versions.length > 0) {
> if (!name.startsWith(META_INF)) {
> // search for versioned entry
> - int v = versionFeature;
> - while (v > BASE_VERSION_FEATURE) {
> - JarFileEntry vje = getEntry0(META_INF_VERSIONS
> + v + "/" + name);
> + int v = versions.length - 1;
> + while (v >= 0) {
> + JarFileEntry vje = getEntry0(META_INF_VERSIONS
> + versions[v] + "/" + name);
> if (vje != null) {
> return vje.withBasename(name);
> }
> @@ -1016,9 +1017,18 @@ public class JarFile extends ZipFile {
> byte[] lbuf = new byte[512];
> Attributes attr = new Attributes();
> attr.read(new Manifest.FastInputStream(
> - new ByteArrayInputStream(b)), lbuf);
> - isMultiRelease = Boolean.parseBoolean(
> -
> attr.getValue(Attributes.Name.MULTI_RELEASE));
> + new ByteArrayInputStream(b)), lbuf);
> + if(Boolean.parseBoolean(
> +
> attr.getValue(Attributes.Name.MULTI_RELEASE))) {
> + isMultiRelease = true;
> + versions =
> Stream.of(JUZFA.getMetaInfEntryNames(this))
> + .mapToInt(this::parseVersion)
> + .filter(v -> v != -1 && v >=
> BASE_VERSION_FEATURE && v <= versionFeature)
> + .distinct()
> + .sorted()
> + .toArray();
> + }
> +
> }
> }
> }
> @@ -1026,6 +1036,27 @@ public class JarFile extends ZipFile {
> }
> }
>
> + /**
> + * If {@code entryName} is a a versioned entry, parse and
> return the version as an integer, otherwise return -1
> + */
> + private int parseVersion(String entryName) {
> + if(!entryName.startsWith(META_INF_VERSIONS)) {
> + return -1;
> + }
> +
> + int separator = entryName.indexOf("/",
> META_INF_VERSIONS.length());
> +
> + if(separator == -1) {
> + return -1;
> + }
> +
> + try {
> + return Integer.parseInt(entryName,
> META_INF_VERSIONS.length(), separator, 10);
> + } catch (NumberFormatException e) {
> + return -1;
> + }
> + }
> +
> synchronized void ensureInitialization() {
> try {
> maybeInstantiateVerifier();
>
> Eirik.
>
More information about the core-libs-dev
mailing list