JarFile.getVersionedEntry scalability with new release cadence
Eirik Bjørsnøs
eirbjo at gmail.com
Sun Apr 12 11:26:11 UTC 2020
Claes,
Since the ZIP index is already scanned for META-INF/ names in
ZipFile.Source.initCEN, it might make sense to move version scanning there.
This would also allow identifying version strings at the byte array level.
Here's a patch which implements this:
diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
b/src/java.base/share/classes/java/util/jar/JarFile.java
index 1ec0f5bdae..988cc837f0 100644
--- a/src/java.base/share/classes/java/util/jar/JarFile.java
+++ b/src/java.base/share/classes/java/util/jar/JarFile.java
@@ -49,6 +49,7 @@ import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.function.Function;
+import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
@@ -161,7 +162,7 @@ public class JarFile extends ZipFile {
private final Runtime.Version version; // current version
private final int versionFeature; // version.feature()
private boolean isMultiRelease; // is jar multi-release?
-
+ private int[] versions; // which versions does the jar
contain
// indicates if Class-Path attribute present
private boolean hasClassPathAttribute;
// true if manifest checked for special attributes
@@ -599,12 +600,13 @@ public class JarFile extends ZipFile {
}
private JarEntry getVersionedEntry(String name, JarEntry je) {
- if (BASE_VERSION_FEATURE < versionFeature) {
+ int[] versions = this.versions;
+ if (BASE_VERSION_FEATURE < versionFeature && versions != null &&
versions.length > 0) {
if (!name.startsWith(META_INF)) {
// search for versioned entry
- int v = versionFeature;
- while (v > BASE_VERSION_FEATURE) {
- JarFileEntry vje = getEntry0(META_INF_VERSIONS + v +
"/" + name);
+ int v = versions.length - 1;
+ while (v >= 0) {
+ JarFileEntry vje = getEntry0(META_INF_VERSIONS +
versions[v] + "/" + name);
if (vje != null) {
return vje.withBasename(name);
}
@@ -1016,9 +1018,16 @@ public class JarFile extends ZipFile {
byte[] lbuf = new byte[512];
Attributes attr = new Attributes();
attr.read(new Manifest.FastInputStream(
- new ByteArrayInputStream(b)), lbuf);
- isMultiRelease = Boolean.parseBoolean(
- attr.getValue(Attributes.Name.MULTI_RELEASE));
+ new ByteArrayInputStream(b)), lbuf);
+ if(Boolean.parseBoolean(
+
attr.getValue(Attributes.Name.MULTI_RELEASE))) {
+ isMultiRelease = true;
+ versions =
IntStream.of(JUZFA.getMetaInfVersions(this))
+ .filter(v -> v >= BASE_VERSION_FEATURE
&& v <= versionFeature)
+ .sorted()
+ .toArray();
+ }
+
}
}
}
diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 274e8788d1..25eeb57555 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -48,6 +48,7 @@ import java.util.Iterator;
import java.util.Objects;
import java.util.NoSuchElementException;
import java.util.Set;
+import java.util.HashSet;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.WeakHashMap;
@@ -1063,6 +1064,11 @@ public class ZipFile implements ZipConstants,
Closeable {
return zip.getMetaInfEntryNames();
}
@Override
+ public int[] getMetaInfVersions(ZipFile zip) {
+ final int[] metaVersions = zip.res.zsrc.metaVersions;
+ return metaVersions == null ? new int[0] :
metaVersions;
+ }
+ @Override
public JarEntry getEntry(ZipFile zip, String name,
Function<String, JarEntry> func) {
return (JarEntry)zip.getEntry(name, func);
@@ -1097,6 +1103,7 @@ public class ZipFile implements ZipConstants,
Closeable {
private byte[] comment; // zip file comment
// list of meta entries in
META-INF dir
private int[] metanames;
+ private int[] metaVersions; // list of unique versions
found in META-INF/versions/
private final boolean startsWithLoc; // true, if zip file starts
with LOCSIG (usually true)
// A Hashmap for all entries.
@@ -1424,6 +1431,8 @@ public class ZipFile implements ZipConstants,
Closeable {
// list for all meta entries
ArrayList<Integer> metanamesList = null;
+ // Set of all version numbers seen in META-INF/versions/
+ Set<Integer> metaVersionsSet = null;
// Iterate through the entries in the central directory
int i = 0;
@@ -1461,6 +1470,12 @@ public class ZipFile implements ZipConstants,
Closeable {
if (metanamesList == null)
metanamesList = new ArrayList<>(4);
metanamesList.add(pos);
+ int version = getMetaVersion(cen, pos + CENHDR + 9,
nlen);
+ if(version != -1) {
+ if(metaVersionsSet == null)
+ metaVersionsSet = new HashSet<>();
+ metaVersionsSet.add(version);
+ }
}
// skip ext and comment
pos += (CENHDR + nlen + elen + clen);
@@ -1473,6 +1488,13 @@ public class ZipFile implements ZipConstants,
Closeable {
metanames[j] = metanamesList.get(j);
}
}
+ if(metaVersionsSet != null) {
+ metaVersions = new int[metaVersionsSet.size()];
+ int c = 0;
+ for (Integer version : metaVersionsSet) {
+ metaVersions[c++] = version;
+ }
+ }
if (pos + ENDHDR != cen.length) {
zerror("invalid CEN header (bad header size)");
}
@@ -1556,6 +1578,49 @@ public class ZipFile implements ZipConstants,
Closeable {
&& (name[off] ) == '/';
}
+ /*
+ * If bytes represents a non-directory name beginning
+ * with "versions/", and continuing with an integer (version)
+ * followed by a '/', then return the parsed version integer.
+ * Otherwise, return -1
+ */
+ private static int getMetaVersion(byte[] name, int off, int len) {
+ int nend = off+len;
+ if(! (len > 9 // "versions/".length()
+ && name[off + len - 1] != '/' // non-directory
+ && (name[off++] | 0x20) == 'v'
+ && (name[off++] | 0x20) == 'e'
+ && (name[off++] | 0x20) == 'r'
+ && (name[off++] | 0x20) == 's'
+ && (name[off++] | 0x20) == 'i'
+ && (name[off++] | 0x20) == 'o'
+ && (name[off++] | 0x20) == 'n'
+ && (name[off++] | 0x20) == 's'
+ && (name[off++] ) == '/')) {
+ return -1;
+ }
+ int vstart = off;
+ for(int i = vstart; i < nend; i++) {
+ final byte c = name[i];
+ if(c != '/' && (c < '0' || c > '9')) {
+ return -1;
+ }
+ if(c == '/') {
+ int vlen = i - vstart;
+
+ if(vlen < 1) {
+ return -1;
+ }
+ try {
+ return Integer.parseInt(new String(name, vstart,
vlen, UTF_8.INSTANCE));
+ } catch (NumberFormatException e) {
+ return -1;
+ }
+ }
+ }
+ return -1;
+ }
+
/**
* Returns the number of CEN headers in a central directory.
* Will not throw, even if the zip file is corrupt.
diff --git
a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
index 10187e2f50..0d808b9286 100644
---
a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
+++
b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
@@ -35,6 +35,7 @@ import java.util.zip.ZipFile;
public interface JavaUtilZipFileAccess {
public boolean startsWithLocHeader(ZipFile zip);
public String[] getMetaInfEntryNames(ZipFile zip);
+ public int[] getMetaInfVersions(ZipFile zip);
public JarEntry getEntry(ZipFile zip, String name, Function<String,
JarEntry> func);
public Enumeration<JarEntry> entries(ZipFile zip, Function<String,
JarEntry> func);
public Stream<JarEntry> stream(ZipFile zip, Function<String, JarEntry>
func);
On Sun, Apr 12, 2020 at 10:50 AM Eirik Bjørsnøs <eirbjo at gmail.com> wrote:
> I think you could tune away a significant part of that up front cost by
>> using JUZFA.entryNameStream(this) instead of
>> this.stream().map(ZipEntry::getName). This will avoid expanding each
>> entry into a JarEntry internally. Perhaps this gets the up-front
>> overhead down to more acceptable levels..?
>>
>
> I found JUZFA.getMetaInfEntryNames which made the up front scanning
> cost evaporate.
>
> Should be fast for other jars as well, at least when META-INF/ is sparsely
> populated.
>
> Here's an updated patch:
>
> diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
> b/src/java.base/share/classes/java/util/jar/JarFile.java
> index 1ec0f5bdae..95472604c0 100644
> --- a/src/java.base/share/classes/java/util/jar/JarFile.java
> +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
> @@ -161,7 +161,7 @@ public class JarFile extends ZipFile {
> private final Runtime.Version version; // current version
> private final int versionFeature; // version.feature()
> private boolean isMultiRelease; // is jar multi-release?
> -
> + private int[] versions; // which versions does the
> jar contain
> // indicates if Class-Path attribute present
> private boolean hasClassPathAttribute;
> // true if manifest checked for special attributes
> @@ -599,12 +599,13 @@ public class JarFile extends ZipFile {
> }
>
> private JarEntry getVersionedEntry(String name, JarEntry je) {
> - if (BASE_VERSION_FEATURE < versionFeature) {
> + int[] versions = this.versions;
> + if (BASE_VERSION_FEATURE < versionFeature && versions != null &&
> versions.length > 0) {
> if (!name.startsWith(META_INF)) {
> // search for versioned entry
> - int v = versionFeature;
> - while (v > BASE_VERSION_FEATURE) {
> - JarFileEntry vje = getEntry0(META_INF_VERSIONS + v +
> "/" + name);
> + int v = versions.length - 1;
> + while (v >= 0) {
> + JarFileEntry vje = getEntry0(META_INF_VERSIONS +
> versions[v] + "/" + name);
> if (vje != null) {
> return vje.withBasename(name);
> }
> @@ -1016,9 +1017,18 @@ public class JarFile extends ZipFile {
> byte[] lbuf = new byte[512];
> Attributes attr = new Attributes();
> attr.read(new Manifest.FastInputStream(
> - new ByteArrayInputStream(b)), lbuf);
> - isMultiRelease = Boolean.parseBoolean(
> - attr.getValue(Attributes.Name.MULTI_RELEASE));
> + new ByteArrayInputStream(b)), lbuf);
> + if(Boolean.parseBoolean(
> +
> attr.getValue(Attributes.Name.MULTI_RELEASE))) {
> + isMultiRelease = true;
> + versions =
> Stream.of(JUZFA.getMetaInfEntryNames(this))
> + .mapToInt(this::parseVersion)
> + .filter(v -> v != -1 && v >=
> BASE_VERSION_FEATURE && v <= versionFeature)
> + .distinct()
> + .sorted()
> + .toArray();
> + }
> +
> }
> }
> }
> @@ -1026,6 +1036,27 @@ public class JarFile extends ZipFile {
> }
> }
>
> + /**
> + * If {@code entryName} is a a versioned entry, parse and return the
> version as an integer, otherwise return -1
> + */
> + private int parseVersion(String entryName) {
> + if(!entryName.startsWith(META_INF_VERSIONS)) {
> + return -1;
> + }
> +
> + int separator = entryName.indexOf("/",
> META_INF_VERSIONS.length());
> +
> + if(separator == -1) {
> + return -1;
> + }
> +
> + try {
> + return Integer.parseInt(entryName,
> META_INF_VERSIONS.length(), separator, 10);
> + } catch (NumberFormatException e) {
> + return -1;
> + }
> + }
> +
> synchronized void ensureInitialization() {
> try {
> maybeInstantiateVerifier();
>
> Eirik.
>
>
More information about the core-libs-dev
mailing list