JDK-8242959 residual cleanup: Move encoding checks into ZipCoder
Eirik Bjørsnøs
eirbjo at gmail.com
Sat Jan 16 09:53:29 UTC 2021
Hi,
The following is purely a code cleanup/refactoring proposal for ZipFile
internals:
ZipFile.Source.initCEN currently checks that each entry name is encoded
using bytes valid in the entry's encoding:
if (zc.isUTF8() || (flag & USE_UTF8) != 0) {
checkUTF8(cen, pos + CENHDR, nlen);
} else {
checkEncoding(zc, cen, pos + CENHDR, nlen);
}
I find this unnecessarily complicated for a couple of reasons:
1: The if repeats flag checking logic which already exists in zipCoderForPos
2: checkUTF and checkEncoding are encoding-specific methods and as such
would have a better home in ZipCoder
As a cleanup, I propose that we move checkEncoding into ZipCoder and make
checkUTF an override of checkEncoding in UTF8ZipCoder.
Here's a patch which implements the proposal. Sponsors are welcome :-)
diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java
b/src/java.base/share/classes/java/util/zip/ZipCoder.java
index 13012aab0f6..cb1ab256891 100644
--- a/src/java.base/share/classes/java/util/zip/ZipCoder.java
+++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java
@@ -54,6 +54,14 @@ class ZipCoder {
return new ZipCoder(charset);
}
+ void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
+ try {
+ toString(a, pos, nlen);
+ } catch(Exception e) {
+ throw new ZipException("invalid CEN header (bad entry name)");
+ }
+ }
+
String toString(byte[] ba, int off, int length) {
try {
return decoder().decode(ByteBuffer.wrap(ba, off,
length)).toString();
@@ -203,6 +211,25 @@ class ZipCoder {
return true;
}
+ @Override
+ void checkEncoding(byte[] a, int pos, int len) throws ZipException
{
+ try {
+ int end = pos + len;
+ while (pos < end) {
+ // ASCII fast-path: When checking that a range of
bytes is
+ // valid UTF-8, we can avoid some allocation by
skipping
+ // past bytes in the 0-127 range
+ if (a[pos] < 0) {
+ ZipCoder.toStringUTF8(a, pos, end - pos);
+ break;
+ }
+ pos++;
+ }
+ } catch(Exception e) {
+ throw new ZipException("invalid CEN header (bad entry
name)");
+ }
+ }
+
@Override
String toString(byte[] ba, int off, int length) {
return JLA.newStringUTF8NoRepl(ba, off, length);
diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java
b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 4c53855abfe..07edc06999b 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -1309,31 +1309,6 @@ public class ZipFile implements ZipConstants,
Closeable {
}
}
- private static final void checkUTF8(byte[] a, int pos, int len)
throws ZipException {
- try {
- int end = pos + len;
- while (pos < end) {
- // ASCII fast-path: When checking that a range of
bytes is
- // valid UTF-8, we can avoid some allocation by
skipping
- // past bytes in the 0-127 range
- if (a[pos] < 0) {
- ZipCoder.toStringUTF8(a, pos, end - pos);
- break;
- }
- pos++;
- }
- } catch(Exception e) {
- zerror("invalid CEN header (bad entry name)");
- }
- }
-
- private final void checkEncoding(ZipCoder zc, byte[] a, int pos,
int nlen) throws ZipException {
- try {
- zc.toString(a, pos, nlen);
- } catch(Exception e) {
- zerror("invalid CEN header (bad entry name)");
- }
- }
private static class End {
int centot; // 4 bytes
@@ -1520,13 +1495,10 @@ public class ZipFile implements ZipConstants,
Closeable {
zerror("invalid CEN header (bad compression method: "
+ method + ")");
if (entryPos + nlen > limit)
zerror("invalid CEN header (bad header size)");
- if (zc.isUTF8() || (flag & USE_UTF8) != 0) {
- checkUTF8(cen, pos + CENHDR, nlen);
- } else {
- checkEncoding(zc, cen, pos + CENHDR, nlen);
- }
+ ZipCoder zcp = zipCoderForPos(pos);
+ zcp.checkEncoding(cen, pos + CENHDR, nlen);
// Record the CEN offset and the name hash in our hash
cell.
- hash = zipCoderForPos(pos).normalizedHash(cen, entryPos,
nlen);
+ hash = zcp.normalizedHash(cen, entryPos, nlen);
hsh = (hash & 0x7fffffff) % tablelen;
next = table[hsh];
table[hsh] = idx;
More information about the core-libs-dev
mailing list