8276207: Properties.loadFromXML/storeToXML works incorrectly for supplementary characters
Anirvan Sarkar
powers.anirvan at gmail.com
Sun Oct 31 10:56:56 UTC 2021
Hi,
Since it seems that the mailing list is scrubbing attachments, patch is
mentioned inline below.
diff a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
--- a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
+++ b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights
reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -1991,24 +1991,22 @@
case ';':
// Convert the character entity to a
character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1,
mBuffIdx - idx), 10);
- if (i >= 0xffff) {
- panic(FAULT);
+ // Restore the buffer offset
+ mBuffIdx = idx - 1;
+ for(char character : Character.toChars(i))
{
+ if (character == ' ' || mInp.next !=
null) {
+ bappend(character, flag);
+ } else {
+ bappend(character);
+ }
}
- ch = (char) i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
- // Restore the buffer offset
- mBuffIdx = idx - 1;
- if (ch == ' ' || mInp.next != null) {
- bappend(ch, flag);
- } else {
- bappend(ch);
- }
st = -1;
break;
case 'a':
// If the entity buffer is empty and
ch == 'x'
@@ -2032,24 +2030,22 @@
case ';':
// Convert the character entity to a
character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1,
mBuffIdx - idx), 16);
- if (i >= 0xffff) {
- panic(FAULT);
+ // Restore the buffer offset
+ mBuffIdx = idx - 1;
+ for(char character : Character.toChars(i))
{
+ if (character == ' ' || mInp.next !=
null) {
+ bappend(character, flag);
+ } else {
+ bappend(character);
+ }
}
- ch = (char) i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
- // Restore the buffer offset
- mBuffIdx = idx - 1;
- if (ch == ' ' || mInp.next != null) {
- bappend(ch, flag);
- } else {
- bappend(ch);
- }
st = -1;
break;
default:
panic(FAULT);
diff
a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
---
a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
+++
b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights
reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -358,10 +358,19 @@
*/
public void setDoIndent(boolean doIndent) {
_doIndent = doIndent;
}
+ /**
+ * Writes character reference in hex format.
+ */
+ private void writeCharRef(int codePoint) throws XMLStreamException {
+ _writer.write(ENCODING_PREFIX);
+ _writer.write(Integer.toHexString(codePoint));
+ _writer.write(SEMICOLON);
+ }
+
/**
* Writes XML content to underlying writer. Escapes characters unless
* escaping character feature is turned off.
*/
private void writeXMLContent(char[] content, int start, int length,
boolean escapeChars)
@@ -381,14 +390,19 @@
char ch = content[index];
if (!_writer.canEncode(ch)) {
_writer.write(content, startWritePos, index -
startWritePos);
- // Escape this char as underlying encoder cannot handle it
- _writer.write(ENCODING_PREFIX);
- _writer.write(Integer.toHexString(ch));
- _writer.write(SEMICOLON);
+ // Check if current and next characters forms a surrogate
pair
+ // and escape it to avoid generation of invalid xml content
+ if ( index != end - 1 && Character.isSurrogatePair(ch,
content[index+1])) {
+ writeCharRef(Character.toCodePoint(ch,
content[index+1]));
+ index++;
+ } else {
+ writeCharRef(ch);
+ }
+
startWritePos = index + 1;
continue;
}
switch (ch) {
@@ -453,14 +467,19 @@
char ch = content.charAt(index);
if (!_writer.canEncode(ch)) {
_writer.write(content, startWritePos, index -
startWritePos);
- // Escape this char as underlying encoder cannot handle it
- _writer.write(ENCODING_PREFIX);
- _writer.write(Integer.toHexString(ch));
- _writer.write(SEMICOLON);
+ // Check if current and next characters forms a surrogate
pair
+ // and escape it to avoid generation of invalid xml content
+ if ( index != end - 1 && Character.isSurrogatePair(ch,
content.charAt(index+1))) {
+ writeCharRef(Character.toCodePoint(ch,
content.charAt(index+1)));
+ index++;
+ } else {
+ writeCharRef(ch);
+ }
+
startWritePos = index + 1;
continue;
}
switch (ch) {
diff a/test/jdk/java/util/Properties/LoadAndStoreXML.java
b/test/jdk/java/util/Properties/LoadAndStoreXML.java
--- a/test/jdk/java/util/Properties/LoadAndStoreXML.java
+++ b/test/jdk/java/util/Properties/LoadAndStoreXML.java
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
@@ -21,11 +21,11 @@
* questions.
*/
/*
* @test
- * @bug 8000354 8000685 8004371 8043119
+ * @bug 8000354 8000685 8004371 8043119 8276207
* @summary Basic test of storeToXML and loadToXML
* @run main/othervm -Djava.security.manager=allow LoadAndStoreXML
*/
import java.io.ByteArrayInputStream;
@@ -136,10 +136,11 @@
props.put("k1", "foo");
props.put("k2", "bar");
props.put("k3",
"\u0020\u0391\u0392\u0393\u0394\u0395\u0396\u0397");
props.put("k4", "\u7532\u9aa8\u6587");
props.put("k5", "<java.home>/conf/jaxp.properties");
+ props.put("k6", "\uD834\uDD1E");
TestOutputStream out = new TestOutputStream();
props.storeToXML(out, null, encoding);
if (!out.isOpen())
throw new RuntimeException("OutputStream closed by
storeToXML");
@@ -241,10 +242,60 @@
}
}
}
}
+ /**
+ * Test loadFromXML with supplementary characters
+ */
+ static void testLoadWithSupplementaryCharacters() throws IOException {
+ System.out.println("testLoadWithSupplementaryCharacters");
+
+ Properties expected = new Properties();
+ expected.put("\uD834\uDD1E", "\uD834\uDD1E");
+
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+ "<!DOCTYPE properties SYSTEM \"
http://java.sun.com/dtd/properties.dtd\">" +
+ "<properties>" +
+ "<entry key=\"𝄞\">𝄞</entry>" +
+ "</properties>";
+
+ ByteArrayInputStream in = new
ByteArrayInputStream(s.getBytes("UTF-8"));
+ Properties props = new Properties();
+ props.loadFromXML(in);
+
+ if (!props.equals(expected)) {
+ System.err.println("loaded: " + props + ", expected: " +
expected);
+ throw new RuntimeException("Test failed");
+ }
+ }
+
+ /**
+ * Test storeToXML with supplementary characters
+ */
+ static void testStoreWithSupplementaryCharacters() throws IOException {
+ System.out.println("testStoreWithSupplementaryCharacters");
+
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
System.lineSeparator() +
+ "<!DOCTYPE properties SYSTEM \"
http://java.sun.com/dtd/properties.dtd\">" + System.lineSeparator() +
+ "<properties>" + System.lineSeparator() +
+ "<entry key=\"Musical Symbols\">𝄞</entry>" +
System.lineSeparator() +
+ "</properties>" + System.lineSeparator();
+
+ Properties props = new Properties();
+ props.put("Musical Symbols", "\uD834\uDD1E");
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ props.storeToXML(out, null, "UTF-8");
+
+ String outXml = out.toString("UTF-8");
+
+ if (!outXml.equals(s)) {
+ System.err.println("stored: " + outXml + ", expected: " + s);
+ throw new RuntimeException("Test failed");
+ }
+ }
+
public static void main(String[] args) throws IOException {
testLoadAndStore("UTF-8", false);
testLoadAndStore("UTF-16", false);
testLoadAndStore("UTF-16BE", false);
@@ -252,10 +303,12 @@
testLoadAndStore("UTF-16BE", true);
testLoadAndStore("UTF-16LE", true);
testLoadWithoutEncoding();
testLoadWithBadEncoding();
testStoreWithBadEncoding();
+ testLoadWithSupplementaryCharacters();
+ testStoreWithSupplementaryCharacters();
// malformed documents
String src = System.getProperty("test.src");
String subdir = "invalidxml";
Path dir = (src == null) ? Paths.get(subdir) : Paths.get(src,
subdir);
On Sun, 31 Oct 2021 at 19:38, Anirvan Sarkar <powers.anirvan at gmail.com>
wrote:
> Hi,
>
> Properties.loadFromXML/storeToXML works incorrectly for supplementary
> characters after JDK-8042889[1] was integrated in JDK 9.
>
> Properties.storeToXML now generates incorrect character references for
> supplementary characters. This is similar to JDK-8145974[2] which was fixed
> in the java.xml module in JDK 9.
>
> Properties.loadFromXML now fails to parse character references for
> supplementary characters and throws InvalidPropertiesFormatException.
>
> Sample program which demonstrates these issues is in the JBS[3].
>
> Proposed patch to fix this issue is attached.
> JDK Tier 1 tests are all green.
> If it looks fine then I will create a pull request based on this.
>
> [1] : https://bugs.openjdk.java.net/browse/JDK-8042889
> [2] : https://bugs.openjdk.java.net/browse/JDK-8145974
> [3] : https://bugs.openjdk.java.net/browse/JDK-8276207
>
> --
> Anirvan
>
--
Anirvan
More information about the core-libs-dev
mailing list