8276207: Properties.loadFromXML/storeToXML works incorrectly for supplementary characters
Joe Wang
huizhe.wang at oracle.com
Tue Nov 2 05:09:02 UTC 2021
Hi Anirvan,
The change looks good to me. Please go ahead with creating a pull
request. It would be easier for reviewers to pull the patch and look at
it. On first glance, test "testStoreWithSupplementaryCharacters" may be
a bit sensitive to file format. It may be solved with a roundtrip
(store/read) and comparing key/value of the entry, or test that the
output contains then entry element.
Thanks,
Joe
On 10/31/21 3:56 AM, Anirvan Sarkar wrote:
> Hi,
>
> Since it seems that the mailing list is scrubbing attachments, patch is
> mentioned inline below.
>
> diff a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> --- a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> +++ b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> @@ -1,7 +1,7 @@
> /*
> - * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights
> reserved.
> + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
> reserved.
> * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> *
> * This code is free software; you can redistribute it and/or modify it
> * under the terms of the GNU General Public License version 2 only, as
> * published by the Free Software Foundation. Oracle designates this
> @@ -1991,24 +1991,22 @@
> case ';':
> // Convert the character entity to a
> character
> try {
> int i = Integer.parseInt(
> new String(mBuff, idx + 1,
> mBuffIdx - idx), 10);
> - if (i >= 0xffff) {
> - panic(FAULT);
> + // Restore the buffer offset
> + mBuffIdx = idx - 1;
> + for(char character : Character.toChars(i))
> {
> + if (character == ' ' || mInp.next !=
> null) {
> + bappend(character, flag);
> + } else {
> + bappend(character);
> + }
> }
> - ch = (char) i;
> } catch (NumberFormatException nfe) {
> panic(FAULT);
> }
> - // Restore the buffer offset
> - mBuffIdx = idx - 1;
> - if (ch == ' ' || mInp.next != null) {
> - bappend(ch, flag);
> - } else {
> - bappend(ch);
> - }
> st = -1;
> break;
>
> case 'a':
> // If the entity buffer is empty and
> ch == 'x'
> @@ -2032,24 +2030,22 @@
> case ';':
> // Convert the character entity to a
> character
> try {
> int i = Integer.parseInt(
> new String(mBuff, idx + 1,
> mBuffIdx - idx), 16);
> - if (i >= 0xffff) {
> - panic(FAULT);
> + // Restore the buffer offset
> + mBuffIdx = idx - 1;
> + for(char character : Character.toChars(i))
> {
> + if (character == ' ' || mInp.next !=
> null) {
> + bappend(character, flag);
> + } else {
> + bappend(character);
> + }
> }
> - ch = (char) i;
> } catch (NumberFormatException nfe) {
> panic(FAULT);
> }
> - // Restore the buffer offset
> - mBuffIdx = idx - 1;
> - if (ch == ' ' || mInp.next != null) {
> - bappend(ch, flag);
> - } else {
> - bappend(ch);
> - }
> st = -1;
> break;
>
> default:
> panic(FAULT);
> diff
> a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> ---
> a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> +++
> b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> @@ -1,7 +1,7 @@
> /*
> - * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights
> reserved.
> + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
> reserved.
> * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> *
> * This code is free software; you can redistribute it and/or modify it
> * under the terms of the GNU General Public License version 2 only, as
> * published by the Free Software Foundation. Oracle designates this
> @@ -358,10 +358,19 @@
> */
> public void setDoIndent(boolean doIndent) {
> _doIndent = doIndent;
> }
>
> + /**
> + * Writes character reference in hex format.
> + */
> + private void writeCharRef(int codePoint) throws XMLStreamException {
> + _writer.write(ENCODING_PREFIX);
> + _writer.write(Integer.toHexString(codePoint));
> + _writer.write(SEMICOLON);
> + }
> +
> /**
> * Writes XML content to underlying writer. Escapes characters unless
> * escaping character feature is turned off.
> */
> private void writeXMLContent(char[] content, int start, int length,
> boolean escapeChars)
> @@ -381,14 +390,19 @@
> char ch = content[index];
>
> if (!_writer.canEncode(ch)) {
> _writer.write(content, startWritePos, index -
> startWritePos);
>
> - // Escape this char as underlying encoder cannot handle it
> - _writer.write(ENCODING_PREFIX);
> - _writer.write(Integer.toHexString(ch));
> - _writer.write(SEMICOLON);
> + // Check if current and next characters forms a surrogate
> pair
> + // and escape it to avoid generation of invalid xml content
> + if ( index != end - 1 && Character.isSurrogatePair(ch,
> content[index+1])) {
> + writeCharRef(Character.toCodePoint(ch,
> content[index+1]));
> + index++;
> + } else {
> + writeCharRef(ch);
> + }
> +
> startWritePos = index + 1;
> continue;
> }
>
> switch (ch) {
> @@ -453,14 +467,19 @@
> char ch = content.charAt(index);
>
> if (!_writer.canEncode(ch)) {
> _writer.write(content, startWritePos, index -
> startWritePos);
>
> - // Escape this char as underlying encoder cannot handle it
> - _writer.write(ENCODING_PREFIX);
> - _writer.write(Integer.toHexString(ch));
> - _writer.write(SEMICOLON);
> + // Check if current and next characters forms a surrogate
> pair
> + // and escape it to avoid generation of invalid xml content
> + if ( index != end - 1 && Character.isSurrogatePair(ch,
> content.charAt(index+1))) {
> + writeCharRef(Character.toCodePoint(ch,
> content.charAt(index+1)));
> + index++;
> + } else {
> + writeCharRef(ch);
> + }
> +
> startWritePos = index + 1;
> continue;
> }
>
> switch (ch) {
> diff a/test/jdk/java/util/Properties/LoadAndStoreXML.java
> b/test/jdk/java/util/Properties/LoadAndStoreXML.java
> --- a/test/jdk/java/util/Properties/LoadAndStoreXML.java
> +++ b/test/jdk/java/util/Properties/LoadAndStoreXML.java
> @@ -1,7 +1,7 @@
> /*
> - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
> reserved.
> * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> *
> * This code is free software; you can redistribute it and/or modify it
> * under the terms of the GNU General Public License version 2 only, as
> * published by the Free Software Foundation.
> @@ -21,11 +21,11 @@
> * questions.
> */
>
> /*
> * @test
> - * @bug 8000354 8000685 8004371 8043119
> + * @bug 8000354 8000685 8004371 8043119 8276207
> * @summary Basic test of storeToXML and loadToXML
> * @run main/othervm -Djava.security.manager=allow LoadAndStoreXML
> */
>
> import java.io.ByteArrayInputStream;
> @@ -136,10 +136,11 @@
> props.put("k1", "foo");
> props.put("k2", "bar");
> props.put("k3",
> "\u0020\u0391\u0392\u0393\u0394\u0395\u0396\u0397");
> props.put("k4", "\u7532\u9aa8\u6587");
> props.put("k5", "<java.home>/conf/jaxp.properties");
> + props.put("k6", "\uD834\uDD1E");
>
> TestOutputStream out = new TestOutputStream();
> props.storeToXML(out, null, encoding);
> if (!out.isOpen())
> throw new RuntimeException("OutputStream closed by
> storeToXML");
> @@ -241,10 +242,60 @@
> }
> }
> }
> }
>
> + /**
> + * Test loadFromXML with supplementary characters
> + */
> + static void testLoadWithSupplementaryCharacters() throws IOException {
> + System.out.println("testLoadWithSupplementaryCharacters");
> +
> + Properties expected = new Properties();
> + expected.put("\uD834\uDD1E", "\uD834\uDD1E");
> +
> + String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
> + "<!DOCTYPE properties SYSTEM \"
> http://java.sun.com/dtd/properties.dtd\">" +
> + "<properties>" +
> + "<entry key=\"𝄞\">𝄞</entry>" +
> + "</properties>";
> +
> + ByteArrayInputStream in = new
> ByteArrayInputStream(s.getBytes("UTF-8"));
> + Properties props = new Properties();
> + props.loadFromXML(in);
> +
> + if (!props.equals(expected)) {
> + System.err.println("loaded: " + props + ", expected: " +
> expected);
> + throw new RuntimeException("Test failed");
> + }
> + }
> +
> + /**
> + * Test storeToXML with supplementary characters
> + */
> + static void testStoreWithSupplementaryCharacters() throws IOException {
> + System.out.println("testStoreWithSupplementaryCharacters");
> +
> + String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
> System.lineSeparator() +
> + "<!DOCTYPE properties SYSTEM \"
> http://java.sun.com/dtd/properties.dtd\">" + System.lineSeparator() +
> + "<properties>" + System.lineSeparator() +
> + "<entry key=\"Musical Symbols\">𝄞</entry>" +
> System.lineSeparator() +
> + "</properties>" + System.lineSeparator();
> +
> + Properties props = new Properties();
> + props.put("Musical Symbols", "\uD834\uDD1E");
> + ByteArrayOutputStream out = new ByteArrayOutputStream();
> + props.storeToXML(out, null, "UTF-8");
> +
> + String outXml = out.toString("UTF-8");
> +
> + if (!outXml.equals(s)) {
> + System.err.println("stored: " + outXml + ", expected: " + s);
> + throw new RuntimeException("Test failed");
> + }
> + }
> +
> public static void main(String[] args) throws IOException {
>
> testLoadAndStore("UTF-8", false);
> testLoadAndStore("UTF-16", false);
> testLoadAndStore("UTF-16BE", false);
> @@ -252,10 +303,12 @@
> testLoadAndStore("UTF-16BE", true);
> testLoadAndStore("UTF-16LE", true);
> testLoadWithoutEncoding();
> testLoadWithBadEncoding();
> testStoreWithBadEncoding();
> + testLoadWithSupplementaryCharacters();
> + testStoreWithSupplementaryCharacters();
>
> // malformed documents
> String src = System.getProperty("test.src");
> String subdir = "invalidxml";
> Path dir = (src == null) ? Paths.get(subdir) : Paths.get(src,
> subdir);
>
> On Sun, 31 Oct 2021 at 19:38, Anirvan Sarkar <powers.anirvan at gmail.com>
> wrote:
>
>> Hi,
>>
>> Properties.loadFromXML/storeToXML works incorrectly for supplementary
>> characters after JDK-8042889[1] was integrated in JDK 9.
>>
>> Properties.storeToXML now generates incorrect character references for
>> supplementary characters. This is similar to JDK-8145974[2] which was fixed
>> in the java.xml module in JDK 9.
>>
>> Properties.loadFromXML now fails to parse character references for
>> supplementary characters and throws InvalidPropertiesFormatException.
>>
>> Sample program which demonstrates these issues is in the JBS[3].
>>
>> Proposed patch to fix this issue is attached.
>> JDK Tier 1 tests are all green.
>> If it looks fine then I will create a pull request based on this.
>>
>> [1] : https://bugs.openjdk.java.net/browse/JDK-8042889
>> [2] : https://bugs.openjdk.java.net/browse/JDK-8145974
>> [3] : https://bugs.openjdk.java.net/browse/JDK-8276207
>>
>> --
>> Anirvan
>>
>
More information about the core-libs-dev
mailing list