8276207: Properties.loadFromXML/storeToXML works incorrectly for supplementary characters
Anirvan Sarkar
powers.anirvan at gmail.com
Tue Nov 2 19:07:27 UTC 2021
Hi Joe,
Thanks. I have created a pull request[1].
The new entry in "testLoadAndStore" will do the roundtrip as you have
described.
So "testStoreWithSupplementaryCharacters" is redundant.
I have removed this method before creating the pull request.
[1] : https://github.com/openjdk/jdk/pull/6216
On Tue, 2 Nov 2021 at 14:09, Joe Wang <huizhe.wang at oracle.com> wrote:
> Hi Anirvan,
>
> The change looks good to me. Please go ahead with creating a pull
> request. It would be easier for reviewers to pull the patch and look at
> it. On first glance, test "testStoreWithSupplementaryCharacters" may be
> a bit sensitive to file format. It may be solved with a roundtrip
> (store/read) and comparing key/value of the entry, or test that the
> output contains then entry element.
>
> Thanks,
> Joe
>
> On 10/31/21 3:56 AM, Anirvan Sarkar wrote:
> > Hi,
> >
> > Since it seems that the mailing list is scrubbing attachments, patch is
> > mentioned inline below.
> >
> > diff a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> > b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> > --- a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> > +++ b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
> > @@ -1,7 +1,7 @@
> > /*
> > - * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights
> > reserved.
> > + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
> > reserved.
> > * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> > *
> > * This code is free software; you can redistribute it and/or modify it
> > * under the terms of the GNU General Public License version 2 only, as
> > * published by the Free Software Foundation. Oracle designates this
> > @@ -1991,24 +1991,22 @@
> > case ';':
> > // Convert the character entity
> to a
> > character
> > try {
> > int i = Integer.parseInt(
> > new String(mBuff, idx + 1,
> > mBuffIdx - idx), 10);
> > - if (i >= 0xffff) {
> > - panic(FAULT);
> > + // Restore the buffer offset
> > + mBuffIdx = idx - 1;
> > + for(char character :
> Character.toChars(i))
> > {
> > + if (character == ' ' || mInp.next !=
> > null) {
> > + bappend(character, flag);
> > + } else {
> > + bappend(character);
> > + }
> > }
> > - ch = (char) i;
> > } catch (NumberFormatException nfe) {
> > panic(FAULT);
> > }
> > - // Restore the buffer offset
> > - mBuffIdx = idx - 1;
> > - if (ch == ' ' || mInp.next != null) {
> > - bappend(ch, flag);
> > - } else {
> > - bappend(ch);
> > - }
> > st = -1;
> > break;
> >
> > case 'a':
> > // If the entity buffer is empty
> and
> > ch == 'x'
> > @@ -2032,24 +2030,22 @@
> > case ';':
> > // Convert the character entity
> to a
> > character
> > try {
> > int i = Integer.parseInt(
> > new String(mBuff, idx + 1,
> > mBuffIdx - idx), 16);
> > - if (i >= 0xffff) {
> > - panic(FAULT);
> > + // Restore the buffer offset
> > + mBuffIdx = idx - 1;
> > + for(char character :
> Character.toChars(i))
> > {
> > + if (character == ' ' || mInp.next !=
> > null) {
> > + bappend(character, flag);
> > + } else {
> > + bappend(character);
> > + }
> > }
> > - ch = (char) i;
> > } catch (NumberFormatException nfe) {
> > panic(FAULT);
> > }
> > - // Restore the buffer offset
> > - mBuffIdx = idx - 1;
> > - if (ch == ' ' || mInp.next != null) {
> > - bappend(ch, flag);
> > - } else {
> > - bappend(ch);
> > - }
> > st = -1;
> > break;
> >
> > default:
> > panic(FAULT);
> > diff
> >
> a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> >
> b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> > ---
> >
> a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> > +++
> >
> b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
> > @@ -1,7 +1,7 @@
> > /*
> > - * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights
> > reserved.
> > + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
> > reserved.
> > * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> > *
> > * This code is free software; you can redistribute it and/or modify it
> > * under the terms of the GNU General Public License version 2 only, as
> > * published by the Free Software Foundation. Oracle designates this
> > @@ -358,10 +358,19 @@
> > */
> > public void setDoIndent(boolean doIndent) {
> > _doIndent = doIndent;
> > }
> >
> > + /**
> > + * Writes character reference in hex format.
> > + */
> > + private void writeCharRef(int codePoint) throws XMLStreamException {
> > + _writer.write(ENCODING_PREFIX);
> > + _writer.write(Integer.toHexString(codePoint));
> > + _writer.write(SEMICOLON);
> > + }
> > +
> > /**
> > * Writes XML content to underlying writer. Escapes characters
> unless
> > * escaping character feature is turned off.
> > */
> > private void writeXMLContent(char[] content, int start, int length,
> > boolean escapeChars)
> > @@ -381,14 +390,19 @@
> > char ch = content[index];
> >
> > if (!_writer.canEncode(ch)) {
> > _writer.write(content, startWritePos, index -
> > startWritePos);
> >
> > - // Escape this char as underlying encoder cannot handle
> it
> > - _writer.write(ENCODING_PREFIX);
> > - _writer.write(Integer.toHexString(ch));
> > - _writer.write(SEMICOLON);
> > + // Check if current and next characters forms a
> surrogate
> > pair
> > + // and escape it to avoid generation of invalid xml
> content
> > + if ( index != end - 1 && Character.isSurrogatePair(ch,
> > content[index+1])) {
> > + writeCharRef(Character.toCodePoint(ch,
> > content[index+1]));
> > + index++;
> > + } else {
> > + writeCharRef(ch);
> > + }
> > +
> > startWritePos = index + 1;
> > continue;
> > }
> >
> > switch (ch) {
> > @@ -453,14 +467,19 @@
> > char ch = content.charAt(index);
> >
> > if (!_writer.canEncode(ch)) {
> > _writer.write(content, startWritePos, index -
> > startWritePos);
> >
> > - // Escape this char as underlying encoder cannot handle
> it
> > - _writer.write(ENCODING_PREFIX);
> > - _writer.write(Integer.toHexString(ch));
> > - _writer.write(SEMICOLON);
> > + // Check if current and next characters forms a
> surrogate
> > pair
> > + // and escape it to avoid generation of invalid xml
> content
> > + if ( index != end - 1 && Character.isSurrogatePair(ch,
> > content.charAt(index+1))) {
> > + writeCharRef(Character.toCodePoint(ch,
> > content.charAt(index+1)));
> > + index++;
> > + } else {
> > + writeCharRef(ch);
> > + }
> > +
> > startWritePos = index + 1;
> > continue;
> > }
> >
> > switch (ch) {
> > diff a/test/jdk/java/util/Properties/LoadAndStoreXML.java
> > b/test/jdk/java/util/Properties/LoadAndStoreXML.java
> > --- a/test/jdk/java/util/Properties/LoadAndStoreXML.java
> > +++ b/test/jdk/java/util/Properties/LoadAndStoreXML.java
> > @@ -1,7 +1,7 @@
> > /*
> > - * Copyright (c) 2012, Oracle and/or its affiliates. All rights
> reserved.
> > + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
> > reserved.
> > * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> > *
> > * This code is free software; you can redistribute it and/or modify it
> > * under the terms of the GNU General Public License version 2 only, as
> > * published by the Free Software Foundation.
> > @@ -21,11 +21,11 @@
> > * questions.
> > */
> >
> > /*
> > * @test
> > - * @bug 8000354 8000685 8004371 8043119
> > + * @bug 8000354 8000685 8004371 8043119 8276207
> > * @summary Basic test of storeToXML and loadToXML
> > * @run main/othervm -Djava.security.manager=allow LoadAndStoreXML
> > */
> >
> > import java.io.ByteArrayInputStream;
> > @@ -136,10 +136,11 @@
> > props.put("k1", "foo");
> > props.put("k2", "bar");
> > props.put("k3",
> > "\u0020\u0391\u0392\u0393\u0394\u0395\u0396\u0397");
> > props.put("k4", "\u7532\u9aa8\u6587");
> > props.put("k5", "<java.home>/conf/jaxp.properties");
> > + props.put("k6", "\uD834\uDD1E");
> >
> > TestOutputStream out = new TestOutputStream();
> > props.storeToXML(out, null, encoding);
> > if (!out.isOpen())
> > throw new RuntimeException("OutputStream closed by
> > storeToXML");
> > @@ -241,10 +242,60 @@
> > }
> > }
> > }
> > }
> >
> > + /**
> > + * Test loadFromXML with supplementary characters
> > + */
> > + static void testLoadWithSupplementaryCharacters() throws
> IOException {
> > + System.out.println("testLoadWithSupplementaryCharacters");
> > +
> > + Properties expected = new Properties();
> > + expected.put("\uD834\uDD1E", "\uD834\uDD1E");
> > +
> > + String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
> > + "<!DOCTYPE properties SYSTEM \"
> > http://java.sun.com/dtd/properties.dtd\">" +
> > + "<properties>" +
> > + "<entry key=\"𝄞\">𝄞</entry>" +
> > + "</properties>";
> > +
> > + ByteArrayInputStream in = new
> > ByteArrayInputStream(s.getBytes("UTF-8"));
> > + Properties props = new Properties();
> > + props.loadFromXML(in);
> > +
> > + if (!props.equals(expected)) {
> > + System.err.println("loaded: " + props + ", expected: " +
> > expected);
> > + throw new RuntimeException("Test failed");
> > + }
> > + }
> > +
> > + /**
> > + * Test storeToXML with supplementary characters
> > + */
> > + static void testStoreWithSupplementaryCharacters() throws
> IOException {
> > + System.out.println("testStoreWithSupplementaryCharacters");
> > +
> > + String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
> > System.lineSeparator() +
> > + "<!DOCTYPE properties SYSTEM \"
> > http://java.sun.com/dtd/properties.dtd\">" + System.lineSeparator() +
> > + "<properties>" + System.lineSeparator() +
> > + "<entry key=\"Musical Symbols\">𝄞</entry>" +
> > System.lineSeparator() +
> > + "</properties>" + System.lineSeparator();
> > +
> > + Properties props = new Properties();
> > + props.put("Musical Symbols", "\uD834\uDD1E");
> > + ByteArrayOutputStream out = new ByteArrayOutputStream();
> > + props.storeToXML(out, null, "UTF-8");
> > +
> > + String outXml = out.toString("UTF-8");
> > +
> > + if (!outXml.equals(s)) {
> > + System.err.println("stored: " + outXml + ", expected: " +
> s);
> > + throw new RuntimeException("Test failed");
> > + }
> > + }
> > +
> > public static void main(String[] args) throws IOException {
> >
> > testLoadAndStore("UTF-8", false);
> > testLoadAndStore("UTF-16", false);
> > testLoadAndStore("UTF-16BE", false);
> > @@ -252,10 +303,12 @@
> > testLoadAndStore("UTF-16BE", true);
> > testLoadAndStore("UTF-16LE", true);
> > testLoadWithoutEncoding();
> > testLoadWithBadEncoding();
> > testStoreWithBadEncoding();
> > + testLoadWithSupplementaryCharacters();
> > + testStoreWithSupplementaryCharacters();
> >
> > // malformed documents
> > String src = System.getProperty("test.src");
> > String subdir = "invalidxml";
> > Path dir = (src == null) ? Paths.get(subdir) : Paths.get(src,
> > subdir);
> >
> > On Sun, 31 Oct 2021 at 19:38, Anirvan Sarkar <powers.anirvan at gmail.com>
> > wrote:
> >
> >> Hi,
> >>
> >> Properties.loadFromXML/storeToXML works incorrectly for supplementary
> >> characters after JDK-8042889[1] was integrated in JDK 9.
> >>
> >> Properties.storeToXML now generates incorrect character references for
> >> supplementary characters. This is similar to JDK-8145974[2] which was
> fixed
> >> in the java.xml module in JDK 9.
> >>
> >> Properties.loadFromXML now fails to parse character references for
> >> supplementary characters and throws InvalidPropertiesFormatException.
> >>
> >> Sample program which demonstrates these issues is in the JBS[3].
> >>
> >> Proposed patch to fix this issue is attached.
> >> JDK Tier 1 tests are all green.
> >> If it looks fine then I will create a pull request based on this.
> >>
> >> [1] : https://bugs.openjdk.java.net/browse/JDK-8042889
> >> [2] : https://bugs.openjdk.java.net/browse/JDK-8145974
> >> [3] : https://bugs.openjdk.java.net/browse/JDK-8276207
> >>
> >> --
> >> Anirvan
> >>
> >
>
>
--
Anirvan
More information about the core-libs-dev
mailing list