8276207: Properties.loadFromXML/storeToXML works incorrectly for supplementary characters

Anirvan Sarkar powers.anirvan at gmail.com
Sun Oct 31 10:56:56 UTC 2021


Hi,

Since the mailing list seems to be scrubbing attachments, the patch is
included inline below.

diff a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
--- a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
+++ b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights
reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
@@ -1991,24 +1991,22 @@
                         case ';':
                             //          Convert the character entity to a
character
                             try {
                                 int i = Integer.parseInt(
                                         new String(mBuff, idx + 1,
mBuffIdx - idx), 10);
-                                if (i >= 0xffff) {
-                                    panic(FAULT);
+                                //          Restore the buffer offset
+                                mBuffIdx = idx - 1;
+                                for(char character : Character.toChars(i))
{
+                                    if (character == ' ' || mInp.next !=
null) {
+                                        bappend(character, flag);
+                                    } else {
+                                        bappend(character);
+                                    }
                                 }
-                                ch = (char) i;
                             } catch (NumberFormatException nfe) {
                                 panic(FAULT);
                             }
-                            //          Restore the buffer offset
-                            mBuffIdx = idx - 1;
-                            if (ch == ' ' || mInp.next != null) {
-                                bappend(ch, flag);
-                            } else {
-                                bappend(ch);
-                            }
                             st = -1;
                             break;

                         case 'a':
                             //          If the entity buffer is empty and
ch == 'x'
@@ -2032,24 +2030,22 @@
                         case ';':
                             //          Convert the character entity to a
character
                             try {
                                 int i = Integer.parseInt(
                                         new String(mBuff, idx + 1,
mBuffIdx - idx), 16);
-                                if (i >= 0xffff) {
-                                    panic(FAULT);
+                                //          Restore the buffer offset
+                                mBuffIdx = idx - 1;
+                                for(char character : Character.toChars(i))
{
+                                    if (character == ' ' || mInp.next !=
null) {
+                                        bappend(character, flag);
+                                    } else {
+                                        bappend(character);
+                                    }
                                 }
-                                ch = (char) i;
                             } catch (NumberFormatException nfe) {
                                 panic(FAULT);
                             }
-                            //          Restore the buffer offset
-                            mBuffIdx = idx - 1;
-                            if (ch == ' ' || mInp.next != null) {
-                                bappend(ch, flag);
-                            } else {
-                                bappend(ch);
-                            }
                             st = -1;
                             break;

                         default:
                             panic(FAULT);
diff
a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
---
a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
+++
b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights
reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
@@ -358,10 +358,19 @@
      */
     public void setDoIndent(boolean doIndent) {
         _doIndent = doIndent;
     }

+    /**
+     * Writes character reference in hex format.
+     */
+    private void writeCharRef(int codePoint) throws XMLStreamException {
+        _writer.write(ENCODING_PREFIX);
+        _writer.write(Integer.toHexString(codePoint));
+        _writer.write(SEMICOLON);
+    }
+
     /**
      * Writes XML content to underlying writer. Escapes characters unless
      * escaping character feature is turned off.
      */
     private void writeXMLContent(char[] content, int start, int length,
boolean escapeChars)
@@ -381,14 +390,19 @@
             char ch = content[index];

             if (!_writer.canEncode(ch)) {
                 _writer.write(content, startWritePos, index -
startWritePos);

-                // Escape this char as underlying encoder cannot handle it
-                _writer.write(ENCODING_PREFIX);
-                _writer.write(Integer.toHexString(ch));
-                _writer.write(SEMICOLON);
+                // Check if current and next characters forms a surrogate
pair
+                // and escape it to avoid generation of invalid xml content
+                if ( index != end - 1 && Character.isSurrogatePair(ch,
content[index+1])) {
+                    writeCharRef(Character.toCodePoint(ch,
content[index+1]));
+                    index++;
+                } else {
+                    writeCharRef(ch);
+                }
+
                 startWritePos = index + 1;
                 continue;
             }

             switch (ch) {
@@ -453,14 +467,19 @@
             char ch = content.charAt(index);

             if (!_writer.canEncode(ch)) {
                 _writer.write(content, startWritePos, index -
startWritePos);

-                // Escape this char as underlying encoder cannot handle it
-                _writer.write(ENCODING_PREFIX);
-                _writer.write(Integer.toHexString(ch));
-                _writer.write(SEMICOLON);
+                // Check if current and next characters forms a surrogate
pair
+                // and escape it to avoid generation of invalid xml content
+                if ( index != end - 1 && Character.isSurrogatePair(ch,
content.charAt(index+1))) {
+                    writeCharRef(Character.toCodePoint(ch,
content.charAt(index+1)));
+                    index++;
+                } else {
+                    writeCharRef(ch);
+                }
+
                 startWritePos = index + 1;
                 continue;
             }

             switch (ch) {
diff a/test/jdk/java/util/Properties/LoadAndStoreXML.java
b/test/jdk/java/util/Properties/LoadAndStoreXML.java
--- a/test/jdk/java/util/Properties/LoadAndStoreXML.java
+++ b/test/jdk/java/util/Properties/LoadAndStoreXML.java
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
@@ -21,11 +21,11 @@
  * questions.
  */

 /*
  * @test
- * @bug 8000354 8000685 8004371 8043119
+ * @bug 8000354 8000685 8004371 8043119 8276207
  * @summary Basic test of storeToXML and loadToXML
  * @run main/othervm -Djava.security.manager=allow LoadAndStoreXML
  */

 import java.io.ByteArrayInputStream;
@@ -136,10 +136,11 @@
         props.put("k1", "foo");
         props.put("k2", "bar");
         props.put("k3",
"\u0020\u0391\u0392\u0393\u0394\u0395\u0396\u0397");
         props.put("k4", "\u7532\u9aa8\u6587");
         props.put("k5", "<java.home>/conf/jaxp.properties");
+        props.put("k6", "\uD834\uDD1E");

         TestOutputStream out = new TestOutputStream();
         props.storeToXML(out, null, encoding);
         if (!out.isOpen())
             throw new RuntimeException("OutputStream closed by
storeToXML");
@@ -241,10 +242,60 @@
                 }
             }
         }
     }

+    /**
+     * Test loadFromXML with supplementary characters
+     */
+    static void testLoadWithSupplementaryCharacters() throws IOException {
+        System.out.println("testLoadWithSupplementaryCharacters");
+
+        Properties expected = new Properties();
+        expected.put("\uD834\uDD1E", "\uD834\uDD1E");
+
+        String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                   "<!DOCTYPE properties SYSTEM \"
http://java.sun.com/dtd/properties.dtd\">" +
+                   "<properties>" +
+                   "<entry key=\"𝄞\">&#x1d11e;</entry>" +
+                   "</properties>";
+
+        ByteArrayInputStream in = new
ByteArrayInputStream(s.getBytes("UTF-8"));
+        Properties props = new Properties();
+        props.loadFromXML(in);
+
+        if (!props.equals(expected)) {
+            System.err.println("loaded: " + props + ", expected: " +
expected);
+            throw new RuntimeException("Test failed");
+        }
+    }
+
+    /**
+     * Test storeToXML with supplementary characters
+     */
+    static void testStoreWithSupplementaryCharacters() throws IOException {
+        System.out.println("testStoreWithSupplementaryCharacters");
+
+        String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
System.lineSeparator() +
+                   "<!DOCTYPE properties SYSTEM \"
http://java.sun.com/dtd/properties.dtd\">" + System.lineSeparator() +
+                   "<properties>" + System.lineSeparator() +
+                   "<entry key=\"Musical Symbols\">&#x1d11e;</entry>" +
System.lineSeparator() +
+                   "</properties>" + System.lineSeparator();
+
+        Properties props = new Properties();
+        props.put("Musical Symbols", "\uD834\uDD1E");
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        props.storeToXML(out, null, "UTF-8");
+
+        String outXml = out.toString("UTF-8");
+
+        if (!outXml.equals(s)) {
+            System.err.println("stored: " + outXml + ", expected: " + s);
+            throw new RuntimeException("Test failed");
+        }
+    }
+
     public static void main(String[] args) throws IOException {

         testLoadAndStore("UTF-8", false);
         testLoadAndStore("UTF-16", false);
         testLoadAndStore("UTF-16BE", false);
@@ -252,10 +303,12 @@
         testLoadAndStore("UTF-16BE", true);
         testLoadAndStore("UTF-16LE", true);
         testLoadWithoutEncoding();
         testLoadWithBadEncoding();
         testStoreWithBadEncoding();
+        testLoadWithSupplementaryCharacters();
+        testStoreWithSupplementaryCharacters();

         // malformed documents
         String src = System.getProperty("test.src");
         String subdir = "invalidxml";
         Path dir = (src == null) ? Paths.get(subdir) : Paths.get(src,
subdir);

On Sun, 31 Oct 2021 at 19:38, Anirvan Sarkar <powers.anirvan at gmail.com>
wrote:

> Hi,
>
> Properties.loadFromXML/storeToXML work incorrectly for supplementary
> characters since JDK-8042889[1] was integrated in JDK 9.
>
> Properties.storeToXML now generates incorrect character references for
> supplementary characters. This is similar to JDK-8145974[2], which was
> fixed in the java.xml module in JDK 9.
>
> Properties.loadFromXML now fails to parse character references for
> supplementary characters and throws InvalidPropertiesFormatException.
>
> A sample program that demonstrates these issues is in JBS[3].
>
> A proposed patch to fix this issue is attached.
> JDK Tier 1 tests are all green.
> If it looks fine, I will create a pull request based on it.
>
> [1] : https://bugs.openjdk.java.net/browse/JDK-8042889
> [2] : https://bugs.openjdk.java.net/browse/JDK-8145974
> [3] : https://bugs.openjdk.java.net/browse/JDK-8276207
>
> --
> Anirvan
>


-- 
Anirvan


More information about the core-libs-dev mailing list