Wrong encoding after XML identity transformation

huizhe wang huizhe.wang at oracle.com
Thu Apr 17 15:43:54 UTC 2014


Hi Nico,

The JDK is a few patches short of Xalan 2.7.1. At least one of those
patches caused a performance regression, which was the main reason they
were not brought in. We plan to fix the issue and bring the JDK up to
Xalan version 2.7.1.

Could you file a bug report at https://bugs.openjdk.java.net/browse/JDK?
Please select category "xml" and javax.xml.transform.

Thanks,
Joe

On 3/29/2014 12:47 PM, Nico R. wrote:
> Hello again!
>
> A few hours ago, I wrote:
> […]
>> I’m attaching my test code, which I hope is correct and readable.
> It seems that the list software stripped off the attachment with the
> test program, so here it is again, inline: :-/
>
>
>
>
> // This document is encoded in UTF-8, with no BOM and with LF line endings.
>
> import java.io.ByteArrayInputStream;
> import java.io.ByteArrayOutputStream;
> import java.io.PrintStream;
>
> import java.util.ArrayList;
> import java.util.List;
>
> import javax.xml.parsers.DocumentBuilder;
> import javax.xml.parsers.DocumentBuilderFactory;
>
> import javax.xml.transform.OutputKeys;
> import javax.xml.transform.Transformer;
> import javax.xml.transform.TransformerFactory;
> import javax.xml.transform.TransformerFactoryConfigurationError;
> import javax.xml.transform.dom.DOMSource;
> import javax.xml.transform.stream.StreamResult;
>
> import org.w3c.dom.Document;
> import org.w3c.dom.Element;
> import org.w3c.dom.Text;
>
> public class TransformerTest {
>
>      /** convenience method, prints a byte array in human-readable ASCII
> chars */
>      private static void printArray(byte[] data, PrintStream out) {
>          if (data == null) {
>              out.println("(null)");
>              return;
>          }
>
>          if (data.length == 0) {
>              out.println("(empty array)");
>              return;
>          }
>
>          for (int i = 0; ; ) {
>              byte b = data[i];
>              if (b >= 0x20 && b <= 0x7F) {
>                  out.print(" '" + (char)b + "'");
>              } else {
>                  out.format("0x%02X", (int)b & 0xFF);
>              }
>
>              if (++i == data.length) break;
>
>              if ((i % 16) == 0) {
>                  out.println(',');
>              } else {
>                  out.print(", ");
>              }
>          }
>          out.println();
>      }
>
>      /**
>       * parses a {@code Document} from an {@code InputStream} (and not from a
>       * {@code StringReader}, in order to make sure it’s not a problem when
>       * parsing internal Unicode {@code String}s)
>       */
>      private static Document makeDocument() throws Exception {
>          DocumentBuilder builder =
>                  DocumentBuilderFactory.newInstance().newDocumentBuilder();
>
>          byte[] docBytes = {
>              '<', '?', 'x', 'm', 'l', ' ',
>
>              'v', 'e', 'r', 's', 'i', 'o', 'n', '=',
>              '"', '1', '.', '0', '"', ' ',
>
>          // document is encoded in US-ASCII, but labelled as a superset
>              'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g', '=',
>              '"', 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '"',
>
>              '?', '>',
>
>              '<', 'x', '/', '>'
>          };
>
>          return builder.parse(new ByteArrayInputStream(docBytes));
>      }
>
>      /** prepares a list of transformer factories to test */
>      private static List<TransformerFactory> makeTransformerFactories() {
>          String[] facNames = {
>              "org.apache.xalan.processor.TransformerFactoryImpl",
>              "org.apache.xalan.xsltc.trax.TransformerFactoryImpl",
>
> "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl"
>          };
>
>          List<TransformerFactory> transFactories = new ArrayList<>();
>
>          for (String facName : facNames) {
>              try {
>                  TransformerFactory factory =
>                          TransformerFactory.newInstance(facName, null);
>                  transFactories.add(factory);
>              } catch (TransformerFactoryConfigurationError ex) {
>                  System.out.println("cannot build instance of " + facName);
>              }
>          }
>
>          //transFactories.add(TransformerFactory.newInstance());  // default impl
>
>          return transFactories;
>      }
>
>      private static byte[] transformWith(TransformerFactory transFac,
> Document doc) throws Exception {
>          Transformer trans = transFac.newTransformer();
>          trans.setOutputProperty(OutputKeys.METHOD, "xml");
>          trans.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
>          System.out.println("transformer factory: " +
> transFac.getClass().getName());
>          System.out.println("transformer: " + trans.getClass().getName());
>
>          ByteArrayOutputStream bout = new ByteArrayOutputStream(128);
>
>          trans.transform(new DOMSource(doc), new StreamResult(bout));
>
>          byte[] data = bout.toByteArray();
>
>          return data;
>      }
>
>      public static void main(String[] t) throws Exception {
>          Document doc = makeDocument();
>
>          System.out.println(
>                  "encoding declared in document : " +
>                      doc.getXmlEncoding());
>
>
>          // now insert a text node containing a character which can be
>          // represented in ISO-8859-1, as well as (differently) in UTF-8,
>          // but not in US-ASCII
>
>          Element dstRoot = doc.getDocumentElement();
>          Text text = doc.createTextNode("schön");
>          dstRoot.insertBefore(text, null);
>          System.out.println("root element text: " +
> dstRoot.getTextContent());
>
>
>          List<TransformerFactory> factories = makeTransformerFactories();
>          for (TransformerFactory factory : factories) {
>              System.out.println();
>              byte[] bytes = transformWith(factory, doc);
>
>              System.out.println("resulting bytes:");
>              printArray(bytes, System.out);
>          }
>      }
>
> }
>
>
>
>




More information about the core-libs-dev mailing list