/hg/icedtea6: Added encoding support for netx.
mwong at icedtea.classpath.org
mwong at icedtea.classpath.org
Thu Jul 8 12:59:57 PDT 2010
changeset 867af494861c in /hg/icedtea6
details: http://icedtea.classpath.org/hg/icedtea6?cmd=changeset;node=867af494861c
author: Man Lung Wong <mwong at redhat.com>
date: Thu Jul 08 15:54:35 2010 -0400
Added encoding support for netx.
diffstat:
2 files changed, 79 insertions(+), 3 deletions(-)
ChangeLog | 10 ++++
netx/net/sourceforge/jnlp/Parser.java | 72 +++++++++++++++++++++++++++++++--
diffs (110 lines):
diff -r cc1498495367 -r 867af494861c ChangeLog
--- a/ChangeLog Thu Jul 08 20:37:25 2010 +0100
+++ b/ChangeLog Thu Jul 08 15:54:35 2010 -0400
@@ -1,3 +1,13 @@ 2010-07-08 Andrew John Hughes <ahughes
+2010-07-08 Man Lung Wong <mwong at redhat.com>
+
+ * netx/net/sourceforge/jnlp/Parser.java:
+ (getRootNode): Use BufferedInputStream instead of InputStream to
+ have the mark and reset methods available. Pass the encoding to the
+ constructor of InputStreamReader, so that the stream is now
+ parsed with the encoding the jnlp file is in.
+ (getEncoding): A new method which checks the first four bytes of input
+ and determines what the file's encoding is.
+
2010-07-08 Andrew John Hughes <ahughes at redhat.com>
* Makefile.am:
diff -r cc1498495367 -r 867af494861c netx/net/sourceforge/jnlp/Parser.java
--- a/netx/net/sourceforge/jnlp/Parser.java Thu Jul 08 20:37:25 2010 +0100
+++ b/netx/net/sourceforge/jnlp/Parser.java Thu Jul 08 15:54:35 2010 -0400
@@ -1168,12 +1168,16 @@ class Parser {
Node document = new Node(TinyParser.parseXML(input));
Node jnlpNode = getChildNode(document, "jnlp"); // skip comments
*/
+
+ //A BufferedInputStream is used to allow marking and resetting
+ //of the stream.
+ BufferedInputStream bs = new BufferedInputStream(input);
/* NANO */
final XMLElement xml = new XMLElement();
final PipedInputStream pin = new PipedInputStream();
- final PipedOutputStream pout = new PipedOutputStream(pin);
- final InputStreamReader isr = new InputStreamReader(input);
+ final PipedOutputStream pout = new PipedOutputStream(pin);
+ final InputStreamReader isr = new InputStreamReader(bs, getEncoding(bs));
// Clean the jnlp xml file of all comments before passing
// it to the parser.
new Thread(
@@ -1196,7 +1200,69 @@ class Parser {
throw new ParseException(R("PBadXML"), ex);
}
}
+
+ /**
+ * Guesses the character encoding of an InputStream from its first four bytes.
+ * The stream is marked before the bytes are read and reset afterwards.
+ * @param input the InputStream to inspect; mark(4) and reset() are called on it
+ * @return the name of the chosen encoding, "UTF-8" when no byte pattern matches
+ */
+ private static String getEncoding(InputStream input) throws IOException{
+ //Fixme: This only recognizes UTF-8, UTF-16, and
+ //UTF-32, which is enough to parse the prolog portion of the xml and
+ //find out the exact encoding (if one is declared). Other encodings
+ //exist, such as ISO 8859, which is 8-bit
+ //but covers Latin characters, and they are not detected here.
+ //So what still needs to be done is to parse the prolog and retrieve
+ //the exact encoding from it.
+ int[] s = new int[4];
+ String encoding = "UTF-8";
+
+ //Peek at the first four bytes: mark the stream, read them into the
+ //int array, then reset so reading resumes from the marked position.
+ input.mark(4);
+ for (int i = 0; i < 4; i++) {
+ s[i] = input.read();
+ }
+ input.reset();
+
+ //Set the encoding based on what the first four bytes of the
+ //input stream turn out to be (following the information from
+ //www.w3.org/TR/REC-xml/#sec-guessing).
+ if (s[0] == 255) {
+ if (s[1] == 254) {
+ if (s[2] != 0 || s[3] != 0) {
+ encoding = "UnicodeLittle";
+ } else {
+ encoding = "X-UTF-32LE-BOM";
+ }
+ }
+ } else if (s[0] == 254 && s[1] == 255 && (s[2] != 0 ||
+ s[3] != 0)) {
+ encoding = "UTF-16";
+
+ } else if (s[0] == 0 && s[1] == 0 && s[2] == 254 &&
+ s[3] == 255) {
+ encoding = "X-UTF-32BE-BOM";
+
+ } else if (s[0] == 0 && s[1] == 0 && s[2] == 0 &&
+ s[3] == 60) {
+ encoding = "UTF-32BE";
+
+ } else if (s[0] == 60 && s[1] == 0 && s[2] == 0 &&
+ s[3] == 0) {
+ encoding = "UTF-32LE";
+
+ } else if (s[0] == 0 && s[1] == 60 && s[2] == 0 &&
+ s[3] == 63) {
+ encoding = "UTF-16BE";
+ } else if (s[0] == 60 && s[1] == 0 && s[2] == 63 &&
+ s[3] == 0) {
+ encoding = "UTF-16LE";
+ }
+
+ return encoding;
+ }
}
-
More information about the distro-pkg-dev
mailing list