Replace BOM detection and XML guess logic with BOMInputStream

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/io/trunk@1004092 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/java/org/apache/commons/io/input/XmlStreamReader.java b/src/java/org/apache/commons/io/input/XmlStreamReader.java
index 3db3ca3..c17383a 100644
--- a/src/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -32,6 +32,8 @@
 import java.util.regex.Matcher;
 import java.text.MessageFormat;
 
+import org.apache.commons.io.ByteOrderMark;
+
 /**
  * Character stream that handles all the necessary Voodo to figure out the
  * charset encoding of the XML document within the stream.
@@ -75,6 +77,12 @@
 
     private static final String EBCDIC = "CP1047";
 
+    private static final ByteOrderMark XML_UTF_8    = new ByteOrderMark(UTF_8,    0x3C, 0x3F, 0x78, 0x6D);
+    private static final ByteOrderMark XML_UTF_16BE = new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F);
+    private static final ByteOrderMark XML_UTF_16LE = new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00);
+    private static final ByteOrderMark XML_EBCDIC   = new ByteOrderMark(EBCDIC,   0x4C, 0x6F, 0xA7, 0x94);
+
+
     private static String staticDefaultEncoding = null;
 
     private Reader reader;
@@ -406,9 +414,10 @@
 
     private void doRawStream(InputStream is, boolean lenient)
             throws IOException {
-        BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
-        String bomEnc = getBOMEncoding(pis);
-        String xmlGuessEnc = getXMLGuessEncoding(pis);
+        BOMInputStream bom = createBomStream(new BufferedInputStream(is, BUFFER_SIZE)); 
+        BOMInputStream pis = createXmlStream(bom);
+        String bomEnc      = (bom.hasBOM() ? bom.getBOM().getCharsetName() : null);
+        String xmlGuessEnc = (pis.hasBOM() ? pis.getBOM().getCharsetName() : null);
         String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
         String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);
         prepareReader(pis, encoding);
@@ -416,17 +425,30 @@
 
     private void doHttpStream(InputStream is, String httpContentType,
             boolean lenient) throws IOException {
-        BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
+        BOMInputStream bom = createBomStream(new BufferedInputStream(is, BUFFER_SIZE)); 
+        BOMInputStream pis = createXmlStream(bom);
         String cTMime = getContentTypeMime(httpContentType);
         String cTEnc = getContentTypeEncoding(httpContentType);
-        String bomEnc = getBOMEncoding(pis);
-        String xmlGuessEnc = getXMLGuessEncoding(pis);
+        String bomEnc      = (bom.hasBOM() ? bom.getBOM().getCharsetName() : null);
+        String xmlGuessEnc = (pis.hasBOM() ? pis.getBOM().getCharsetName() : null);
         String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
         String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc,
                 xmlGuessEnc, xmlEnc, pis, lenient);
         prepareReader(pis, encoding);
     }
 
+    private BOMInputStream createBomStream(InputStream delegate) {
+        BOMInputStream bis =
+            new BOMInputStream(delegate, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
+        return bis;
+    }
+
+    private BOMInputStream createXmlStream(InputStream delegate) {
+        BOMInputStream bis =
+            new BOMInputStream(delegate, true, XML_UTF_8, XML_UTF_16BE, XML_UTF_16LE, XML_EBCDIC);
+        return bis;
+    }
+
     private void prepareReader(InputStream is, String encoding)
             throws IOException {
         reader = new InputStreamReader(is, encoding);
@@ -556,70 +578,12 @@
         return encoding;
     }
 
-    // returns the BOM in the stream, NULL if not present,
-    // if there was BOM the in the stream it is consumed
-    private static String getBOMEncoding(BufferedInputStream is)
-            throws IOException {
-        String encoding = null;
-        int[] bytes = new int[3];
-        is.mark(3);
-        bytes[0] = is.read();
-        bytes[1] = is.read();
-        bytes[2] = is.read();
-
-        if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
-            encoding = UTF_16BE;
-            is.reset();
-            is.read();
-            is.read();
-        } else if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
-            encoding = UTF_16LE;
-            is.reset();
-            is.read();
-            is.read();
-        } else if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
-            encoding = UTF_8;
-        } else {
-            is.reset();
-        }
-        return encoding;
-    }
-
-    // returns the best guess for the encoding by looking the first bytes of the
-    // stream, '<?'
-    private static String getXMLGuessEncoding(BufferedInputStream is)
-            throws IOException {
-        String encoding = null;
-        int[] bytes = new int[4];
-        is.mark(4);
-        bytes[0] = is.read();
-        bytes[1] = is.read();
-        bytes[2] = is.read();
-        bytes[3] = is.read();
-        is.reset();
-
-        if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00
-                && bytes[3] == 0x3F) {
-            encoding = UTF_16BE;
-        } else if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F
-                && bytes[3] == 0x00) {
-            encoding = UTF_16LE;
-        } else if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78
-                && bytes[3] == 0x6D) {
-            encoding = UTF_8;
-        } else if (bytes[0] == 0x4C && bytes[1] == 0x6F && bytes[2] == 0xA7
-                && bytes[3] == 0x94) {
-            encoding = EBCDIC;
-        }
-        return encoding;
-    }
-
     public static final Pattern ENCODING_PATTERN = Pattern.compile(
             "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))",
             Pattern.MULTILINE);
 
     // returns the encoding declared in the <?xml encoding=...?>, NULL if none
-    private static String getXmlProlog(BufferedInputStream is, String guessedEnc)
+    private static String getXmlProlog(InputStream is, String guessedEnc)
             throws IOException {
         String encoding = null;
         if (guessedEnc != null) {