blob: f1e9f7370e92ef0e7b07ab7da5b687c55919d6db [file] [log] [blame]
package org.unicode.cldr.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.unicode.cldr.icu.LDMLConstants;
import org.xml.sax.InputSource;
public class DoctypeXmlStreamWrapper {
private static final String DOCTYPE = "<!DOCTYPE";
private static final char DOCTYPE_CHARS[] = DOCTYPE.toCharArray();
private static final byte DOCTYPE_BYTES[] = DOCTYPE.getBytes(StandardCharsets.UTF_8);
// the string to look for: xmlns="
private static final String XMLNS_EQUALS = LDMLConstants.XMLNS + "=\"";
/**
* Size of the input buffer, needs to be able to handle any expansion when the header is updated
*/
public static int BUFFER_MAX_SIZE = 1024;
/** Size of the first read, needs to contain xmlns="..." and be less than BUFFER_MAX_SIZE */
public static int BUFFER_READ_SIZE = 512;
/**
* Wrap an InputSource in something that will automatically insert a DTD reference in place of
* an xmlns directive.
*
* @throws IOException
*/
public static InputSource wrap(InputSource src) throws IOException {
Reader r = src.getCharacterStream();
InputStream is = src.getByteStream();
if (r != null) {
src.setCharacterStream(wrap(r));
} else if (is != null) {
src.setByteStream(wrap(is, src.getEncoding()));
} else {
throw new NullPointerException(
"Internal error: Character and Byte stream are both null");
}
return src;
}
/** wrap a byte oriented stream */
public static InputStream wrap(InputStream src, String encoding) throws IOException {
if (encoding == null) {
encoding = "UTF-8";
}
PushbackInputStream pr = new PushbackInputStream(src, BUFFER_MAX_SIZE);
byte inbuf[] = pr.readNBytes(BUFFER_READ_SIZE);
if (!hasDocType(inbuf, encoding)) {
inbuf = fixup(inbuf, encoding).getBytes(encoding);
}
pr.unread(inbuf);
return pr;
}
/** wrap a char oriented stream */
public static Reader wrap(Reader src) throws IOException {
PushbackReader pr = new PushbackReader(src, BUFFER_MAX_SIZE);
char inbuf[] = new char[BUFFER_READ_SIZE];
int readlen = pr.read(inbuf);
if (!hasDocType(inbuf, readlen)) {
char buf2[] = Arrays.copyOf(inbuf, readlen);
inbuf = fixup(new String(buf2)).toCharArray();
readlen = inbuf.length;
}
pr.unread(inbuf, 0, readlen);
return pr;
}
/** Fix an input byte array, including the DOCTYPE */
private static String fixup(byte[] inbuf, String encoding) {
try {
final String s = new String(inbuf, encoding);
return fixup(s);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("While parsing " + encoding, e);
}
}
/** Fix an input String, including DOCTYPE */
private static String fixup(final String s) {
// exit if nothing matches
for (final DtdType d : DtdType.values()) {
if (s.contains(XMLNS_EQUALS + d.getNsUrl())) {
return fixup(s, d);
}
}
// couldn't fix it, just pass through
return s;
}
/** Fix an input String given a specific DtdType. */
private static String fixup(String s, DtdType d) {
int n = s.indexOf("?>");
if (n == -1) {
throw new IllegalArgumentException("Invalid XML prefix: ?> not found.");
}
n += 2; // move the cut-point to the end of the "?>" sequence
final String doctype = "\n" + d.getDoctype() + "\n";
final String s2 = s.substring(0, n) + doctype + s.substring(n);
return s2;
}
private static final boolean hasDocType(byte[] inbuf, String encoding) {
if (inbuf == null || inbuf.length == 0) return false;
// Try as utf-8/ASCII bytes - this will be the common case
if (arrayContains(inbuf, inbuf.length, DOCTYPE_BYTES)) return true;
// break out here
if (encoding == null || encoding.equals("UTF-8")) return false;
// Try 2, with encoding
try {
final String s = new String(inbuf, encoding);
return s.contains(DOCTYPE);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("While parsing " + encoding, e);
}
}
private static final boolean hasDocType(char[] inbuf, int readlen) {
if (inbuf == null || readlen <= 0) {
return false;
}
return arrayContains(inbuf, readlen, DOCTYPE_CHARS);
}
private static boolean arrayContains(char[] inbuf, int inlen, char[] testbuf) {
final int testlen = testbuf.length;
int t = 0;
for (int i = 0; i < inlen; i++) {
if (inbuf[i] == testbuf[t]) {
t++;
if (t == testlen) return true;
} else {
t = 0;
}
}
return false;
}
private static boolean arrayContains(byte[] inbuf, int inlen, byte[] testbuf) {
final int testlen = testbuf.length;
int t = 0;
for (int i = 0; i < inlen; i++) {
if (inbuf[i] == testbuf[t]) {
t++;
if (t == testlen) return true;
} else {
t = 0;
}
}
return false;
}
}