blob: de6267823289ca1860de9c4ec671af390b6fb752 [file] [log] [blame]
/*
* Copyright 2000-2012 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intellij.util.text;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.openapi.vfs.CharsetToolkit;
import com.intellij.util.ArrayUtil;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Lightweight scanner that pulls the value of the {@code encoding} pseudo-attribute out of
 * an XML declaration ({@code <?xml ... ?>}) located at the very beginning of a document.
 * <p>
 * No XML parsing is performed: the input is scanned position by position for the
 * {@code encoding} keyword followed by {@code =} and a quoted value. Two overloads exist,
 * one for raw bytes and one for already decoded text; both follow the same algorithm.
 *
 * @author nik
 */
public class XmlCharsetDetector {
  @NonNls private static final String XML_PROLOG_START = "<?xml";
  @NonNls private static final byte[] XML_PROLOG_START_BYTES = CharsetToolkit.getUtf8Bytes(XML_PROLOG_START);
  @NonNls private static final String ENCODING = "encoding";
  @NonNls private static final byte[] ENCODING_BYTES = CharsetToolkit.getUtf8Bytes(ENCODING);
  @NonNls private static final String XML_PROLOG_END = "?>";
  @NonNls private static final byte[] XML_PROLOG_END_BYTES = CharsetToolkit.getUtf8Bytes(XML_PROLOG_END);

  /**
   * Finds the encoding name declared in the XML prolog of a raw byte buffer.
   * <p>
   * A leading UTF-8 byte order mark (as recognized by {@link CharsetToolkit#hasUTF8Bom}) and
   * leading whitespace are skipped before the {@code <?xml} marker is looked for.
   *
   * @param bytes the beginning of the document content; it is dereferenced without a null check
   * @return the characters found between the quotes that follow {@code encoding =} (possibly an
   *         empty string), or {@code null} when the content does not start with {@code <?xml},
   *         when {@code ?>} is reached first, or when the buffer ends before a complete
   *         quoted value has been read
   */
  @Nullable
  public static String extractXmlEncodingFromProlog(final byte[] bytes) {
    int index = 0;
    if (CharsetToolkit.hasUTF8Bom(bytes)) {
      index = CharsetToolkit.UTF8_BOM.length;
    }

    index = skipWhiteSpace(index, bytes);
    if (!ArrayUtil.startsWith(bytes, index, XML_PROLOG_START_BYTES)) return null;
    index += XML_PROLOG_START_BYTES.length;

    while (index < bytes.length) {
      index = skipWhiteSpace(index, bytes);
      // The declaration ended without an encoding pseudo-attribute.
      if (ArrayUtil.startsWith(bytes, index, XML_PROLOG_END_BYTES)) return null;

      if (ArrayUtil.startsWith(bytes, index, ENCODING_BYTES)) {
        index += ENCODING_BYTES.length;
        index = skipWhiteSpace(index, bytes);
        // Not followed by '=': resume the scan from the current position.
        if (index >= bytes.length || bytes[index] != '=') continue;
        index++;
        index = skipWhiteSpace(index, bytes);
        // The value must be opened by a single or a double quote.
        if (index >= bytes.length || bytes[index] != '\'' && bytes[index] != '\"') continue;
        byte quote = bytes[index];
        index++;

        StringBuilder encoding = new StringBuilder();
        while (index < bytes.length) {
          // Only the same kind of quote that opened the value closes it.
          if (bytes[index] == quote) return encoding.toString();
          // Mask to 0..255 first: a bare (char) cast sign-extends bytes >= 0x80
          // into U+FF80..U+FFFF instead of keeping the byte value.
          encoding.append((char)(bytes[index++] & 0xFF));
        }
        // Buffer exhausted without a closing quote; the outer loop terminates below.
      }
      index++;
    }
    return null;
  }

  /**
   * Finds the encoding name declared in the XML prolog of already decoded text.
   * <p>
   * Leading whitespace is skipped before the {@code <?xml} marker is looked for. Unlike the
   * byte overload, no byte order mark handling is done here.
   *
   * @param text the beginning of the document content
   * @return the characters found between the quotes that follow {@code encoding =} (possibly an
   *         empty string), or {@code null} when the text does not start with {@code <?xml},
   *         when {@code ?>} is reached first, or when the text ends before a complete
   *         quoted value has been read
   */
  @Nullable
  public static String extractXmlEncodingFromProlog(@NotNull String text) {
    int index = 0;

    index = skipWhiteSpace(index, text);
    if (!StringUtil.startsWith(text, index, XML_PROLOG_START)) return null;
    index += XML_PROLOG_START.length();

    while (index < text.length()) {
      index = skipWhiteSpace(index, text);
      // The declaration ended without an encoding pseudo-attribute.
      if (StringUtil.startsWith(text, index, XML_PROLOG_END)) return null;

      if (StringUtil.startsWith(text, index, ENCODING)) {
        index += ENCODING.length();
        index = skipWhiteSpace(index, text);
        // Not followed by '=': resume the scan from the current position.
        if (index >= text.length() || text.charAt(index) != '=') continue;
        index++;
        index = skipWhiteSpace(index, text);
        if (index >= text.length()) continue;
        // The value must be opened by a single or a double quote.
        char quote = text.charAt(index);
        if (quote != '\'' && quote != '\"') continue;
        index++;

        StringBuilder encoding = new StringBuilder();
        while (index < text.length()) {
          char c = text.charAt(index);
          // Only the same kind of quote that opened the value closes it.
          if (c == quote) return encoding.toString();
          encoding.append(c);
          index++;
        }
        // Text exhausted without a closing quote; the outer loop terminates below.
      }
      index++;
    }
    return null;
  }

  /**
   * Returns the index of the first byte at or after {@code start} that is not whitespace
   * according to {@link Character#isWhitespace(char)}, or {@code bytes.length} if there is none.
   */
  private static int skipWhiteSpace(int start, @NotNull byte[] bytes) {
    while (start < bytes.length) {
      // Interpret the byte as an unsigned value so that it maps to U+0000..U+00FF.
      char c = (char)(bytes[start] & 0xFF);
      if (!Character.isWhitespace(c)) break;
      start++;
    }
    return start;
  }

  /**
   * Returns the index of the first character at or after {@code start} that is not whitespace
   * according to {@link Character#isWhitespace(char)}, or {@code text.length()} if there is none.
   */
  private static int skipWhiteSpace(int start, @NotNull String text) {
    while (start < text.length()) {
      char c = text.charAt(start);
      if (!Character.isWhitespace(c)) break;
      start++;
    }
    return start;
  }
}