blob: bab32d9cc8c7d8207ae830489ad12fe4082d0b0a [file] [log] [blame]
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.tools.lint.checks;
import static com.android.utils.CharSequences.lastIndexOf;
import com.android.annotations.NonNull;
import com.android.resources.ResourceFolderType;
import com.android.tools.lint.detector.api.Category;
import com.android.tools.lint.detector.api.Implementation;
import com.android.tools.lint.detector.api.Issue;
import com.android.tools.lint.detector.api.LintFix;
import com.android.tools.lint.detector.api.Location;
import com.android.tools.lint.detector.api.ResourceXmlDetector;
import com.android.tools.lint.detector.api.Scope;
import com.android.tools.lint.detector.api.Severity;
import com.android.tools.lint.detector.api.XmlContext;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.Document;
/**
 * Checks that the encoding used in resource files is always UTF-8.
 *
 * <p>Only the first line of each file is inspected, since that is where the XML prologue (and
 * therefore the encoding declaration) is expected to be. Files in {@code raw} resource folders
 * are skipped.
 *
 * <p>TODO: Add a check which looks at files which do not specify the encoding and check the
 * contents to see if it contains characters where it's ambiguous.
 */
public class Utf8Detector extends ResourceXmlDetector {
    /** Detects non-utf8 encodings */
    public static final Issue ISSUE =
            Issue.create(
                    "EnforceUTF8",
                    "Encoding used in resource files is not UTF-8",
                    "XML supports encoding in a wide variety of character sets. However, not all "
                            + "tools handle the XML encoding attribute correctly, and nearly all Android "
                            + "apps use UTF-8, so by using UTF-8 you can protect yourself against subtle "
                            + "bugs when using non-ASCII characters.\n"
                            + "\n"
                            + "In particular, the Android Gradle build system will merge resource XML files "
                            + "assuming the resource files are using UTF-8 encoding.\n",
                    Category.I18N,
                    5,
                    Severity.FATAL,
                    new Implementation(Utf8Detector.class, Scope.RESOURCE_FILE_SCOPE));

    /**
     * Matches an encoding declaration and captures the declared encoding name in group 1.
     * Optional whitespace is accepted on either side of the {@code =} sign, as permitted by the
     * {@code Eq} production of the XML grammar.
     *
     * <p>See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl
     */
    private static final Pattern ENCODING_PATTERN =
            Pattern.compile("encoding\\s*=\\s*['\"](\\S*)['\"]");

    /**
     * Earliest offset at which an encoding name can start: the shortest possible prefix,
     * {@code <?xml encoding="}, is 16 characters long.
     */
    private static final int MIN_ENCODING_NAME_OFFSET = 16;

    /**
     * Number of characters that must remain on the line, counted from the start of a candidate
     * name, for the UTF-8 scan to consider that position: the five characters of {@code utf-8}
     * plus at least one trailing character (such as the closing quote).
     */
    private static final int MIN_TAIL_LENGTH = 6;

    /** Constructs a new {@link Utf8Detector} */
    public Utf8Detector() {}

    /**
     * Inspects the first line of the document and reports {@link #ISSUE} when it declares an
     * encoding other than UTF-8.
     *
     * <p>Nothing is reported when the file lives in a {@code raw} resource folder, when the file
     * contents are unavailable, when the first line mentions {@code utf-8}/{@code utf_8} (in any
     * letter case), or when no encoding declaration can be found on the first line.
     *
     * @param context the XML context for the file being analyzed
     * @param document the parsed document (unused; the raw file contents are examined instead)
     */
    @Override
    public void visitDocument(@NonNull XmlContext context, @NonNull Document document) {
        if (context.getResourceFolderType() == ResourceFolderType.RAW) {
            return;
        }

        CharSequence xml = context.getContents();
        if (xml == null) {
            return;
        }

        // AAPT: The prologue must be in the first line
        int lineEnd = findFirstLineEnd(xml);
        if (mentionsUtf8(xml, lineEnd)) {
            return;
        }

        int encodingIndex = lastIndexOf(xml, "encoding", lineEnd);
        if (encodingIndex < 0 || encodingIndex >= lineEnd) {
            // No encoding declaration on the first line.
            return;
        }

        // Restrict the search to the first line. Searching to the end of the file could
        // otherwise pick up an unrelated encoding="..." attribute further down in the document
        // and report (and offer to rewrite) the wrong text.
        Matcher matcher = ENCODING_PATTERN.matcher(xml).region(encodingIndex, lineEnd);
        if (!matcher.find()) {
            return;
        }

        String encoding = matcher.group(1);
        // Group offsets are absolute offsets into xml, regardless of the matcher region.
        Location location = Location.create(context.file, xml, matcher.start(1), matcher.end(1));
        LintFix fix = fix().replace().all().with("utf-8").build();
        String message =
                String.format(
                        "%1$s: Not using UTF-8 as the file encoding. This can lead to subtle "
                                + "bugs with non-ascii characters",
                        encoding);
        context.report(ISSUE, null, location, message, fix);
    }

    /**
     * Returns the offset of the first {@code \n} or {@code \r} in {@code xml}, or the length of
     * {@code xml} when it contains no line terminator.
     */
    private static int findFirstLineEnd(@NonNull CharSequence xml) {
        int length = xml.length();
        for (int offset = 0; offset < length; offset++) {
            char c = xml.charAt(offset);
            if (c == '\n' || c == '\r') {
                return offset;
            }
        }
        return length;
    }

    /**
     * Returns true when the first line (the characters before {@code lineEnd}) contains
     * {@code utf-8} or {@code utf_8}, compared without regard to letter case, starting no earlier
     * than {@link #MIN_ENCODING_NAME_OFFSET} and followed by at least one more character on the
     * line.
     */
    private static boolean mentionsUtf8(@NonNull CharSequence xml, int lineEnd) {
        int lastStart = lineEnd - MIN_TAIL_LENGTH;
        for (int i = MIN_ENCODING_NAME_OFFSET; i <= lastStart; i++) {
            char u = xml.charAt(i);
            if (u != 'u' && u != 'U') {
                continue;
            }
            char t = xml.charAt(i + 1);
            if (t != 't' && t != 'T') {
                continue;
            }
            char f = xml.charAt(i + 2);
            if (f != 'f' && f != 'F') {
                continue;
            }
            char separator = xml.charAt(i + 3);
            if (separator != '-' && separator != '_') {
                continue;
            }
            if (xml.charAt(i + 4) == '8') {
                return true;
            }
        }
        return false;
    }
}