| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.tools.lint.checks; |
| |
| import static com.android.SdkConstants.ATTR_LOCALE; |
| import static com.android.SdkConstants.ATTR_TRANSLATABLE; |
| import static com.android.SdkConstants.FD_RES_VALUES; |
| import static com.android.SdkConstants.TAG_PLURALS; |
| import static com.android.SdkConstants.TAG_STRING; |
| import static com.android.SdkConstants.TAG_STRING_ARRAY; |
| import static com.android.SdkConstants.TOOLS_URI; |
| import static com.android.tools.lint.checks.TypoLookup.isLetter; |
| import static com.google.common.base.Objects.equal; |
| |
| import com.android.annotations.NonNull; |
| import com.android.annotations.Nullable; |
| import com.android.ide.common.resources.configuration.LocaleQualifier; |
| import com.android.resources.ResourceFolderType; |
| import com.android.tools.lint.detector.api.Category; |
| import com.android.tools.lint.detector.api.Context; |
| import com.android.tools.lint.detector.api.Implementation; |
| import com.android.tools.lint.detector.api.Issue; |
| import com.android.tools.lint.detector.api.LintUtils; |
| import com.android.tools.lint.detector.api.Location; |
| import com.android.tools.lint.detector.api.ResourceXmlDetector; |
| import com.android.tools.lint.detector.api.Scope; |
| import com.android.tools.lint.detector.api.Severity; |
| import com.android.tools.lint.detector.api.Speed; |
| import com.android.tools.lint.detector.api.TextFormat; |
| import com.android.tools.lint.detector.api.XmlContext; |
| import com.google.common.base.Charsets; |
| |
| import org.w3c.dom.Attr; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.List; |
| |
| /** |
| * Check which looks for likely typos in Strings. |
| * <p> |
| * TODO: |
| * <ul> |
| * <li> Add check of Java String literals too! |
| * <li> Add support for <b>additional</b> languages. The typo detector is now |
| * multilingual and looks for typos-*locale*.txt files to use. However, |
| * we need to seed it with additional typo databases. I did some searching |
| * and came up with some alternatives. Here's the strategy I used: |
| * Used Google Translate to translate "Wikipedia Common Misspellings", and |
| * then I went to google.no, google.fr etc searching with that translation, and |
| * came up with what looks like wikipedia language local lists of typos. |
| * This is how I found the Norwegian one for example: |
| * <br> |
| * http://no.wikipedia.org/wiki/Wikipedia:Liste_over_alminnelige_stavefeil/Maskinform |
| * <br> |
| * Here are some additional possibilities not yet processed: |
| * <ul> |
| * <li> French: http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Liste_de_fautes_d'orthographe_courantes |
| * (couldn't find a machine-readable version there?) |
| * <li> Swedish: |
| * http://sv.wikipedia.org/wiki/Wikipedia:Lista_%C3%B6ver_vanliga_spr%C3%A5kfel |
| * (couldn't find a machine-readable version there?) |
| * <li> German |
| * http://de.wikipedia.org/wiki/Wikipedia:Liste_von_Tippfehlern/F%C3%BCr_Maschinen |
| * </ul> |
| * <li> Consider also digesting files like |
| * http://sv.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos |
| * See http://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/User_manual. |
| * </ul> |
| */ |
| public class TypoDetector extends ResourceXmlDetector { |
| @Nullable private TypoLookup mLookup; |
| @Nullable private String mLastLanguage; |
| @Nullable private String mLastRegion; |
| @Nullable private String mLanguage; |
| @Nullable private String mRegion; |
| |
| /** The main issue discovered by this detector */ |
| public static final Issue ISSUE = Issue.create( |
| "Typos", //$NON-NLS-1$ |
| "Spelling error", |
| |
| "This check looks through the string definitions, and if it finds any words " + |
| "that look like likely misspellings, they are flagged.", |
| Category.MESSAGES, |
| 7, |
| Severity.WARNING, |
| new Implementation( |
| TypoDetector.class, |
| Scope.RESOURCE_FILE_SCOPE)); |
| |
| /** Constructs a new detector */ |
| public TypoDetector() { |
| } |
| |
| @Override |
| public boolean appliesTo(@NonNull ResourceFolderType folderType) { |
| return folderType == ResourceFolderType.VALUES; |
| } |
| |
| /** Look up the locale and region from the given parent folder name and store it |
| * in {@link #mLanguage} and {@link #mRegion} */ |
| private void initLocale(@NonNull String parent) { |
| mLanguage = null; |
| mRegion = null; |
| |
| if (parent.equals(FD_RES_VALUES)) { |
| return; |
| } |
| |
| LocaleQualifier locale = LintUtils.getLocale(parent); |
| if (locale != null) { |
| mLanguage = locale.getLanguage(); |
| mRegion = locale.hasRegion() ? locale.getRegion() : null; |
| } |
| } |
| |
| @Override |
| public void beforeCheckFile(@NonNull Context context) { |
| initLocale(context.file.getParentFile().getName()); |
| if (mLanguage == null) { |
| // Check to see if the user has specified the language for this folder |
| // using a tools:locale attribute |
| if (context instanceof XmlContext) { |
| Element root = ((XmlContext) context).document.getDocumentElement(); |
| if (root != null) { |
| String locale = root.getAttributeNS(TOOLS_URI, ATTR_LOCALE); |
| if (locale != null && !locale.isEmpty()) { |
| initLocale(FD_RES_VALUES + '-' + locale); |
| } |
| } |
| } |
| |
| if (mLanguage == null) { |
| mLanguage = "en"; //$NON-NLS-1$ |
| } |
| } |
| |
| if (!equal(mLastLanguage, mLanguage) || !equal(mLastRegion, mRegion)) { |
| mLookup = TypoLookup.get(context.getClient(), mLanguage, mRegion); |
| mLastLanguage = mLanguage; |
| mLastRegion = mRegion; |
| } |
| } |
| |
| @NonNull |
| @Override |
| public Speed getSpeed() { |
| return Speed.NORMAL; |
| } |
| |
| @Override |
| public Collection<String> getApplicableElements() { |
| return Arrays.asList( |
| TAG_STRING, |
| TAG_STRING_ARRAY, |
| TAG_PLURALS |
| ); |
| } |
| |
| @Override |
| public void visitElement(@NonNull XmlContext context, @NonNull Element element) { |
| if (mLookup == null) { |
| return; |
| } |
| |
| visit(context, element, element); |
| } |
| |
| private void visit(XmlContext context, Element parent, Node node) { |
| if (node.getNodeType() == Node.TEXT_NODE) { |
| // TODO: Figure out how to deal with entities |
| check(context, parent, node, node.getNodeValue()); |
| } else { |
| NodeList children = node.getChildNodes(); |
| for (int i = 0, n = children.getLength(); i < n; i++) { |
| visit(context, parent, children.item(i)); |
| } |
| } |
| } |
| |
| private void check(XmlContext context, Element element, Node node, String text) { |
| int max = text.length(); |
| int index = 0; |
| int lastWordBegin = -1; |
| int lastWordEnd = -1; |
| boolean checkedTypos = false; |
| |
| for (; index < max; index++) { |
| char c = text.charAt(index); |
| if (!Character.isWhitespace(c)) { |
| if (c == '@' || (c == '?')) { |
| // Don't look for typos in resource references; they are not |
| // user visible anyway |
| return; |
| } |
| break; |
| } |
| } |
| |
| while (index < max) { |
| for (; index < max; index++) { |
| char c = text.charAt(index); |
| if (c == '\\') { |
| index++; |
| } else if (Character.isLetter(c)) { |
| break; |
| } |
| } |
| if (index >= max) { |
| return; |
| } |
| int begin = index; |
| for (; index < max; index++) { |
| char c = text.charAt(index); |
| if (c == '\\') { |
| index++; |
| break; |
| } else if (!Character.isLetter(c)) { |
| break; |
| } else if (text.charAt(index) >= 0x80) { |
| // Switch to UTF-8 handling for this string |
| if (checkedTypos) { |
| // If we've already checked words we may have reported typos |
| // so create a substring from the current word and on. |
| byte[] utf8Text = text.substring(begin).getBytes(Charsets.UTF_8); |
| check(context, element, node, utf8Text, 0, utf8Text.length, text, begin); |
| } else { |
| // If all we've done so far is skip whitespace (common scenario) |
| // then no need to substring the text, just re-search with the |
| // UTF-8 routines |
| byte[] utf8Text = text.getBytes(Charsets.UTF_8); |
| check(context, element, node, utf8Text, 0, utf8Text.length, text, 0); |
| } |
| return; |
| } |
| } |
| |
| int end = index; |
| checkedTypos = true; |
| assert mLookup != null; |
| List<String> replacements = mLookup.getTypos(text, begin, end); |
| if (replacements != null && isTranslatable(element)) { |
| reportTypo(context, node, text, begin, replacements); |
| } |
| |
| checkRepeatedWords(context, element, node, text, lastWordBegin, lastWordEnd, begin, |
| end); |
| |
| lastWordBegin = begin; |
| lastWordEnd = end; |
| index = end + 1; |
| } |
| } |
| |
| private static void checkRepeatedWords(XmlContext context, Element element, Node node, |
| String text, int lastWordBegin, int lastWordEnd, int begin, int end) { |
| if (lastWordBegin != -1 && end - begin == lastWordEnd - lastWordBegin |
| && end - begin > 1) { |
| // See whether we have a repeated word |
| boolean different = false; |
| for (int i = lastWordBegin, j = begin; i < lastWordEnd; i++, j++) { |
| if (text.charAt(i) != text.charAt(j)) { |
| different = true; |
| break; |
| } |
| } |
| if (!different && onlySpace(text, lastWordEnd, begin) && isTranslatable(element)) { |
| reportRepeatedWord(context, node, text, lastWordBegin, begin, end); |
| } |
| } |
| } |
| |
| private static boolean onlySpace(String text, int fromInclusive, int toExclusive) { |
| for (int i = fromInclusive; i < toExclusive; i++) { |
| if (!Character.isWhitespace(text.charAt(i))) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private void check(XmlContext context, Element element, Node node, byte[] utf8Text, |
| int byteStart, int byteEnd, String text, int charStart) { |
| int lastWordBegin = -1; |
| int lastWordEnd = -1; |
| int index = byteStart; |
| while (index < byteEnd) { |
| // Find beginning of word |
| while (index < byteEnd) { |
| byte b = utf8Text[index]; |
| if (b == '\\') { |
| index++; |
| charStart++; |
| if (index < byteEnd) { |
| b = utf8Text[index]; |
| } |
| } else if (isLetter(b)) { |
| break; |
| } |
| index++; |
| if ((b & 0x80) == 0 || (b & 0xC0) == 0xC0) { |
| // First characters in UTF-8 are always ASCII (0 high bit) or 11XXXXXX |
| charStart++; |
| } |
| } |
| |
| if (index >= byteEnd) { |
| return; |
| } |
| int charEnd = charStart; |
| int begin = index; |
| |
| // Find end of word. Unicode has the nice property that even 2nd, 3rd and 4th |
| // bytes won't match these ASCII characters (because the high bit must be set there) |
| while (index < byteEnd) { |
| byte b = utf8Text[index]; |
| if (b == '\\') { |
| index++; |
| charEnd++; |
| if (index < byteEnd) { |
| b = utf8Text[index++]; |
| if ((b & 0x80) == 0 || (b & 0xC0) == 0xC0) { |
| charEnd++; |
| } |
| } |
| break; |
| } else if (!isLetter(b)) { |
| break; |
| } |
| index++; |
| if ((b & 0x80) == 0 || (b & 0xC0) == 0xC0) { |
| // First characters in UTF-8 are always ASCII (0 high bit) or 11XXXXXX |
| charEnd++; |
| } |
| } |
| |
| int end = index; |
| List<String> replacements = mLookup.getTypos(utf8Text, begin, end); |
| if (replacements != null && isTranslatable(element)) { |
| reportTypo(context, node, text, charStart, replacements); |
| } |
| |
| checkRepeatedWords(context, element, node, text, lastWordBegin, lastWordEnd, charStart, |
| charEnd); |
| |
| lastWordBegin = charStart; |
| lastWordEnd = charEnd; |
| charStart = charEnd; |
| } |
| } |
| |
| private static boolean isTranslatable(Element element) { |
| Attr translatable = element.getAttributeNode(ATTR_TRANSLATABLE); |
| return translatable == null || Boolean.valueOf(translatable.getValue()); |
| } |
| |
| /** Report the typo found at the given offset and suggest the given replacements */ |
| private static void reportTypo(XmlContext context, Node node, String text, int begin, |
| List<String> replacements) { |
| if (replacements.size() < 2) { |
| return; |
| } |
| |
| String typo = replacements.get(0); |
| String word = text.substring(begin, begin + typo.length()); |
| |
| String first = null; |
| String message; |
| |
| boolean isCapitalized = Character.isUpperCase(word.charAt(0)); |
| StringBuilder sb = new StringBuilder(40); |
| for (int i = 1, n = replacements.size(); i < n; i++) { |
| String replacement = replacements.get(i); |
| if (first == null) { |
| first = replacement; |
| } |
| if (sb.length() > 0) { |
| sb.append(" or "); |
| } |
| sb.append('"'); |
| if (isCapitalized) { |
| sb.append(Character.toUpperCase(replacement.charAt(0))); |
| sb.append(replacement.substring(1)); |
| } else { |
| sb.append(replacement); |
| } |
| sb.append('"'); |
| } |
| |
| if (first != null && first.equalsIgnoreCase(word)) { |
| if (first.equals(word)) { |
| return; |
| } |
| message = String.format( |
| "\"%1$s\" is usually capitalized as \"%2$s\"", |
| word, first); |
| } else { |
| message = String.format( |
| "\"%1$s\" is a common misspelling; did you mean %2$s ?", |
| word, sb.toString()); |
| } |
| |
| int end = begin + word.length(); |
| context.report(ISSUE, node, context.getLocation(node, begin, end), message); |
| } |
| |
| /** Reports a repeated word */ |
| private static void reportRepeatedWord(XmlContext context, Node node, String text, |
| int lastWordBegin, |
| int begin, int end) { |
| String message = String.format( |
| "Repeated word \"%1$s\" in message: possible typo", |
| text.substring(begin, end)); |
| Location location = context.getLocation(node, lastWordBegin, end); |
| context.report(ISSUE, node, location, message); |
| } |
| |
| /** Returns the suggested replacements, if any, for the given typo. The error |
| * message <b>must</b> be one supplied by lint. |
| * |
| * @param errorMessage the error message |
| * @param format the format of the error message |
| * @return a list of replacement words suggested by the error message |
| */ |
| @Nullable |
| public static List<String> getSuggestions(@NonNull String errorMessage, |
| @NonNull TextFormat format) { |
| errorMessage = format.toText(errorMessage); |
| |
| // The words are all in quotes; the first word is the misspelling, |
| // the other words are the suggested replacements |
| List<String> words = new ArrayList<String>(); |
| // Skip the typo |
| int index = errorMessage.indexOf('"'); |
| index = errorMessage.indexOf('"', index + 1); |
| index++; |
| |
| while (true) { |
| index = errorMessage.indexOf('"', index); |
| if (index == -1) { |
| break; |
| } |
| index++; |
| int start = index; |
| index = errorMessage.indexOf('"', index); |
| if (index == -1) { |
| index = errorMessage.length(); |
| } |
| words.add(errorMessage.substring(start, index)); |
| index++; |
| } |
| |
| return words; |
| } |
| |
| /** |
| * Returns the typo word in the error message from this detector |
| * |
| * @param errorMessage the error message produced earlier by this detector |
| * @param format the format of the error message |
| * @return the typo |
| */ |
| @Nullable |
| public static String getTypo(@NonNull String errorMessage, @NonNull TextFormat format) { |
| errorMessage = format.toText(errorMessage); |
| // The words are all in quotes |
| int index = errorMessage.indexOf('"'); |
| int start = index + 1; |
| index = errorMessage.indexOf('"', start); |
| if (index != -1) { |
| return errorMessage.substring(start, index); |
| } |
| |
| return null; |
| } |
| } |