lint/libs/lint-checks/src/main/java/com/android/tools/lint/checks/TypoDetector.java - platform/tools/base - Git at Google

 /*
  * Copyright (C) 2011 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.android.tools.lint.checks;

 import static com.android.SdkConstants.ATTR_TRANSLATABLE;
 import static com.android.SdkConstants.TAG_PLURALS;
 import static com.android.SdkConstants.TAG_STRING;
 import static com.android.SdkConstants.TAG_STRING_ARRAY;
 import static com.android.tools.lint.checks.TypoLookup.isLetter;
 import static com.google.common.base.Objects.equal;

 import com.android.annotations.NonNull;
 import com.android.annotations.Nullable;
 import com.android.ide.common.resources.configuration.LocaleQualifier;
 import com.android.resources.ResourceFolderType;
 import com.android.tools.lint.detector.api.Category;
 import com.android.tools.lint.detector.api.Context;
 import com.android.tools.lint.detector.api.Implementation;
 import com.android.tools.lint.detector.api.Issue;
 import com.android.tools.lint.detector.api.Lint;
 import com.android.tools.lint.detector.api.LintFix;
 import com.android.tools.lint.detector.api.Location;
 import com.android.tools.lint.detector.api.ResourceXmlDetector;
 import com.android.tools.lint.detector.api.Scope;
 import com.android.tools.lint.detector.api.Severity;
 import com.android.tools.lint.detector.api.XmlContext;
 import com.android.utils.StringHelper;
 import com.google.common.base.Charsets;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import org.w3c.dom.Attr;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;

 /**
  * Check which looks for likely typos in Strings.
  *
  * <p>TODO:
  *
  * <ul>
  *   <li>Add check of Java String literals too!
  *   <li>Add support for <b>additional</b> languages. The typo detector is now multilingual and
  *       looks for typos-*locale*.txt files to use. However, we need to seed it with additional typo
  *       databases. I did some searching and came up with some alternatives. Here's the strategy I
  *       used: Used Google Translate to translate "Wikipedia Common Misspellings", and then I went
  *       to google.no, google.fr etc searching with that translation, and came up with what looks
  *       like wikipedia language local lists of typos. This is how I found the Norwegian one for
  *       example: <br>
  *       http://no.wikipedia.org/wiki/Wikipedia:Liste_over_alminnelige_stavefeil/Maskinform <br>
  *       Here are some additional possibilities not yet processed:
  *       <ul>
  *         <li>French:
  *             http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Liste_de_fautes_d'orthographe_courantes
  *             (couldn't find a machine-readable version there?)
  *         <li>Swedish: http://sv.wikipedia.org/wiki/Wikipedia:Lista_%C3%B6ver_vanliga_spr%C3%A5kfel
  *             (couldn't find a machine-readable version there?)
  *         <li>German
  *             http://de.wikipedia.org/wiki/Wikipedia:Liste_von_Tippfehlern/F%C3%BCr_Maschinen
  *       </ul>
  *   <li>Consider also digesting files like
  *       http://sv.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos See
  *       http://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/User_manual.
  * </ul>
  */
 public class TypoDetector extends ResourceXmlDetector {
     /** Typo database for the current file; {@link #visitElement} does nothing while it is null. */
     @Nullable private TypoLookup mLookup;

     /** Language that {@link #mLookup} was last fetched for. */
     @Nullable private String mLastLanguage;

     /** Region that {@link #mLookup} was last fetched for. */
     @Nullable private String mLastRegion;

     /** Language of the file currently being checked. */
     @Nullable private String mLanguage;

     /** Region of the file currently being checked, or null when the locale has no region. */
     @Nullable private String mRegion;

     /** The main issue discovered by this detector */
     public static final Issue ISSUE =
             Issue.create(
                     "Typos",
                     "Spelling error",
                     "This check looks through the string definitions, and if it finds any words "
                             + "that look like likely misspellings, they are flagged.",
                     Category.MESSAGES,
                     7,
                     Severity.WARNING,
                     new Implementation(TypoDetector.class, Scope.RESOURCE_FILE_SCOPE));

     /** Constructs a new detector */
     public TypoDetector() {}

     /** Restricts this detector to resource files in {@code values} folders. */
     @Override
     public boolean appliesTo(@NonNull ResourceFolderType folderType) {
         return folderType == ResourceFolderType.VALUES;
     }

     /**
      * Looks up the locale for the given context via {@link Lint#getLocale} and stores its
      * language and region in {@link #mLanguage} and {@link #mRegion}. Both fields are reset to
      * null first and stay null when no locale with a language is available.
      */
     private void initLocale(@NonNull XmlContext context) {
         mLanguage = null;
         mRegion = null;

         LocaleQualifier locale = Lint.getLocale(context);
         if (locale != null && locale.hasLanguage()) {
             mLanguage = locale.getLanguage();
             mRegion = locale.hasRegion() ? locale.getRegion() : null;
         }
     }

     /**
      * Determines the locale of the file about to be checked (falling back to {@code "en"} when
      * no language is found) and fetches a new {@link TypoLookup} only when the language or the
      * region differs from the one used for the previous file.
      */
     @Override
     public void beforeCheckFile(@NonNull Context context) {
         initLocale((XmlContext) context);
         if (mLanguage == null) {
             mLanguage = "en";
         }

         boolean sameLocale = equal(mLastLanguage, mLanguage) && equal(mLastRegion, mRegion);
         if (!sameLocale) {
             mLookup = TypoLookup.Companion.get(context.getClient(), mLanguage, mRegion);
             mLastLanguage = mLanguage;
             mLastRegion = mRegion;
         }
     }

     /** Returns the element tags whose text content is checked for typos. */
     @Override
     public Collection<String> getApplicableElements() {
         return Arrays.asList(TAG_STRING, TAG_STRING_ARRAY, TAG_PLURALS);
     }

     /** Checks all text below the given element, unless no typo database is loaded. */
     @Override
     public void visitElement(@NonNull XmlContext context, @NonNull Element element) {
         if (mLookup == null) {
             return;
         }

         visit(context, element, element);
     }

     /**
      * Recursively walks {@code node}, checking the value of every text node that is reached.
      *
      * @param context the XML context used for reporting
      * @param parent the applicable element the walk started from; used for the translatable check
      * @param node the node to inspect
      */
     private void visit(XmlContext context, Element parent, Node node) {
         if (node.getNodeType() == Node.TEXT_NODE) {
             // TODO: Figure out how to deal with entities
             check(context, parent, node, node.getNodeValue());
         } else {
             NodeList children = node.getChildNodes();
             for (int i = 0, n = children.getLength(); i < n; i++) {
                 visit(context, parent, children.item(i));
             }
         }
     }

     /**
      * Scans {@code text} word by word, looking each word up in the typo database and checking
      * for immediately repeated words. As soon as a word contains a character at or above 0x80
      * the remainder is handed over to the UTF-8 based overload.
      *
      * @param context the XML context used for reporting
      * @param element the element whose {@code translatable} attribute decides whether to report
      * @param node the text node that {@code text} came from; offsets are relative to it
      * @param text the text to scan
      */
     private void check(XmlContext context, Element element, Node node, String text) {
         int max = text.length();
         int index = 0;
         int lastWordBegin = -1;
         int lastWordEnd = -1;
         boolean checkedTypos = false;

         // Inspect the first non-whitespace character
         for (; index < max; index++) {
             char c = text.charAt(index);
             if (!Character.isWhitespace(c)) {
                 if (c == '@' || c == '?') {
                     // Don't look for typos in resource references; they are not
                     // user visible anyway
                     return;
                 }
                 break;
             }
         }

         while (index < max) {
             // Find the beginning of the next word; a backslash also skips the character after it
             for (; index < max; index++) {
                 char c = text.charAt(index);
                 if (c == '\\') {
                     index++;
                 } else if (Character.isLetter(c)) {
                     break;
                 }
             }
             if (index >= max) {
                 return;
             }
             int begin = index;

             // Find the end of the word
             for (; index < max; index++) {
                 char c = text.charAt(index);
                 if (c == '\\') {
                     // NOTE(review): the index is advanced before breaking, so the backslash
                     // itself ends up inside [begin, end) -- confirm that this is intended.
                     index++;
                     break;
                 } else if (!Character.isLetter(c) && c != '_') {
                     break;
                 } else if (c >= 0x80) {
                     // Switch to UTF-8 handling for this string
                     if (checkedTypos) {
                         // If we've already checked words we may have reported typos
                         // so create a substring from the current word and on.
                         byte[] utf8Text = text.substring(begin).getBytes(Charsets.UTF_8);
                         check(context, element, node, utf8Text, 0, utf8Text.length, text, begin);
                     } else {
                         // If all we've done so far is skip whitespace (common scenario)
                         // then no need to substring the text, just re-search with the
                         // UTF-8 routines
                         byte[] utf8Text = text.getBytes(Charsets.UTF_8);
                         check(context, element, node, utf8Text, 0, utf8Text.length, text, 0);
                     }
                     return;
                 }
             }

             int end = index;
             checkedTypos = true;
             assert mLookup != null;
             List<String> replacements = mLookup.getTypos(text, begin, end);
             if (replacements != null && isTranslatable(element)) {
                 reportTypo(context, node, text, begin, replacements);
             }

             checkRepeatedWords(
                     context, element, node, text, lastWordBegin, lastWordEnd, begin, end);

             lastWordBegin = begin;
             lastWordEnd = end;
             index = end + 1;
         }
     }

     /**
      * Reports a repeated word when the current word {@code [begin, end)} is longer than one
      * character, is identical to the previous word {@code [lastWordBegin, lastWordEnd)}, is
      * separated from it by whitespace only, and the element is translatable.
      *
      * @param lastWordBegin start offset of the previous word, or -1 if there is none
      * @param lastWordEnd end offset (exclusive) of the previous word
      * @param begin start offset of the current word
      * @param end end offset (exclusive) of the current word
      */
     private void checkRepeatedWords(
             XmlContext context,
             Element element,
             Node node,
             String text,
             int lastWordBegin,
             int lastWordEnd,
             int begin,
             int end) {
         int length = end - begin;
         if (lastWordBegin == -1 || length != lastWordEnd - lastWordBegin || length <= 1) {
             return;
         }

         // regionMatches returns false (rather than throwing) for offsets outside the text
         boolean same = text.regionMatches(lastWordBegin, text, begin, length);
         if (same && onlySpace(text, lastWordEnd, begin) && isTranslatable(element)) {
             reportRepeatedWord(context, node, text, lastWordBegin, begin, end);
         }
     }

     /** Returns true if every character in {@code [fromInclusive, toExclusive)} is whitespace. */
     private static boolean onlySpace(String text, int fromInclusive, int toExclusive) {
         for (int i = fromInclusive; i < toExclusive; i++) {
             if (!Character.isWhitespace(text.charAt(i))) {
                 return false;
             }
         }

         return true;
     }

     /**
      * Returns how many Java {@code char}s the UTF-8 sequence introduced by {@code b} occupies in
      * the corresponding String: 0 for a continuation byte (it belongs to a character that was
      * already counted), 2 for the lead byte of a 4-byte sequence (such characters are stored as
      * a surrogate pair) and 1 for every other byte.
      */
     private static int utf16Units(byte b) {
         if ((b & 0x80) == 0) {
             return 1; // ASCII
         }
         if ((b & 0xC0) == 0x80) {
             return 0; // 10XXXXXX: continuation byte
         }
         return (b & 0xF8) == 0xF0 ? 2 : 1; // 11110XXX starts a 4-byte sequence
     }

     /**
      * UTF-8 variant of the word scan: walks the bytes in {@code utf8Text} while keeping a
      * parallel char offset into {@code text}, so that lookups are done on the bytes and reports
      * are made with String offsets.
      *
      * @param utf8Text the UTF-8 encoding of {@code text}, or of a suffix of it
      * @param byteStart offset of the first byte to scan
      * @param byteEnd offset after the last byte to scan
      * @param text the complete text of the node
      * @param charStart the offset in {@code text} that corresponds to {@code byteStart}
      */
     private void check(
             XmlContext context,
             Element element,
             Node node,
             byte[] utf8Text,
             int byteStart,
             int byteEnd,
             String text,
             int charStart) {
         assert mLookup != null;
         int lastWordBegin = -1;
         int lastWordEnd = -1;
         int index = byteStart;
         while (index < byteEnd) {
             // Find beginning of word
             while (index < byteEnd) {
                 byte b = utf8Text[index];
                 if (b == '\\') {
                     // Skip the backslash; the escaped byte is consumed below
                     index++;
                     charStart++;
                     if (index < byteEnd) {
                         b = utf8Text[index];
                     }
                 } else if (isLetter(b)) {
                     break;
                 }
                 index++;
                 charStart += utf16Units(b);
             }

             if (index >= byteEnd) {
                 return;
             }
             int charEnd = charStart;
             int begin = index;

             // Find end of word. Unicode has the nice property that even 2nd, 3rd and 4th
             // bytes won't match these ASCII characters (because the high bit must be set there)
             while (index < byteEnd) {
                 byte b = utf8Text[index];
                 if (b == '\\') {
                     index++;
                     charEnd++;
                     if (index < byteEnd) {
                         b = utf8Text[index++];
                         charEnd += utf16Units(b);
                     }
                     break;
                 } else if (!isLetter(b)) {
                     break;
                 }
                 index++;
                 charEnd += utf16Units(b);
             }

             int end = index;
             List<String> replacements = mLookup.getTypos(utf8Text, begin, end);
             if (replacements != null && isTranslatable(element)) {
                 reportTypo(context, node, text, charStart, replacements);
             }

             checkRepeatedWords(
                     context, element, node, text, lastWordBegin, lastWordEnd, charStart, charEnd);

             lastWordBegin = charStart;
             lastWordEnd = charEnd;
             charStart = charEnd;
         }
     }

     /**
      * Returns true unless the element carries a {@code translatable} attribute whose value is
      * something other than {@code "true"} (compared ignoring case).
      */
     private static boolean isTranslatable(Element element) {
         Attr translatable = element.getAttributeNode(ATTR_TRANSLATABLE);
         return translatable == null || Boolean.parseBoolean(translatable.getValue());
     }

     /**
      * Report the typo found at the given offset and suggest the given replacements.
      *
      * @param begin offset in {@code text} where the misspelled word starts
      * @param replacements the first entry is treated as the typo (only its length is used, to
      *     cut the word out of {@code text}); the remaining entries are the suggestions. Nothing
      *     is reported when there is no suggestion, or when the first suggestion is exactly the
      *     word already in the text.
      */
     private void reportTypo(
             XmlContext context, Node node, String text, int begin, List<String> replacements) {
         if (replacements.size() < 2) {
             return;
         }

         String typo = replacements.get(0);
         String word = text.substring(begin, begin + typo.length());

         String first = null;
         String message;

         LintFix.GroupBuilder fixBuilder = fix().alternatives();
         boolean isCapitalized = Character.isUpperCase(word.charAt(0));
         StringBuilder sb = new StringBuilder(40);
         for (int i = 1, n = replacements.size(); i < n; i++) {
             String replacement = replacements.get(i);
             if (first == null) {
                 // Remembered before capitalization is applied
                 first = replacement;
             }
             if (sb.length() > 0) {
                 sb.append(" or ");
             }
             sb.append('"');

             if (isCapitalized) {
                 replacement = StringHelper.usLocaleCapitalize(replacement);
             }
             sb.append(replacement);
             fixBuilder.add(
                     fix().name("Replace with \"" + replacement + "\"")
                             .replace()
                             .text(word)
                             .with(replacement)
                             .build());
             sb.append('"');
         }
         LintFix fix = fixBuilder.build();

         if (first != null && first.equalsIgnoreCase(word)) {
             if (first.equals(word)) {
                 return;
             }
             message = String.format("\"%1$s\" is usually capitalized as \"%2$s\"", word, first);
         } else {
             message =
                     String.format(
                             "\"%1$s\" is a common misspelling; did you mean %2$s ?",
                             word, sb.toString());
         }

         int end = begin + word.length();
         context.report(ISSUE, node, context.getLocation(node, begin, end), message, fix);
     }

     /**
      * Reports a repeated word, unless the word is on the allow list. The reported range spans
      * both occurrences; the quick fix deletes one occurrence together with one adjacent space
      * when there is one.
      *
      * @param lastWordBegin start offset of the first occurrence
      * @param begin start offset of the second occurrence
      * @param end end offset (exclusive) of the second occurrence
      */
     private void reportRepeatedWord(
             XmlContext context, Node node, String text, int lastWordBegin, int begin, int end) {
         String word = text.substring(begin, end);

         if (isAllowed(word)) {
             return;
         }

         String message = String.format("Repeated word \"%1$s\" in message: possible typo", word);

         // The bounds include the first and the last character of the text, so that a
         // space at either edge is deleted along with the word.
         String replace;
         if (lastWordBegin > 0 && text.charAt(lastWordBegin - 1) == ' ') {
             replace = ' ' + word;
         } else if (end < text.length() && text.charAt(end) == ' ') {
             replace = word + ' ';
         } else {
             replace = word;
         }
         LintFix fix = fix().name("Delete repeated word").replace().text(replace).with("").build();

         Location location = context.getLocation(node, lastWordBegin, end);
         context.report(ISSUE, node, location, message, fix);
     }

     /** Returns true for words that are not reported when they are repeated. */
     private static boolean isAllowed(@NonNull String word) {
         // See https://en.wikipedia.org/wiki/Reduplication

         // Capitalized: names or place names. There are various places
         // with repeated words, such as Pago Pago
         // https://en.wikipedia.org/wiki/List_of_reduplicated_place_names
         if (Character.isUpperCase(word.charAt(0))) {
             return true;
         }

         // Some known/common-ish exceptions:
         switch (word) {
             case "that": // e.g. "I know that that will not work."
             case "yadda":
             case "bye":
             case "choo":
             case "night":
             case "dot":
             case "tsk":
             case "no":
                 return true;
             default:
                 return false;
         }
     }
 }
	/*
	* Copyright (C) 2011 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.android.tools.lint.checks;

	import static com.android.SdkConstants.ATTR_TRANSLATABLE;
	import static com.android.SdkConstants.TAG_PLURALS;
	import static com.android.SdkConstants.TAG_STRING;
	import static com.android.SdkConstants.TAG_STRING_ARRAY;
	import static com.android.tools.lint.checks.TypoLookup.isLetter;
	import static com.google.common.base.Objects.equal;

	import com.android.annotations.NonNull;
	import com.android.annotations.Nullable;
	import com.android.ide.common.resources.configuration.LocaleQualifier;
	import com.android.resources.ResourceFolderType;
	import com.android.tools.lint.detector.api.Category;
	import com.android.tools.lint.detector.api.Context;
	import com.android.tools.lint.detector.api.Implementation;
	import com.android.tools.lint.detector.api.Issue;
	import com.android.tools.lint.detector.api.Lint;
	import com.android.tools.lint.detector.api.LintFix;
	import com.android.tools.lint.detector.api.Location;
	import com.android.tools.lint.detector.api.ResourceXmlDetector;
	import com.android.tools.lint.detector.api.Scope;
	import com.android.tools.lint.detector.api.Severity;
	import com.android.tools.lint.detector.api.XmlContext;
	import com.android.utils.StringHelper;
	import com.google.common.base.Charsets;
	import java.util.Arrays;
	import java.util.Collection;
	import java.util.List;
	import org.w3c.dom.Attr;
	import org.w3c.dom.Element;
	import org.w3c.dom.Node;
	import org.w3c.dom.NodeList;

	/**
	* Check which looks for likely typos in Strings.
	*
	* <p>TODO:
	*
	* <ul>
	* <li>Add check of Java String literals too!
	* <li>Add support for <b>additional</b> languages. The typo detector is now multilingual and
	* looks for typos-*locale*.txt files to use. However, we need to seed it with additional typo
	* databases. I did some searching and came up with some alternatives. Here's the strategy I
	* used: Used Google Translate to translate "Wikipedia Common Misspellings", and then I went
	* to google.no, google.fr etc searching with that translation, and came up with what looks
	* like wikipedia language local lists of typos. This is how I found the Norwegian one for
	* example: <br>
	* http://no.wikipedia.org/wiki/Wikipedia:Liste_over_alminnelige_stavefeil/Maskinform <br>
	* Here are some additional possibilities not yet processed:
	* <ul>
	* <li>French:
	* http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Liste_de_fautes_d'orthographe_courantes
	* (couldn't find a machine-readable version there?)
	* <li>Swedish: http://sv.wikipedia.org/wiki/Wikipedia:Lista_%C3%B6ver_vanliga_spr%C3%A5kfel
	* (couldn't find a machine-readable version there?)
	* <li>German
	* http://de.wikipedia.org/wiki/Wikipedia:Liste_von_Tippfehlern/F%C3%BCr_Maschinen
	* </ul>
	* <li>Consider also digesting files like
	* http://sv.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos See
	* http://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/User_manual.
	* </ul>
	*/
	public class TypoDetector extends ResourceXmlDetector {
	@Nullable private TypoLookup mLookup;
	@Nullable private String mLastLanguage;
	@Nullable private String mLastRegion;
	@Nullable private String mLanguage;
	@Nullable private String mRegion;

	/** The main issue discovered by this detector */
	public static final Issue ISSUE =
	Issue.create(
	"Typos",
	"Spelling error",
	"This check looks through the string definitions, and if it finds any words "
	+ "that look like likely misspellings, they are flagged.",
	Category.MESSAGES,
	7,
	Severity.WARNING,
	new Implementation(TypoDetector.class, Scope.RESOURCE_FILE_SCOPE));

	/** Constructs a new detector */
	public TypoDetector() {}

	@Override
	public boolean appliesTo(@NonNull ResourceFolderType folderType) {
	return folderType == ResourceFolderType.VALUES;
	}

	/**
	* Look up the locale and region from the given parent folder name and store it in {@link
	* #mLanguage} and {@link #mRegion}
	*/
	private void initLocale(@NonNull XmlContext context) {
	mLanguage = null;
	mRegion = null;

	LocaleQualifier locale = Lint.getLocale(context);
	if (locale != null && locale.hasLanguage()) {
	mLanguage = locale.getLanguage();
	mRegion = locale.hasRegion() ? locale.getRegion() : null;
	}
	}

	@Override
	public void beforeCheckFile(@NonNull Context context) {
	initLocale((XmlContext) context);
	if (mLanguage == null) {
	mLanguage = "en";
	}

	if (!equal(mLastLanguage, mLanguage) \|\| !equal(mLastRegion, mRegion)) {
	mLookup = TypoLookup.Companion.get(context.getClient(), mLanguage, mRegion);
	mLastLanguage = mLanguage;
	mLastRegion = mRegion;
	}
	}

	@Override
	public Collection<String> getApplicableElements() {
	return Arrays.asList(TAG_STRING, TAG_STRING_ARRAY, TAG_PLURALS);
	}

	@Override
	public void visitElement(@NonNull XmlContext context, @NonNull Element element) {
	if (mLookup == null) {
	return;
	}

	visit(context, element, element);
	}

	private void visit(XmlContext context, Element parent, Node node) {
	if (node.getNodeType() == Node.TEXT_NODE) {
	// TODO: Figure out how to deal with entities
	check(context, parent, node, node.getNodeValue());
	} else {
	NodeList children = node.getChildNodes();
	for (int i = 0, n = children.getLength(); i < n; i++) {
	visit(context, parent, children.item(i));
	}
	}
	}

	private void check(XmlContext context, Element element, Node node, String text) {
	int max = text.length();
	int index = 0;
	int lastWordBegin = -1;
	int lastWordEnd = -1;
	boolean checkedTypos = false;

	for (; index < max; index++) {
	char c = text.charAt(index);
	if (!Character.isWhitespace(c)) {
	if (c == '@' \|\| (c == '?')) {
	// Don't look for typos in resource references; they are not
	// user visible anyway
	return;
	}
	break;
	}
	}

	while (index < max) {
	for (; index < max; index++) {
	char c = text.charAt(index);
	if (c == '\\') {
	index++;
	} else if (Character.isLetter(c)) {
	break;
	}
	}
	if (index >= max) {
	return;
	}
	int begin = index;
	for (; index < max; index++) {
	char c = text.charAt(index);
	if (c == '\\') {
	index++;
	break;
	} else if (!Character.isLetter(c) && c != '_') {
	break;
	} else if (text.charAt(index) >= 0x80) {
	// Switch to UTF-8 handling for this string
	if (checkedTypos) {
	// If we've already checked words we may have reported typos
	// so create a substring from the current word and on.
	byte[] utf8Text = text.substring(begin).getBytes(Charsets.UTF_8);
	check(context, element, node, utf8Text, 0, utf8Text.length, text, begin);
	} else {
	// If all we've done so far is skip whitespace (common scenario)
	// then no need to substring the text, just re-search with the
	// UTF-8 routines
	byte[] utf8Text = text.getBytes(Charsets.UTF_8);
	check(context, element, node, utf8Text, 0, utf8Text.length, text, 0);
	}
	return;
	}
	}

	int end = index;
	checkedTypos = true;
	assert mLookup != null;
	List<String> replacements = mLookup.getTypos(text, begin, end);
	if (replacements != null && isTranslatable(element)) {
	reportTypo(context, node, text, begin, replacements);
	}

	checkRepeatedWords(
	context, element, node, text, lastWordBegin, lastWordEnd, begin, end);

	lastWordBegin = begin;
	lastWordEnd = end;
	index = end + 1;
	}
	}

	private void checkRepeatedWords(
	XmlContext context,
	Element element,
	Node node,
	String text,
	int lastWordBegin,
	int lastWordEnd,
	int begin,
	int end) {
	if (lastWordBegin != -1 && end - begin == lastWordEnd - lastWordBegin && end - begin > 1) {
	// See whether we have a repeated word
	boolean different = false;
	for (int i = lastWordBegin, j = begin; i < lastWordEnd; i++, j++) {
	if (text.charAt(i) != text.charAt(j)) {
	different = true;
	break;
	}
	}
	if (!different && onlySpace(text, lastWordEnd, begin) && isTranslatable(element)) {
	reportRepeatedWord(context, node, text, lastWordBegin, begin, end);
	}
	}
	}

	private static boolean onlySpace(String text, int fromInclusive, int toExclusive) {
	for (int i = fromInclusive; i < toExclusive; i++) {
	if (!Character.isWhitespace(text.charAt(i))) {
	return false;
	}
	}

	return true;
	}

	private void check(
	XmlContext context,
	Element element,
	Node node,
	byte[] utf8Text,
	int byteStart,
	int byteEnd,
	String text,
	int charStart) {
	int lastWordBegin = -1;
	int lastWordEnd = -1;
	int index = byteStart;
	while (index < byteEnd) {
	// Find beginning of word
	while (index < byteEnd) {
	byte b = utf8Text[index];
	if (b == '\\') {
	index++;
	charStart++;
	if (index < byteEnd) {
	b = utf8Text[index];
	}
	} else if (isLetter(b)) {
	break;
	}
	index++;
	if ((b & 0x80) == 0 \|\| (b & 0xC0) == 0xC0) {
	// First characters in UTF-8 are always ASCII (0 high bit) or 11XXXXXX
	charStart++;
	}
	}

	if (index >= byteEnd) {
	return;
	}
	int charEnd = charStart;
	int begin = index;

	// Find end of word. Unicode has the nice property that even 2nd, 3rd and 4th
	// bytes won't match these ASCII characters (because the high bit must be set there)
	while (index < byteEnd) {
	byte b = utf8Text[index];
	if (b == '\\') {
	index++;
	charEnd++;
	if (index < byteEnd) {
	b = utf8Text[index++];
	if ((b & 0x80) == 0 \|\| (b & 0xC0) == 0xC0) {
	charEnd++;
	}
	}
	break;
	} else if (!isLetter(b)) {
	break;
	}
	index++;
	if ((b & 0x80) == 0 \|\| (b & 0xC0) == 0xC0) {
	// First characters in UTF-8 are always ASCII (0 high bit) or 11XXXXXX
	charEnd++;
	}
	}

	int end = index;
	List<String> replacements = mLookup.getTypos(utf8Text, begin, end);
	if (replacements != null && isTranslatable(element)) {
	reportTypo(context, node, text, charStart, replacements);
	}

	checkRepeatedWords(
	context, element, node, text, lastWordBegin, lastWordEnd, charStart, charEnd);

	lastWordBegin = charStart;
	lastWordEnd = charEnd;
	charStart = charEnd;
	}
	}

	private static boolean isTranslatable(Element element) {
	Attr translatable = element.getAttributeNode(ATTR_TRANSLATABLE);
	return translatable == null \|\| Boolean.valueOf(translatable.getValue());
	}

	/** Report the typo found at the given offset and suggest the given replacements */
	private void reportTypo(
	XmlContext context, Node node, String text, int begin, List<String> replacements) {
	if (replacements.size() < 2) {
	return;
	}

	String typo = replacements.get(0);
	String word = text.substring(begin, begin + typo.length());

	String first = null;
	String message;

	LintFix.GroupBuilder fixBuilder = fix().alternatives();
	boolean isCapitalized = Character.isUpperCase(word.charAt(0));
	StringBuilder sb = new StringBuilder(40);
	for (int i = 1, n = replacements.size(); i < n; i++) {
	String replacement = replacements.get(i);
	if (first == null) {
	first = replacement;
	}
	if (sb.length() > 0) {
	sb.append(" or ");
	}
	sb.append('"');

	if (isCapitalized) {
	replacement = StringHelper.usLocaleCapitalize(replacement);
	}
	sb.append(replacement);
	fixBuilder.add(
	fix().name("Replace with \"" + replacement + "\"")
	.replace()
	.text(word)
	.with(replacement)
	.build());
	sb.append('"');
	}
	LintFix fix = fixBuilder.build();

	if (first != null && first.equalsIgnoreCase(word)) {
	if (first.equals(word)) {
	return;
	}
	message = String.format("\"%1$s\" is usually capitalized as \"%2$s\"", word, first);
	} else {
	message =
	String.format(
	"\"%1$s\" is a common misspelling; did you mean %2$s ?",
	word, sb.toString());
	}

	int end = begin + word.length();
	context.report(ISSUE, node, context.getLocation(node, begin, end), message, fix);
	}

	/** Reports a repeated word */
	private void reportRepeatedWord(
	XmlContext context, Node node, String text, int lastWordBegin, int begin, int end) {
	String word = text.substring(begin, end);

	if (isAllowed(word)) {
	return;
	}

	String message = String.format("Repeated word \"%1$s\" in message: possible typo", word);

	String replace;
	if (lastWordBegin > 1 && text.charAt(lastWordBegin - 1) == ' ') {
	replace = ' ' + word;
	} else if (end < text.length() - 1 && text.charAt(end) == ' ') {
	replace = word + ' ';
	} else {
	replace = word;
	}
	LintFix fix = fix().name("Delete repeated word").replace().text(replace).with("").build();

	Location location = context.getLocation(node, lastWordBegin, end);
	context.report(ISSUE, node, location, message, fix);
	}

	private static boolean isAllowed(@NonNull String word) {
	// See https://en.wikipedia.org/wiki/Reduplication

	// Capitalized: names or place names. There are various places
	// with repeated words, such as Pago Pago
	// https://en.wikipedia.org/wiki/List_of_reduplicated_place_names
	if (Character.isUpperCase(word.charAt(0))) {
	return true;
	}

	// Some known/common-ish exceptions:
	switch (word) {
	case "that": // e.g. "I know that that will not work."
	case "yadda":
	case "bye":
	case "choo":
	case "night":
	case "dot":
	case "tsk":
	case "no":
	return true;
	}
	return false;
	}
	}