// blob: 84fad158f656003a2930de9b2f9dc5534835e50f [file] [log] [blame]
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import android.text.TextUtils;
import android.util.Log;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Static helpers used when deciding whether a suggestion should be auto-corrected:
 * word lookups across a set of dictionaries, the normalized-score threshold test for a
 * suggestion, and an edit-distance "safety net" that can veto a correction.
 */
public final class AutoCorrection {
    private static final boolean DBG = LatinImeLogger.sDBG;
    private static final String TAG = AutoCorrection.class.getSimpleName();
    // Typed words shorter than this are never blocked by the safety net.
    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;

    private AutoCorrection() {
        // Purely static class: can't instantiate.
    }

    /**
     * Tells whether at least one of the given dictionaries accepts the word.
     *
     * @param dictionaries the dictionaries to consult
     * @param word the word to look up; a null or empty word is never valid
     * @param ignoreCase when true, the lower-cased form of the word is also tried
     * @return true if any dictionary reports the word (or, with {@code ignoreCase}, its
     *         lower-cased form) as valid
     */
    public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries,
            CharSequence word, boolean ignoreCase) {
        if (TextUtils.isEmpty(word)) {
            return false;
        }
        // Only pay for the lower-casing when it can actually be used.
        // NOTE(review): toLowerCase() uses the default locale here; confirm whether the
        // dictionary's own locale should be used instead.
        final CharSequence lowerCasedWord = ignoreCase ? word.toString().toLowerCase() : null;
        // Iterate over values() rather than keySet() + get(): a ConcurrentHashMap never holds
        // null values, so get() could only return null for a key removed by another thread
        // after keySet() had already handed it out. values() has no such window.
        for (final Dictionary dictionary : dictionaries.values()) {
            // Cheap defensive guard, kept from the previous implementation.
            if (null == dictionary) {
                continue;
            }
            if (dictionary.isValidWord(word)) {
                return true;
            }
            if (ignoreCase && dictionary.isValidWord(lowerCasedWord)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the highest frequency any of the given dictionaries reports for the word.
     *
     * @param dictionaries the dictionaries to consult
     * @param word the word to look up
     * @return {@link Dictionary#NOT_A_PROBABILITY} for a null or empty word; otherwise the
     *         largest value returned by {@code getFrequency}, or -1 when no dictionary
     *         returned anything larger
     */
    public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
            CharSequence word) {
        if (TextUtils.isEmpty(word)) {
            return Dictionary.NOT_A_PROBABILITY;
        }
        // NOTE(review): presumably -1 is the same value as Dictionary.NOT_A_PROBABILITY;
        // confirm before replacing the literal with the constant.
        int maxFreq = -1;
        // See isValidWord for why values() is iterated instead of keySet() + get().
        for (final Dictionary dictionary : dictionaries.values()) {
            if (null == dictionary) {
                continue;
            }
            final int freq = dictionary.getFrequency(word);
            if (freq > maxFreq) {
                maxFreq = freq;
            }
        }
        return maxFreq;
    }

    /**
     * Returns true if the word is in any of the dictionaries.
     * This simply delegates to {@link #isValidWord}.
     */
    public static boolean isInTheDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final CharSequence word, final boolean ignoreCase) {
        return isValidWord(dictionaries, word, ignoreCase);
    }

    /**
     * Decides whether a suggestion is strong enough to be auto-corrected to.
     *
     * @param suggestion the candidate suggestion; null yields false
     * @param consideredWord the word the suggestion is compared against
     * @param autoCorrectionThreshold minimum normalized score required
     * @return true for a whitelisted suggestion, or for one whose normalized score reaches
     *         the threshold and is not blocked by the safety net; false otherwise
     */
    public static boolean suggestionExceedsAutoCorrectionThreshold(SuggestedWordInfo suggestion,
            CharSequence consideredWord, float autoCorrectionThreshold) {
        if (null == suggestion) {
            return false;
        }
        // Shortlist a whitelisted word
        if (suggestion.mKind == SuggestedWordInfo.KIND_WHITELIST) {
            return true;
        }
        final int autoCorrectionSuggestionScore = suggestion.mScore;
        final String consideredWordString = consideredWord.toString();
        // TODO: when the normalized score of the first suggestion is nearly equal to
        // the normalized score of the second suggestion, behave less aggressively.
        final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                consideredWordString, suggestion.mWord.toString(),
                autoCorrectionSuggestionScore);
        if (DBG) {
            Log.d(TAG, "Normalized " + consideredWordString + "," + suggestion + ","
                    + autoCorrectionSuggestionScore + ", " + normalizedScore
                    + "(" + autoCorrectionThreshold + ")");
        }
        if (normalizedScore < autoCorrectionThreshold) {
            return false;
        }
        if (DBG) {
            Log.d(TAG, "Auto corrected by S-threshold.");
        }
        // The score is high enough; the safety net still gets the final say.
        return !shouldBlockAutoCorrectionBySafetyNet(consideredWordString, suggestion.mWord);
    }

    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
    // this safety net
    /**
     * Safety net for auto correction: blocks a correction whose edit distance from the typed
     * word exceeds a limit derived from the typed word's length.
     *
     * Actually if we hit this safety net, it's a bug.
     * The original note says the safety net is unnecessary when the user selected the
     * aggressive auto-correction mode; no such mode check is performed in this method.
     *
     * @param typedWord the word as typed
     * @param suggestion the proposed correction
     * @return true if the correction should be blocked, false otherwise
     */
    public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
            final CharSequence suggestion) {
        // If the typed word is shorter than MINIMUM_SAFETY_NET_CHAR_LENGTH, the net is not
        // used because the edit distance can be large relative to the word length.
        final int typedWordLength = typedWord.length();
        if (typedWordLength < MINIMUM_SAFETY_NET_CHAR_LENGTH) {
            return false;
        }
        // Limit: 3 for words shorter than 5 characters, otherwise half the length plus one.
        final int maxEditDistanceOfNativeDictionary =
                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
        final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString());
        if (DBG) {
            Log.d(TAG, "Autocorrected edit distance = " + distance
                    + ", " + maxEditDistanceOfNativeDictionary);
        }
        final boolean exceedsLimit = distance > maxEditDistanceOfNativeDictionary;
        if (exceedsLimit && DBG) {
            Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
            Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
                    + "Turning off auto-correction.");
        }
        return exceedsLimit;
    }
}