Refine relationship between context word and possible OTP
Rather than simply looking for a context word anywhere in the message
containing a possible OTP, require that:
A. The two be within 50 characters of each other (the maximum currently
seen is 35 characters), and
B. The two be in the same sentence, or the context word be in the
preceding sentence.
Also removes "confirm" and "confirmation" from the context words, as
there are a large number of "confirmation numbers" that are not
OTP-related.
Test: atest NotificationOtpDetectionHelperTest
Fixes: 367357496
Flag: EXEMPT bugfix
Change-Id: I43b291d2fe005147c232011bd525b6c922ffc733
diff --git a/java/src/android/ext/services/notification/NotificationOtpDetectionHelper.java b/java/src/android/ext/services/notification/NotificationOtpDetectionHelper.java
index 5b484aa..d8f1566 100644
--- a/java/src/android/ext/services/notification/NotificationOtpDetectionHelper.java
+++ b/java/src/android/ext/services/notification/NotificationOtpDetectionHelper.java
@@ -212,24 +212,50 @@
private static final String[] ENGLISH_CONTEXT_WORDS = new String[] {
"pin", "pass[-\\s]?(code|word)", "TAN", "otp", "2fa", "(two|2)[-\\s]?factor",
"log[-\\s]?in", "auth(enticat(e|ion))?", "code", "secret", "verif(y|ication)",
- "confirm(ation)?", "one(\\s|-)?time", "access", "validat(e|ion)"
+ "one(\\s|-)?time", "access", "validat(e|ion)"
};
/**
* Creates a regular expression to match any of a series of individual words, case insensitive.
+ * Each word is wrapped by findContextWordWithCode, so it only matches near a suspected OTP.
*/
private static Matcher createDictionaryRegex(String[] words) {
- StringBuilder regex = new StringBuilder("(?i)\\b(");
+ StringBuilder regex = new StringBuilder("(?i)(");
for (int i = 0; i < words.length; i++) {
- regex.append(words[i]);
+ regex.append(findContextWordWithCode(words[i]));
if (i != words.length - 1) {
regex.append("|");
}
}
- regex.append(")\\b");
+ regex.append(")");
return Pattern.compile(regex.toString()).matcher("");
}
+ /**
+ * Builds a regular expression fragment that matches a context word only when it occurs in
+ * the sentence preceding an OTP, or in the same sentence as an OTP (before or after). In both
+ * cases, the context word must occur within 50 characters of the suspected OTP.
+ * @param contextWord A regex fragment for the context word expected around the OTP match
+ * @return A string representing a regular expression that matches the context word either
+ * before an OTP match (same or previous sentence), or after it in the same sentence.
+ */
+ private static String findContextWordWithCode(String contextWord) {
+ String boundedContext = "\\b" + contextWord + "\\b";
+ // Context word first: the lookahead requires an OTP to start within 50 characters after the
+ // word, and the consuming part allows at most one of '.', '?', '!' before the OTP (i.e. same
+ // or previous sentence). NOTE(review): '.' skips line breaks unless DOTALL is set - confirm.
+ String contextWordBeforeOtpInSameOrPreviousSentence =
+ String.format("(%s(?=.{1,50}%s)[^.?!]*[.?!]?[^.?!]*%s)",
+ boundedContext, ALL_OTP, ALL_OTP);
+ // OTP first: the context word must begin within 50 characters after the OTP code, with no
+ // sentence punctuation ('.', '!' or '?') in between (i.e. they are in the same
+ // sentence). Unlike the lookahead above, this negated class also matches line breaks.
+ String contextWordAfterOtpSameSentence =
+ String.format("(%s)[^.!?]{1,50}%s", ALL_OTP, boundedContext);
+ return String.format("(%s|%s)", contextWordBeforeOtpInSameOrPreviousSentence,
+ contextWordAfterOtpSameSentence);
+ }
+
static {
EXTRA_LANG_OTP_REGEX.put(ULocale.ENGLISH.toLanguageTag(), ThreadLocal.withInitial(() ->
createDictionaryRegex(ENGLISH_CONTEXT_WORDS)));
diff --git a/java/tests/src/android/ext/services/notification/NotificationOtpDetectionHelperTest.kt b/java/tests/src/android/ext/services/notification/NotificationOtpDetectionHelperTest.kt
index 0dec895..02d54df 100644
--- a/java/tests/src/android/ext/services/notification/NotificationOtpDetectionHelperTest.kt
+++ b/java/tests/src/android/ext/services/notification/NotificationOtpDetectionHelperTest.kt
@@ -28,6 +28,7 @@
import android.icu.util.ULocale
import android.os.Build
import android.os.Build.VERSION.SDK_INT
+import android.view.textclassifier.TextClassificationManager
import android.view.textclassifier.TextClassifier
import android.view.textclassifier.TextLanguage
import android.view.textclassifier.TextLinks
@@ -447,6 +448,12 @@
val englishContextWordsCase = listOf("LOGIN", "logIn", "LoGiN")
// Strings with a context word somewhere in the substring
val englishContextSubstrings = listOf("pins", "gaping", "backspin")
+ val codeInNextSentence = "context word: code. This sentence has the actual value of 434343"
+ val codeInNextSentenceTooFar =
+ "context word: code. ${"f".repeat(60)} This sentence has the actual value of 434343"
+ val codeTwoSentencesAfterContext = "context word: code. One sentence. actual value 34343"
+ val codeInSentenceBeforeContext = "34343 is a number. This number is a code"
+ val codeTooFarBeforeContext = "34343 ${"f".repeat(60)} code"
addMatcherTestResult(expected = false, englishFalsePositive, textClassifier = tc)
for (context in englishContextWords) {
@@ -461,6 +468,11 @@
val anotherFalsePositive = "$falseContext $englishFalsePositive"
addMatcherTestResult(expected = false, anotherFalsePositive, textClassifier = tc)
}
+ addMatcherTestResult(expected = true, codeInNextSentence, textClassifier = tc)
+ addMatcherTestResult(expected = false, codeTwoSentencesAfterContext, textClassifier = tc)
+ addMatcherTestResult(expected = false, codeInSentenceBeforeContext, textClassifier = tc)
+ addMatcherTestResult(expected = false, codeInNextSentenceTooFar, textClassifier = tc)
+ addMatcherTestResult(expected = false, codeTooFarBeforeContext, textClassifier = tc)
}
@Test