java/src/com/android/textclassifier/OtpDetector.java - platform/external/libtextclassifier - Git at Google

 /*
  * Copyright (C) 2025 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package com.android.textclassifier;

 import static java.lang.String.format;

 import android.icu.util.ULocale;
 import android.util.ArrayMap;
 import android.view.textclassifier.TextClassifier;
 import android.view.textclassifier.TextLanguage;

 import androidx.annotation.NonNull;
 import androidx.annotation.Nullable;

 import java.util.HashSet;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  * Helper methods for detecting One-Time Password (OTP) codes in text.
  *
  * <p>This class is designed to be lightweight with minimal dependencies, allowing it
  * to be easily exported and built as a standalone library.
  *
  * <p>Every {@link Matcher} is held in a {@link ThreadLocal}, because a matcher carries mutable
  * state and must not be shared between threads.
  */
 public class OtpDetector {
   /** Flags applied to every pattern compiled by {@link #compileToRegex}. */
   private static final int PATTERN_FLAGS =
       Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;

   /**
    * Wraps {@code pattern} in a per-thread {@link Matcher}.
    *
    * <p>The pattern is compiled lazily: nothing is compiled until a thread first calls
    * {@link ThreadLocal#get()} on the returned holder.
    */
   private static ThreadLocal<Matcher> compileToRegex(String pattern) {
     return ThreadLocal.withInitial(() -> Pattern.compile(pattern, PATTERN_FLAGS).matcher(""));
   }

   /** Minimum confidence score a detected locale needs before it is used. */
   private static final float TC_THRESHOLD = 0.6f;

   /** Context-word matchers, keyed by the value of {@link ULocale#toLanguageTag()}. */
   private static final ArrayMap<String, ThreadLocal<Matcher>> EXTRA_LANG_OTP_REGEX =
       new ArrayMap<>();

   /** Matches candidate OTP codes. */
   private static final ThreadLocal<Matcher> OTP_REGEX = compileToRegex(RegExStrings.ALL_OTP);

   /**
    * A combination of common false positives. These matches are expected to be longer than (or
    * equal in length to) OTP matches.
    */
   private static final ThreadLocal<Matcher> FALSE_POSITIVE_REGEX =
       compileToRegex(RegExStrings.FALSE_POSITIVE);

   /**
    * Creates a regular expression that matches any one of the given words. Each entry of
    * {@code words} is itself a regex fragment; it is wrapped in {@code \b} word boundaries and the
    * entries are joined with {@code |} inside a single group. Matching is case insensitive because
    * of {@link #PATTERN_FLAGS}.
    *
    * <p>Note: an empty {@code words} array yields {@code "()"}, which matches everywhere.
    */
   private static ThreadLocal<Matcher> createDictionaryRegex(String[] words) {
     StringBuilder regex = new StringBuilder("(");
     for (int i = 0; i < words.length; i++) {
       if (i > 0) {
         regex.append('|');
       }
       regex.append("\\b").append(words[i]).append("\\b");
     }
     regex.append(')');
     return compileToRegex(regex.toString());
   }

   static {
     EXTRA_LANG_OTP_REGEX.put(
         ULocale.ENGLISH.toLanguageTag(),
         createDictionaryRegex(RegExStrings.ENGLISH_CONTEXT_WORDS));
   }

   /**
    * Checks if a string of text might contain an OTP, based on several regular expressions, and
    * potentially using a textClassifier to eliminate false positives.
    *
    * <p><b>Note:</b> This method is meant to be called in Android V only. Android B+ should make
    * TextClassifier request to determine if the text contains OTP.</p>
    *
    * <p><b>Important:</b> Signature of this method to be kept intact since it is intended for
    * use by external modules via an exported library.
    *
    * @param text The input text to scan for OTP keywords. Must not be null.
    * @param tc TextClassifier instance to be used to find the language of the text.
    * @return {@code true} if an OTP is determined to be in the text, {@code false} otherwise.
    */
   public static boolean containsOtp(
       @NonNull String text,
       @NonNull TextClassifier tc) {
     // Cheap regex screen first; language detection only runs when a candidate code exists.
     if (!containsOtpLikePattern(text)) {
       return false;
     }

     TextLanguage language = getTextLanguage(text, tc);
     return containsOtpWithLanguage(text, language);
   }

   /**
    * Checks if the input text likely contains a language-specific keyword commonly associated with
    * OTP, based on the provided language hint.
    *
    * <p>This method first attempts to determine a high-confidence {@link ULocale} corresponding to
    * the given {@link TextLanguage}. If a reliable locale cannot be determined, it assumes no
    * relevant OTP keyword is present for that language. Otherwise, it delegates to
    * {@link #hasLanguageSpecificOtpWord} to perform the actual check using the language tag derived
    * from the determined locale.
    *
    * @param text The input text to scan for OTP keywords. Must not be null.
    * @param language The language hint for the input text, used to determine the appropriate locale
    *     for keyword matching. Must not be null.
    * @return {@code true} if the text is determined to contain a language-specific OTP keyword
    *     matching the language hint, {@code false} otherwise (including cases where the language
    *     could not be confidently identified or no specific OTP keyword is found).
    */
   protected static boolean containsOtpWithLanguage(
       @NonNull String text, @NonNull TextLanguage language) {
     ULocale uLocale = getLanguageWithRegex(language);
     if (uLocale == null) {
       return false;
     }
     return hasLanguageSpecificOtpWord(text, uLocale.toLanguageTag());
   }

   /**
    * Checks if the given text contains a pattern resembling an OTP.
    *
    * <p>Candidates are found with the OTP regex. A candidate is discarded when it lies entirely
    * inside a match of the false-positive regex (dates, phone numbers). The comparison is done on
    * match positions rather than on matched strings, so a real code such as "2001" is not
    * discarded merely because the same digits also occur inside a date like "01-01-2001"
    * elsewhere in the text.
    *
    * <p>Examples:
    * <ul>
    *   <li>"Your OTP code is 1234 and this is sent on 01-01-2001": "1234" is outside the date, so
    *       the result is {@code true}.
    *   <li>"Your OTP can't be shared at this point, please call (888) 888-8888": the only
    *       candidate, "888-8888", lies inside the phone number, so the result is {@code false}.
    * </ul>
    *
    * @param text The text to be checked. Must not be null.
    * @return {@code true} if the text contains an OTP-like pattern, {@code false} otherwise.
    */
   protected static boolean containsOtpLikePattern(@NonNull String text) {
     Matcher otpMatcher = OTP_REGEX.get();
     otpMatcher.reset(text);
     if (!otpMatcher.find()) {
       return false;
     }

     Matcher falsePositiveMatcher = FALSE_POSITIVE_REGEX.get();
     falsePositiveMatcher.reset(text);
     boolean hasFalsePositive = falsePositiveMatcher.find();

     // Both matchers report non-overlapping matches from left to right, so one forward pass over
     // each is enough to pair every candidate with the only false positive that could cover it.
     do {
       // A false positive that ends before this candidate ends cannot cover this candidate or
       // any later one, so it is skipped for good.
       while (hasFalsePositive && falsePositiveMatcher.end() < otpMatcher.end()) {
         hasFalsePositive = falsePositiveMatcher.find();
       }
       // The current false positive (if any) ends at or after the candidate; it covers the
       // candidate exactly when it also starts at or before it.
       boolean coveredByFalsePositive =
           hasFalsePositive && falsePositiveMatcher.start() <= otpMatcher.start();
       if (!coveredByFalsePositive) {
         return true;
       }
     } while (otpMatcher.find());
     return false;
   }

   /**
    * Checks if the given text contains a language-specific word or phrase associated with OTPs.
    * This method uses regular expressions defined for specific languages to identify these words.
    *
    * @param text The text to check.
    * @param languageTag The language tag (e.g., "en", "es", "fr") for which to check.
    * @return {@code true} if the text contains a language-specific OTP word, {@code false}
    *     otherwise. Returns {@code false} if no language-specific regex is defined for the given
    *     tag.
    */
   private static boolean hasLanguageSpecificOtpWord(
       @NonNull String text, @NonNull String languageTag) {
     // One lookup; a missing key and a null value both mean "no regex for this language".
     ThreadLocal<Matcher> matcherHolder = EXTRA_LANG_OTP_REGEX.get(languageTag);
     if (matcherHolder == null) {
       return false;
     }
     Matcher languageSpecificMatcher = matcherHolder.get();
     if (languageSpecificMatcher == null) {
       return false;
     }
     languageSpecificMatcher.reset(text);
     return languageSpecificMatcher.find();
   }

   /** Asks {@code tc} to detect the language of {@code text}. */
   private static TextLanguage getTextLanguage(@NonNull String text, @NonNull TextClassifier tc) {
     TextLanguage.Request langRequest = new TextLanguage.Request.Builder(text).build();
     return tc.detectLanguage(langRequest);
   }

   /**
    * Returns the locale with the highest confidence score that meets {@link #TC_THRESHOLD} and has
    * a language-specific regex, or {@code null} if there is none. When two qualifying locales have
    * the same score, the later hypothesis wins.
    */
   @Nullable
   private static ULocale getLanguageWithRegex(@NonNull TextLanguage lang) {
     float highestConfidence = 0;
     ULocale highestConfidenceLocale = null;
     int hypothesisCount = lang.getLocaleHypothesisCount();
     for (int i = 0; i < hypothesisCount; i++) {
       ULocale locale = lang.getLocale(i);
       float confidence = lang.getConfidenceScore(locale);
       if (confidence >= TC_THRESHOLD
           && confidence >= highestConfidence
           && EXTRA_LANG_OTP_REGEX.containsKey(locale.toLanguageTag())) {
         highestConfidence = confidence;
         highestConfidenceLocale = locale;
       }
     }
     return highestConfidenceLocale;
   }

   /** Not instantiable; all members are static. */
   private OtpDetector() {}

   /** Regex source strings used to build the matchers of {@link OtpDetector}. */
   private static class RegExStrings {
     /*
      * A regex matching a line start, open paren, arrow, colon (not preceded by a digit), open
      * square bracket, equals sign, double or single quote, ideographic char, or a space that is
      * not preceded by a number. It will not consume the start char (meaning START won't be
      * included in the matched string)
      */
     private static final String START =
         "(^|(?<=((^|[^0-9])\\s)|[>(\"'=\\[\\p{IsIdeographic}]|[^0-9]:))";

     /*
      * A regex matching a line end, a space that is not followed by a number, an ideographic char,
      * or a period, close paren, close square bracket, single or double quote, exclamation point,
      * question mark, or comma. It will not consume the end char
      */
     private static final String END = "(?=\\s[^0-9]|$|\\p{IsIdeographic}|[.?!,)'\\]\"])";

     private static final String ALL_OTP;

     static {
       /* One single OTP char. A number or alphabetical char (that isn't also ideographic) */
       final String OTP_CHAR = "([0-9\\p{IsAlphabetic}&&[^\\p{IsIdeographic}]])";

       /* One OTP char, followed by an optional dash */
       final String OTP_CHAR_WITH_DASH = format("(%s-?)", OTP_CHAR);

       /*
        * Performs a lookahead to find a digit after 0 to 7 OTP_CHARs. This ensures that our
        * potential OTP code contains at least one number
        */
       final String FIND_DIGIT = format("(?=%s{0,7}\\d)", OTP_CHAR_WITH_DASH);

       /*
        * Matches between 5 and 8 otp chars, with dashes in between. Here, we are assuming an OTP
        * code is 5-8 characters long. The last char must not be followed by a dash
        */
       final String OTP_CHARS = format("(%s{4,7}%s)", OTP_CHAR_WITH_DASH, OTP_CHAR);

       /* A regex matching four digit numerical codes */
       final String FOUR_DIGITS = "(\\d{4})";

       final String FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM =
           format("(%s%s)", FIND_DIGIT, OTP_CHARS);

       /* A regex matching two pairs of 3 digits (ex "123 456") */
       final String SIX_DIGITS_WITH_SPACE = "(\\d{3}\\s\\d{3})";

       /*
        * Combining the regular expressions above, we get an OTP regex:
        *   1. search for START, THEN
        *   2. match ONE of
        *      a. alphanumeric sequence, at least one number, length 5-8, with optional dashes
        *      b. 4 numbers in a row
        *      c. pair of 3 digit codes separated by a space
        *      THEN
        *   3. search for END
        * Ex: "6454", " 345 678.", "[YDT-456]"
        */
       ALL_OTP =
           format(
               "%s(%s|%s|%s)%s",
               START, FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM, FOUR_DIGITS,
               SIX_DIGITS_WITH_SPACE, END);
     }

     private static final String FALSE_POSITIVE;

     static {
       /*
        * A Date regular expression. Looks for dates with the month, day, and year separated by
        * dashes. Handles one and two digit months and days, and four or two-digit years. It makes
        * the following assumptions: Dates and months will never be higher than 39. If a four
        * digit year is used, the leading digit will be 1 or 2
        */
       final String DATE_WITH_DASHES = "([0-3]?\\d-[0-3]?\\d-([12]\\d)?\\d\\d)";

       /*
        * Matches a ten digit phone number. Supports optional parentheses around the area code, an
        * optional dash or space after the area code, and an optional dash or space in between
        * the rest of the numbers. A number whose area code is separated by a space registers as
        * an otp match because of that space, but shouldn't.
        */
       final String PHONE_WITH_SPACE = "(\\(?\\d{3}\\)?(-|\\s)?\\d{3}(-|\\s)?\\d{4})";

       /*
        * A combination of common false positives. These matches are expected to be longer than
        * (or equal in length to) otp matches.
        */
       FALSE_POSITIVE = format("%s(%s|%s)%s", START, DATE_WITH_DASHES, PHONE_WITH_SPACE, END);
     }

     /**
      * A list of regular expressions representing words found in an OTP context (non case
      * sensitive). Note: TAN is short for Transaction Authentication Number
      */
     private static final String[] ENGLISH_CONTEXT_WORDS =
         new String[] {
           "pin",
           "pass[-\\s]?(code|word)",
           "TAN",
           "otp",
           "2fa",
           "(two|2)[-\\s]?factor",
           "log[-\\s]?in",
           "auth(enticat(e|ion))?",
           "code",
           "secret",
           "verif(y|ication)",
           "one(\\s|-)?time",
           "access",
           "validat(e|ion)"
         };
   }
 }
	/*
	* Copyright (C) 2025 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package com.android.textclassifier;

	import static java.lang.String.format;

	import android.icu.util.ULocale;
	import android.util.ArrayMap;
	import android.view.textclassifier.TextClassifier;
	import android.view.textclassifier.TextLanguage;

	import androidx.annotation.NonNull;
	import androidx.annotation.Nullable;

	import java.util.HashSet;
	import java.util.Set;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	/**
	* Class with helper methods to detecting One-Time Password (OTP) codes in a text.
	*
	* <p>This class is designed to be lightweight with minimal dependencies, allowing it
	* to be easily exported and built as a standalone library.
	*/
	public class OtpDetector {
	private static final int PATTERN_FLAGS =
	Pattern.DOTALL \| Pattern.CASE_INSENSITIVE \| Pattern.MULTILINE;

	private static ThreadLocal<Matcher> compileToRegex(String pattern) {
	return ThreadLocal.withInitial(() -> Pattern.compile(pattern, PATTERN_FLAGS).matcher(""));
	}

	private static final float TC_THRESHOLD = 0.6f;

	private static final ArrayMap<String, ThreadLocal<Matcher>> EXTRA_LANG_OTP_REGEX =
	new ArrayMap<>();

	private static final ThreadLocal<Matcher> OTP_REGEX = compileToRegex(RegExStrings.ALL_OTP);

	/**
	* A combination of common false positives. These matches are expected to be longer than (or equal
	* in length to) otp matches
	*/
	private static final ThreadLocal<Matcher> FALSE_POSITIVE_REGEX =
	compileToRegex(RegExStrings.FALSE_POSITIVE);

	/**
	* Creates a regular expression to match any of a series of individual words, case insensitive. It
	* also verifies the position of the word, relative to the OTP match
	*/
	private static ThreadLocal<Matcher> createDictionaryRegex(String[] words) {
	StringBuilder regex = new StringBuilder("(");
	for (int i = 0; i < words.length; i++) {
	String boundedWord = "\\b" + words[i] + "\\b";
	regex.append(boundedWord);
	if (i != words.length - 1) {
	regex.append("\|");
	}
	}
	regex.append(")");
	return compileToRegex(regex.toString());
	}

	static {
	EXTRA_LANG_OTP_REGEX.put(
	ULocale.ENGLISH.toLanguageTag(), createDictionaryRegex(RegExStrings.englishContextWords));
	}

	/**
	* Checks if a string of text might contain an OTP, based on several regular expressions, and
	* potentially using a textClassifier to eliminate false positives.
	*
	* <p><b>Note:</b> This method is meant to be called in Android V only. Android B+ should make
	* TextClassifier request to determine if the text contains OTP.</p>
	*
	* <p><b>Important:</b> Signature of this method to be kept intact since it is intended for
	* use by external modules via an exported library.
	*
	* @param text The input text to scan for OTP keywords. Must not be null.
	* @param tc TextClassifier instance to be used to find the language of the text.
	* @return {@code true} if an OTP is determined to be in the text, {@code false} otherwise.
	*/
	public static boolean containsOtp(
	@NonNull String text,
	@NonNull TextClassifier tc) {
	if (!containsOtpLikePattern(text)) {
	return false;
	}

	TextLanguage language = getTextLanguage(text, tc);
	return containsOtpWithLanguage(text, language);
	}

	/**
	* Checks if the input text likely contains a language-specific keyword commonly associated with
	* OTP, based on the provided language hint.
	*
	* <p>This method first attempts to determine a high-confidence {@link ULocale} corresponding to
	* the given {@link TextLanguage}. If a reliable locale cannot be determined, it assumes no
	* relevant OTP keyword is present for that language. Otherwise, it delegates to
	* {@link #hasLanguageSpecificOtpWord} to perform the actual check using the language tag derived
	* from the determined locale.
	*
	* @param text The input text to scan for OTP keywords. Must not be null.
	* @param language The language hint for the input text, used to determine the appropriate locale
	* for keyword matching. Must not be null.
	* @return {@code true} if the text is determined to contain a language-specific OTP keyword
	* matching the language hint, {@code false} otherwise (including cases where the language
	* could not be confidently identified or no specific OTP keyword is found).
	*/
	protected static boolean containsOtpWithLanguage(@NonNull String text, @NonNull TextLanguage language) {
	ULocale uLocale = getLanguageWithRegex(language);
	if (uLocale == null) {
	return false;
	}
	return hasLanguageSpecificOtpWord(text, uLocale.toLanguageTag());
	}

	/**
	* Checks if the given text contains a pattern resembling an OTP.
	*
	* <p>This method attempts to identify such patterns by matching against a regular expression.
	* Avoids false positives by checking for common patterns that might be mistaken for OTPs, such
	* as phone numbers or dates.</p>
	*
	* @param text The text to be checked.
	* @return {@code true} if the text contains an OTP-like pattern, {@code false} otherwise.
	*/
	protected static boolean containsOtpLikePattern(String text) {
	Set<String> otpMatches = getAllMatches(text, OTP_REGEX.get());
	if (otpMatches.isEmpty()) {
	return false;
	}
	Set<String> falsePositives = getAllMatches(text, FALSE_POSITIVE_REGEX.get());

	// This optional, but having this would help with performance
	// Example: "Your OTP code is 1234 and this is sent on 01-01-2001"
	// At this point -> otpMatches: [1234, 01-01-2001] falsePositives=[01-01-2001]
	// It filters "01-01-2001" in advance and continues to next checks with otpMatches: [1234]
	otpMatches.removeAll(falsePositives);

	// Following is to handle text like: "Your OTP can't be shared at this point, please call
	// (888) 888-8888"
	// otpMatches: [888-8888] falsePositives=[(888) 888-8888] final=[]
	for (String otpMatch : otpMatches) {
	boolean currentOtpIsFalsePositive = false;
	for (String falsePositive : falsePositives) {
	if (falsePositive.contains(otpMatch)) {
	currentOtpIsFalsePositive = true;
	break;
	}
	}
	if (!currentOtpIsFalsePositive) {
	return true;
	}
	}
	return false;
	}

	/**
	* Checks if the given text contains a language-specific word or phrase associated with OTPs.
	* This method uses regular expressions defined for specific languages to identify these words.
	*
	* @param text The text to check.
	* @param languageTag The language tag (e.g., "en", "es", "fr") for which to check.
	* @return {@code true} if the text contains a language-specific OTP word, {@code false} otherwise.
	* Returns {@code false} if no language-specific regex is defined for the given tag.
	*/
	private static boolean hasLanguageSpecificOtpWord(@NonNull String text, @NonNull String languageTag) {
	if (!EXTRA_LANG_OTP_REGEX.containsKey(languageTag)){
	return false;
	}
	Matcher languageSpecificMatcher = EXTRA_LANG_OTP_REGEX.get(languageTag).get();
	if (languageSpecificMatcher == null) {
	return false;
	}
	languageSpecificMatcher.reset(text);
	return languageSpecificMatcher.find();
	}

	private static Set<String> getAllMatches(String text, Matcher regex) {
	Set<String> matches = new HashSet<>();
	regex.reset(text);
	while (regex.find()) {
	matches.add(regex.group());
	}
	return matches;
	}

	// Tries to determine the language of the given text.
	private static TextLanguage getTextLanguage(@NonNull String text, @NonNull TextClassifier tc) {
	TextLanguage.Request langRequest = new TextLanguage.Request.Builder(text).build();
	return tc.detectLanguage(langRequest);
	}

	// Will return the language with the highest confidence score that meets the minimum threshold,
	// and has a language-specific regex, null otherwise
	@Nullable
	private static ULocale getLanguageWithRegex(@NonNull TextLanguage lang) {
	float highestConfidence = 0;
	ULocale highestConfidenceLocale = null;
	for (int i = 0; i < lang.getLocaleHypothesisCount(); i++) {
	ULocale locale = lang.getLocale(i);
	float confidence = lang.getConfidenceScore(locale);
	if (confidence >= TC_THRESHOLD
	&& confidence >= highestConfidence
	&& EXTRA_LANG_OTP_REGEX.containsKey(locale.toLanguageTag())) {
	highestConfidence = confidence;
	highestConfidenceLocale = locale;
	}
	}
	return highestConfidenceLocale;
	}

	private OtpDetector() {}

	private static class RegExStrings {
	/*
	* A regex matching a line start, open paren, arrow, colon (not proceeded by a digit), open square
	* bracket, equals sign, double or single quote, ideographic char, or a space that is not preceded
	* by a number. It will not consume the start char (meaning START won't be included in the matched
	* string)
	*/
	private static final String START =
	"(^\|(?<=((^\|[^0-9])\\s)\|[>(\"'=\\[\\p{IsIdeographic}]\|[^0-9]:))";

	/*
	* A regex matching a line end, a space that is not followed by a number, an ideographic char, or
	* a period, close paren, close square bracket, single or double quote, exclamation point,
	* question mark, or comma. It will not consume the end char
	*/
	private static final String END = "(?=\\s[^0-9]\|$\|\\p{IsIdeographic}\|[.?!,)'\\]\"])";

	private static final String ALL_OTP;

	static {
	/* One single OTP char. A number or alphabetical char (that isn't also ideographic) */
	final String OTP_CHAR = "([0-9\\p{IsAlphabetic}&&[^\\p{IsIdeographic}]])";

	/* One OTP char, followed by an optional dash */
	final String OTP_CHAR_WITH_DASH = format("(%s-?)", OTP_CHAR);

	/*
	* Performs a lookahead to find a digit after 0 to 7 OTP_CHARs. This ensures that our potential
	* OTP code contains at least one number
	*/
	final String FIND_DIGIT = format("(?=%s{0,7}\\d)", OTP_CHAR_WITH_DASH);

	/*
	* Matches between 5 and 8 otp chars, with dashes in between. Here, we are assuming an OTP code is
	* 5-8 characters long. The last char must not be followed by a dash
	*/
	final String OTP_CHARS = format("(%s{4,7}%s)", OTP_CHAR_WITH_DASH, OTP_CHAR);

	/* A regex matching four digit numerical codes */
	final String FOUR_DIGITS = "(\\d{4})";

	final String FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM =
	format("(%s%s)", FIND_DIGIT, OTP_CHARS);

	/* A regex matching two pairs of 3 digits (ex "123 456") */
	final String SIX_DIGITS_WITH_SPACE = "(\\d{3}\\s\\d{3})";

	/*
	* Combining the regular expressions above, we get an OTP regex: 1. search for START, THEN 2.
	* match ONE of a. alphanumeric sequence, at least one number, length 5-8, with optional dashes b.
	* 4 numbers in a row c. pair of 3 digit codes separated by a space THEN 3. search for END Ex:
	* "6454", " 345 678.", "[YDT-456]"
	*/
	ALL_OTP =
	format(
	"%s(%s\|%s\|%s)%s",
	START, FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM, FOUR_DIGITS,
	SIX_DIGITS_WITH_SPACE, END);
	}

	private static final String FALSE_POSITIVE;

	static {
	/*
	* A Date regular expression. Looks for dates with the month, day, and year separated by dashes.
	* Handles one and two digit months and days, and four or two-digit years. It makes the following
	* assumptions: Dates and months will never be higher than 39 If a four digit year is used, the
	* leading digit will be 1 or 2
	*/
	final String DATE_WITH_DASHES = "([0-3]?\\d-[0-3]?\\d-([12]\\d)?\\d\\d)";

	/*
	* matches a ten digit phone number, when the area code is separated by a space or dash. Supports
	* optional parentheses around the area code, and an optional dash or space in between the rest of
	* the numbers. This format registers as an otp match due to the space between the area code and
	* the rest, but shouldn't.
	*/
	final String PHONE_WITH_SPACE = "(\\(?\\d{3}\\)?(-\|\\s)?\\d{3}(-\|\\s)?\\d{4})";

	/*
	* A combination of common false positives. These matches are expected to be longer than (or equal
	* in length to) otp matches.
	*/
	FALSE_POSITIVE = format("%s(%s\|%s)%s", START, DATE_WITH_DASHES, PHONE_WITH_SPACE, END);
	}

	/**
	* A list of regular expressions representing words found in an OTP context (non case sensitive)
	* Note: TAN is short for Transaction Authentication Number
	*/
	private static final String[] englishContextWords =
	new String[] {
	"pin",
	"pass[-\\s]?(code\|word)",
	"TAN",
	"otp",
	"2fa",
	"(two\|2)[-\\s]?factor",
	"log[-\\s]?in",
	"auth(enticat(e\|ion))?",
	"code",
	"secret",
	"verif(y\|ication)",
	"one(\\s\|-)?time",
	"access",
	"validat(e\|ion)"
	};
	}
	}