| /* GENERATED SOURCE. DO NOT MODIFY. */ |
| // © 2017 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html#License |
| package android.icu.impl.number.parse; |
| |
| import java.text.ParsePosition; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Comparator; |
| import java.util.List; |
| |
| import android.icu.impl.StringSegment; |
| import android.icu.impl.number.AffixPatternProvider; |
| import android.icu.impl.number.AffixUtils; |
| import android.icu.impl.number.CustomSymbolCurrency; |
| import android.icu.impl.number.DecimalFormatProperties; |
| import android.icu.impl.number.Grouper; |
| import android.icu.impl.number.PatternStringParser; |
| import android.icu.impl.number.PatternStringParser.ParsedPatternInfo; |
| import android.icu.impl.number.PropertiesAffixPatternProvider; |
| import android.icu.impl.number.RoundingUtils; |
| import android.icu.number.NumberFormatter.GroupingStrategy; |
| import android.icu.text.DecimalFormatSymbols; |
| import android.icu.text.UnicodeSet; |
| import android.icu.util.Currency; |
| import android.icu.util.CurrencyAmount; |
| import android.icu.util.ULocale; |
| |
| /** |
| * Primary number parsing implementation class. |
| * |
| * @author sffc |
| * @hide Only a subset of ICU is exposed in Android |
| * |
| */ |
| public class NumberParserImpl { |
| |
| @Deprecated |
| public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) { |
| NumberParserImpl parser = new NumberParserImpl(parseFlags); |
| DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale); |
| IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT; |
| |
| MatcherFactory factory = new MatcherFactory(); |
| factory.currency = Currency.getInstance("USD"); |
| factory.symbols = symbols; |
| factory.ignorables = ignorables; |
| factory.locale = locale; |
| |
| ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern); |
| AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags); |
| |
| Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo); |
| parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); |
| parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); |
| parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); |
| |
| parser.freeze(); |
| return parser; |
| } |
| |
| // TODO: Find a better place for this enum. |
/** Selects the rule set applied when parsing a string. */
public enum ParseMode {
    /**
     * Lenient mode is intended for accepting malformed user input. Heuristics are used to try to
     * parse past typographical errors in the string.
     */
    LENIENT,

    /**
     * Strict mode is intended for callers that require well-formed input. Compared with lenient
     * mode:
     *
     * <ul>
     * <li>Grouping widths must agree with the grouping settings. For example, "12,3,45" fails when
     * the grouping width is 3, as in the pattern "#,##0".
     * <li>A complete prefix and suffix must be present. For example, with the pattern "{#};(#)",
     * "{123}" or "(123)" match, but "{123", "123}", and "123" all fail. (Lenient mode would accept
     * the latter strings.)
     * <li>Whitespace may not appear at arbitrary positions. Lenient mode allows whitespace to occur
     * arbitrarily before and after prefixes and exponent separators.
     * <li>Leading grouping separators, as in ",123", are rejected.
     * <li>Minus and plus signs may appear only if the pattern specifies them. In lenient mode a
     * plus or minus sign may always precede a number.
     * <li>Fewer characters can be interpreted as a decimal or grouping separator.
     * <li><strong>If currency parsing is enabled,</strong> currencies may appear only where
     * specified either by the current pattern string or by a valid pattern string for the current
     * locale. For example, with the pattern "¤0.00", "$1.23" matches but "1.23$" does not.
     * </ul>
     */
    STRICT
}
| |
| @Deprecated |
| public static NumberParserImpl createParserFromPattern( |
| ULocale locale, |
| String pattern, |
| boolean strictGrouping) { |
| // Temporary frontend for testing. |
| |
| int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE |
| | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES |
| | ParsingUtils.PARSE_FLAG_OPTIMIZE; |
| if (strictGrouping) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE; |
| } |
| |
| NumberParserImpl parser = new NumberParserImpl(parseFlags); |
| DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale); |
| IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT; |
| |
| MatcherFactory factory = new MatcherFactory(); |
| factory.currency = Currency.getInstance("USD"); |
| factory.symbols = symbols; |
| factory.ignorables = ignorables; |
| factory.locale = locale; |
| |
| ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern); |
| AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags); |
| |
| Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo); |
| |
| parser.addMatcher(ignorables); |
| parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); |
| parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); |
| parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); |
| parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper)); |
| parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); |
| parser.addMatcher(new RequireNumberMatcher()); |
| |
| parser.freeze(); |
| return parser; |
| } |
| |
| public static Number parseStatic( |
| String input, |
| ParsePosition ppos, |
| DecimalFormatProperties properties, |
| DecimalFormatSymbols symbols) { |
| NumberParserImpl parser = createParserFromProperties(properties, symbols, false, false); |
| ParsedNumber result = new ParsedNumber(); |
| parser.parse(input, true, result); |
| if (result.success()) { |
| ppos.setIndex(result.charEnd); |
| return result.getNumber(); |
| } else { |
| ppos.setErrorIndex(result.charEnd); |
| return null; |
| } |
| } |
| |
| public static CurrencyAmount parseStaticCurrency( |
| String input, |
| ParsePosition ppos, |
| DecimalFormatProperties properties, |
| DecimalFormatSymbols symbols) { |
| NumberParserImpl parser = createParserFromProperties(properties, symbols, true, false); |
| ParsedNumber result = new ParsedNumber(); |
| parser.parse(input, true, result); |
| if (result.success()) { |
| ppos.setIndex(result.charEnd); |
| // TODO: Clean this up |
| Currency currency; |
| if (result.currencyCode != null) { |
| currency = Currency.getInstance(result.currencyCode); |
| } else { |
| assert 0 != (result.flags & ParsedNumber.FLAG_HAS_DEFAULT_CURRENCY); |
| currency = CustomSymbolCurrency |
| .resolve(properties.getCurrency(), symbols.getULocale(), symbols); |
| } |
| return new CurrencyAmount(result.getNumber(), currency); |
| } else { |
| ppos.setErrorIndex(result.charEnd); |
| return null; |
| } |
| } |
| |
| public static NumberParserImpl createDefaultParserForLocale(ULocale loc, boolean optimize) { |
| DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(loc); |
| DecimalFormatProperties properties = PatternStringParser.parseToProperties("0"); |
| return createParserFromProperties(properties, symbols, false, optimize); |
| } |
| |
| public static NumberParserImpl createParserFromProperties( |
| DecimalFormatProperties properties, |
| DecimalFormatSymbols symbols, |
| boolean parseCurrency, |
| boolean optimize) { |
| |
| ULocale locale = symbols.getULocale(); |
| AffixPatternProvider patternInfo = new PropertiesAffixPatternProvider(properties); |
| Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols); |
| boolean isStrict = properties.getParseMode() == ParseMode.STRICT; |
| Grouper grouper = Grouper.forProperties(properties); |
| int parseFlags = 0; |
| // Fraction grouping is disabled by default because it has never been supported in DecimalFormat |
| parseFlags |= ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_DISABLED; |
| if (!properties.getParseCaseSensitive()) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_IGNORE_CASE; |
| } |
| if (properties.getParseIntegerOnly()) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_INTEGER_ONLY; |
| } |
| if (properties.getSignAlwaysShown()) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_PLUS_SIGN_ALLOWED; |
| } |
| if (isStrict) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE; |
| parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS; |
| parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES; |
| parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX; |
| } else { |
| parseFlags |= ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; |
| } |
| if (grouper.getPrimary() <= 0) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_GROUPING_DISABLED; |
| } |
| if (parseCurrency || patternInfo.hasCurrencySign()) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS; |
| } |
| if (optimize) { |
| parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE; |
| } |
| IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT; |
| |
| NumberParserImpl parser = new NumberParserImpl(parseFlags); |
| |
| MatcherFactory factory = new MatcherFactory(); |
| factory.currency = currency; |
| factory.symbols = symbols; |
| factory.ignorables = ignorables; |
| factory.locale = locale; |
| |
| ////////////////////// |
| /// AFFIX MATCHERS /// |
| ////////////////////// |
| |
| // Set up a pattern modifier with mostly defaults to generate AffixMatchers. |
| AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags); |
| |
| //////////////////////// |
| /// CURRENCY MATCHER /// |
| //////////////////////// |
| |
| if (parseCurrency || patternInfo.hasCurrencySign()) { |
| parser.addMatcher(CurrencyMatcher.getInstance(currency, locale)); |
| parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); |
| } |
| |
| /////////////// |
| /// PERCENT /// |
| /////////////// |
| |
| // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern, |
| // and to maintain regressive behavior, divide by 100 even if no percent sign is present. |
| if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) { |
| parser.addMatcher(PercentMatcher.getInstance(symbols)); |
| // causes number to be always scaled by 100: |
| parser.addMatcher(FlagHandler.PERCENT); |
| } |
| if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) { |
| parser.addMatcher(PermilleMatcher.getInstance(symbols)); |
| // causes number to be always scaled by 1000: |
| parser.addMatcher(FlagHandler.PERMILLE); |
| } |
| |
| /////////////////////////////// |
| /// OTHER STANDARD MATCHERS /// |
| /////////////////////////////// |
| |
| if (!isStrict) { |
| parser.addMatcher(PlusSignMatcher.getInstance(symbols, false)); |
| parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); |
| } |
| parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); |
| parser.addMatcher(InfinityMatcher.getInstance(symbols)); |
| String padString = properties.getPadString(); |
| if (padString != null && !ignorables.getSet().contains(padString)) { |
| parser.addMatcher(PaddingMatcher.getInstance(padString)); |
| } |
| parser.addMatcher(ignorables); |
| parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); |
| if (!properties.getParseNoExponent()) { |
| parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper)); |
| } |
| |
| ////////////////// |
| /// VALIDATORS /// |
| ////////////////// |
| |
| parser.addMatcher(new RequireNumberMatcher()); |
| if (isStrict) { |
| parser.addMatcher(new RequireAffixMatcher()); |
| } |
| if (isStrict && properties.getMinimumExponentDigits() > 0) { |
| parser.addMatcher(new RequireExponentMatcher()); |
| } |
| if (parseCurrency) { |
| parser.addMatcher(new RequireCurrencyMatcher()); |
| } |
| if (properties.getDecimalPatternMatchRequired()) { |
| boolean patternHasDecimalSeparator = properties.getDecimalSeparatorAlwaysShown() |
| || properties.getMaximumFractionDigits() != 0; |
| parser.addMatcher(RequireDecimalSeparatorMatcher.getInstance(patternHasDecimalSeparator)); |
| } |
| if (properties.getMultiplier() != null) { |
| // We need to use a math context in order to prevent non-terminating decimal expansions. |
| // This is only used when dividing by the multiplier. |
| parser.addMatcher(new MultiplierHandler(properties.getMultiplier(), |
| RoundingUtils.getMathContextOr34Digits(properties))); |
| } |
| |
| parser.freeze(); |
| return parser; |
| } |
| |
/** Bit set of PARSE_FLAG_* options supplied at construction. */
private final int parseFlags;
/** Registered matchers, in registration order. */
private final List<NumberParseMatcher> matchers;
/**
 * Lead code point set of each registered matcher, at the same index as in {@link #matchers};
 * {@code null} unless PARSE_FLAG_OPTIMIZE is set.
 */
private final List<UnicodeSet> leadCodePointses;
/** Ranks candidate results in the non-greedy parse. */
private Comparator<ParsedNumber> comparator;
/** Set by {@link #freeze()}; checked by assertions in the mutators and in parse. */
private boolean frozen;

/**
 * Constructs an empty, unfrozen parser with the default result comparator.
 *
 * @param parseFlags
 *            The parser settings defined in the PARSE_FLAG_* fields.
 */
public NumberParserImpl(int parseFlags) {
    this.parseFlags = parseFlags;
    this.matchers = new ArrayList<NumberParseMatcher>();
    final boolean optimize = (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE) != 0;
    // Lead code points are tracked only when the optimization flag asks for them.
    this.leadCodePointses = optimize ? new ArrayList<UnicodeSet>() : null;
    this.comparator = ParsedNumber.COMPARATOR; // default value
    this.frozen = false;
}
| |
| public void addMatcher(NumberParseMatcher matcher) { |
| assert !frozen; |
| this.matchers.add(matcher); |
| if (leadCodePointses != null) { |
| addLeadCodePointsForMatcher(matcher); |
| } |
| } |
| |
| public void addMatchers(Collection<? extends NumberParseMatcher> matchers) { |
| assert !frozen; |
| this.matchers.addAll(matchers); |
| if (leadCodePointses != null) { |
| for (NumberParseMatcher matcher : matchers) { |
| addLeadCodePointsForMatcher(matcher); |
| } |
| } |
| } |
| |
| private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) { |
| UnicodeSet leadCodePoints = matcher.getLeadCodePoints(); |
| assert leadCodePoints.isFrozen(); |
| // TODO: Avoid the clone operation here. |
| if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)) { |
| leadCodePoints = leadCodePoints.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS) |
| .freeze(); |
| } |
| this.leadCodePointses.add(leadCodePoints); |
| } |
| |
| public void setComparator(Comparator<ParsedNumber> comparator) { |
| assert !frozen; |
| this.comparator = comparator; |
| } |
| |
| public void freeze() { |
| frozen = true; |
| } |
| |
| public void parse(String input, boolean greedy, ParsedNumber result) { |
| parse(input, 0, greedy, result); |
| } |
| |
| /** |
| * Primary entrypoint to parsing code path. |
| * |
| * @param input |
| * The string to parse. This is a String, not CharSequence, to enforce assumptions about |
| * immutability (CharSequences are not guaranteed to be immutable). |
| * @param start |
| * The index into the string at which to start parsing. |
| * @param greedy |
| * Whether to use the faster but potentially less accurate greedy code path. |
| * @param result |
| * Output variable to store results. |
| */ |
| public void parse(String input, int start, boolean greedy, ParsedNumber result) { |
| assert frozen; |
| assert start >= 0 && start < input.length(); |
| StringSegment segment = new StringSegment(input, |
| 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)); |
| segment.adjustOffset(start); |
| if (greedy) { |
| parseGreedyRecursive(segment, result); |
| } else { |
| parseLongestRecursive(segment, result); |
| } |
| for (NumberParseMatcher matcher : matchers) { |
| matcher.postProcess(result); |
| } |
| // Android Patch: to be removed in ICU 62 |
| result.postProcess(); |
| // End Android Patch |
| } |
| |
| private void parseGreedyRecursive(StringSegment segment, ParsedNumber result) { |
| // Base Case |
| if (segment.length() == 0) { |
| return; |
| } |
| |
| int initialOffset = segment.getOffset(); |
| int leadCp = segment.getCodePoint(); |
| for (int i = 0; i < matchers.size(); i++) { |
| if (leadCodePointses != null && !leadCodePointses.get(i).contains(leadCp)) { |
| continue; |
| } |
| NumberParseMatcher matcher = matchers.get(i); |
| matcher.match(segment, result); |
| if (segment.getOffset() != initialOffset) { |
| // In a greedy parse, recurse on only the first match. |
| parseGreedyRecursive(segment, result); |
| // The following line resets the offset so that the StringSegment says the same across |
| // the function |
| // call boundary. Since we recurse only once, this line is not strictly necessary. |
| segment.setOffset(initialOffset); |
| return; |
| } |
| } |
| |
| // NOTE: If we get here, the greedy parse completed without consuming the entire string. |
| } |
| |
| private void parseLongestRecursive(StringSegment segment, ParsedNumber result) { |
| // Base Case |
| if (segment.length() == 0) { |
| return; |
| } |
| |
| // TODO: Give a nice way for the matcher to reset the ParsedNumber? |
| ParsedNumber initial = new ParsedNumber(); |
| initial.copyFrom(result); |
| ParsedNumber candidate = new ParsedNumber(); |
| |
| int initialOffset = segment.getOffset(); |
| for (int i = 0; i < matchers.size(); i++) { |
| NumberParseMatcher matcher = matchers.get(i); |
| |
| // In a non-greedy parse, we attempt all possible matches and pick the best. |
| for (int charsToConsume = 0; charsToConsume < segment.length();) { |
| charsToConsume += Character.charCount(Character.codePointAt(segment, charsToConsume)); |
| |
| // Run the matcher on a segment of the current length. |
| candidate.copyFrom(initial); |
| segment.setLength(charsToConsume); |
| boolean maybeMore = matcher.match(segment, candidate); |
| segment.resetLength(); |
| |
| // If the entire segment was consumed, recurse. |
| if (segment.getOffset() - initialOffset == charsToConsume) { |
| parseLongestRecursive(segment, candidate); |
| if (comparator.compare(candidate, result) > 0) { |
| result.copyFrom(candidate); |
| } |
| } |
| |
| // Since the segment can be re-used, reset the offset. |
| // This does not have an effect if the matcher did not consume any chars. |
| segment.setOffset(initialOffset); |
| |
| // Unless the matcher wants to see the next char, continue to the next matcher. |
| if (!maybeMore) { |
| break; |
| } |
| } |
| } |
| } |
| |
| @Override |
| public String toString() { |
| return "<NumberParserImpl matchers=" + matchers.toString() + ">"; |
| } |
| } |