| /* |
| * Copyright 2008 ZXing authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.zxing.client.result; |
| |
| import com.google.zxing.Result; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.UnsupportedEncodingException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.List; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| /** |
| * Parses contact information formatted according to the VCard (2.1) format. This is not a complete |
| * implementation but should parse information as commonly encoded in 2D barcodes. |
| * |
| * @author Sean Owen |
| */ |
| public final class VCardResultParser extends ResultParser { |
| |
| private static final Pattern BEGIN_VCARD = Pattern.compile("BEGIN:VCARD", Pattern.CASE_INSENSITIVE); |
| private static final Pattern VCARD_LIKE_DATE = Pattern.compile("\\d{4}-?\\d{2}-?\\d{2}"); |
| private static final Pattern CR_LF_SPACE_TAB = Pattern.compile("\r\n[ \t]"); |
| private static final Pattern NEWLINE_ESCAPE = Pattern.compile("\\\\[nN]"); |
| private static final Pattern VCARD_ESCAPES = Pattern.compile("\\\\([,;\\\\])"); |
| private static final Pattern EQUALS = Pattern.compile("="); |
| private static final Pattern SEMICOLON = Pattern.compile(";"); |
| private static final Pattern UNESCAPED_SEMICOLONS = Pattern.compile("(?<!\\\\);+"); |
| |
| @Override |
| public AddressBookParsedResult parse(Result result) { |
| // Although we should insist on the raw text ending with "END:VCARD", there's no reason |
| // to throw out everything else we parsed just because this was omitted. In fact, Eclair |
| // is doing just that, and we can't parse its contacts without this leniency. |
| String rawText = getMassagedText(result); |
| Matcher m = BEGIN_VCARD.matcher(rawText); |
| if (!m.find() || m.start() != 0) { |
| return null; |
| } |
| List<List<String>> names = matchVCardPrefixedField("FN", rawText, true, false); |
| if (names == null) { |
| // If no display names found, look for regular name fields and format them |
| names = matchVCardPrefixedField("N", rawText, true, false); |
| formatNames(names); |
| } |
| List<List<String>> phoneNumbers = matchVCardPrefixedField("TEL", rawText, true, false); |
| List<List<String>> emails = matchVCardPrefixedField("EMAIL", rawText, true, false); |
| List<String> note = matchSingleVCardPrefixedField("NOTE", rawText, false, false); |
| List<List<String>> addresses = matchVCardPrefixedField("ADR", rawText, true, true); |
| List<String> org = matchSingleVCardPrefixedField("ORG", rawText, true, true); |
| List<String> birthday = matchSingleVCardPrefixedField("BDAY", rawText, true, false); |
| if (birthday != null && !isLikeVCardDate(birthday.get(0))) { |
| birthday = null; |
| } |
| List<String> title = matchSingleVCardPrefixedField("TITLE", rawText, true, false); |
| List<String> url = matchSingleVCardPrefixedField("URL", rawText, true, false); |
| List<String> instantMessenger = matchSingleVCardPrefixedField("IMPP", rawText, true, false); |
| return new AddressBookParsedResult(toPrimaryValues(names), |
| null, |
| toPrimaryValues(phoneNumbers), |
| toTypes(phoneNumbers), |
| toPrimaryValues(emails), |
| toTypes(emails), |
| toPrimaryValue(instantMessenger), |
| toPrimaryValue(note), |
| toPrimaryValues(addresses), |
| toTypes(addresses), |
| toPrimaryValue(org), |
| toPrimaryValue(birthday), |
| toPrimaryValue(title), |
| toPrimaryValue(url)); |
| } |
| |
| private static List<List<String>> matchVCardPrefixedField(CharSequence prefix, |
| String rawText, |
| boolean trim, |
| boolean parseFieldDivider) { |
| List<List<String>> matches = null; |
| int i = 0; |
| int max = rawText.length(); |
| |
| while (i < max) { |
| |
| // At start or after newline, match prefix, followed by optional metadata |
| // (led by ;) ultimately ending in colon |
| Matcher matcher = Pattern.compile("(?:^|\n)" + prefix + "(?:;([^:]*))?:", |
| Pattern.CASE_INSENSITIVE).matcher(rawText); |
| if (i > 0) { |
| i--; // Find from i-1 not i since looking at the preceding character |
| } |
| if (!matcher.find(i)) { |
| break; |
| } |
| i = matcher.end(0); // group 0 = whole pattern; end(0) is past final colon |
| |
| String metadataString = matcher.group(1); // group 1 = metadata substring |
| List<String> metadata = null; |
| boolean quotedPrintable = false; |
| String quotedPrintableCharset = null; |
| if (metadataString != null) { |
| for (String metadatum : SEMICOLON.split(metadataString)) { |
| if (metadata == null) { |
| metadata = new ArrayList<String>(1); |
| } |
| metadata.add(metadatum); |
| String[] metadatumTokens = EQUALS.split(metadatum, 2); |
| if (metadatumTokens.length > 1) { |
| String key = metadatumTokens[0]; |
| String value = metadatumTokens[1]; |
| if ("ENCODING".equalsIgnoreCase(key) && "QUOTED-PRINTABLE".equalsIgnoreCase(value)) { |
| quotedPrintable = true; |
| } else if ("CHARSET".equalsIgnoreCase(key)) { |
| quotedPrintableCharset = value; |
| } |
| } |
| } |
| } |
| |
| int matchStart = i; // Found the start of a match here |
| |
| while ((i = rawText.indexOf((int) '\n', i)) >= 0) { // Really, end in \r\n |
| if (i < rawText.length() - 1 && // But if followed by tab or space, |
| (rawText.charAt(i+1) == ' ' || // this is only a continuation |
| rawText.charAt(i+1) == '\t')) { |
| i += 2; // Skip \n and continutation whitespace |
| } else if (quotedPrintable && // If preceded by = in quoted printable |
| ((i >= 1 && rawText.charAt(i-1) == '=') || // this is a continuation |
| (i >= 2 && rawText.charAt(i-2) == '='))) { |
| i++; // Skip \n |
| } else { |
| break; |
| } |
| } |
| |
| if (i < 0) { |
| // No terminating end character? uh, done. Set i such that loop terminates and break |
| i = max; |
| } else if (i > matchStart) { |
| // found a match |
| if (matches == null) { |
| matches = new ArrayList<List<String>>(1); // lazy init |
| } |
| if (i >= 1 && rawText.charAt(i-1) == '\r') { |
| i--; // Back up over \r, which really should be there |
| } |
| String element = rawText.substring(matchStart, i); |
| if (trim) { |
| element = element.trim(); |
| } |
| if (quotedPrintable) { |
| element = decodeQuotedPrintable(element, quotedPrintableCharset); |
| if (parseFieldDivider) { |
| element = UNESCAPED_SEMICOLONS.matcher(element).replaceAll("\n").trim(); |
| } |
| } else { |
| if (parseFieldDivider) { |
| element = UNESCAPED_SEMICOLONS.matcher(element).replaceAll("\n").trim(); |
| } |
| element = CR_LF_SPACE_TAB.matcher(element).replaceAll(""); |
| element = NEWLINE_ESCAPE.matcher(element).replaceAll("\n"); |
| element = VCARD_ESCAPES.matcher(element).replaceAll("$1"); |
| } |
| if (metadata == null) { |
| List<String> match = new ArrayList<String>(1); |
| match.add(element); |
| matches.add(match); |
| } else { |
| metadata.add(0, element); |
| matches.add(metadata); |
| } |
| i++; |
| } else { |
| i++; |
| } |
| |
| } |
| |
| return matches; |
| } |
| |
| private static String decodeQuotedPrintable(CharSequence value, String charset) { |
| int length = value.length(); |
| StringBuilder result = new StringBuilder(length); |
| ByteArrayOutputStream fragmentBuffer = new ByteArrayOutputStream(); |
| for (int i = 0; i < length; i++) { |
| char c = value.charAt(i); |
| switch (c) { |
| case '\r': |
| case '\n': |
| break; |
| case '=': |
| if (i < length - 2) { |
| char nextChar = value.charAt(i+1); |
| if (nextChar != '\r' && nextChar != '\n') { |
| char nextNextChar = value.charAt(i+2); |
| int firstDigit = parseHexDigit(nextChar); |
| int secondDigit = parseHexDigit(nextNextChar); |
| if (firstDigit >= 0 && secondDigit >= 0) { |
| fragmentBuffer.write((firstDigit << 4) + secondDigit); |
| } // else ignore it, assume it was incorrectly encoded |
| i += 2; |
| } |
| } |
| break; |
| default: |
| maybeAppendFragment(fragmentBuffer, charset, result); |
| result.append(c); |
| } |
| } |
| maybeAppendFragment(fragmentBuffer, charset, result); |
| return result.toString(); |
| } |
| |
| private static void maybeAppendFragment(ByteArrayOutputStream fragmentBuffer, |
| String charset, |
| StringBuilder result) { |
| if (fragmentBuffer.size() > 0) { |
| byte[] fragmentBytes = fragmentBuffer.toByteArray(); |
| String fragment; |
| if (charset == null) { |
| fragment = new String(fragmentBytes); |
| } else { |
| try { |
| fragment = new String(fragmentBytes, charset); |
| } catch (UnsupportedEncodingException e) { |
| // Yikes, well try anyway: |
| fragment = new String(fragmentBytes); |
| } |
| } |
| fragmentBuffer.reset(); |
| result.append(fragment); |
| } |
| } |
| |
| static List<String> matchSingleVCardPrefixedField(CharSequence prefix, |
| String rawText, |
| boolean trim, |
| boolean parseFieldDivider) { |
| List<List<String>> values = matchVCardPrefixedField(prefix, rawText, trim, parseFieldDivider); |
| return values == null || values.isEmpty() ? null : values.get(0); |
| } |
| |
| private static String toPrimaryValue(List<String> list) { |
| return list == null || list.isEmpty() ? null : list.get(0); |
| } |
| |
| private static String[] toPrimaryValues(Collection<List<String>> lists) { |
| if (lists == null || lists.isEmpty()) { |
| return null; |
| } |
| List<String> result = new ArrayList<String>(lists.size()); |
| for (List<String> list : lists) { |
| result.add(list.get(0)); |
| } |
| return result.toArray(new String[lists.size()]); |
| } |
| |
| private static String[] toTypes(Collection<List<String>> lists) { |
| if (lists == null || lists.isEmpty()) { |
| return null; |
| } |
| List<String> result = new ArrayList<String>(lists.size()); |
| for (List<String> list : lists) { |
| String type = null; |
| for (int i = 1; i < list.size(); i++) { |
| String metadatum = list.get(i); |
| int equals = metadatum.indexOf('='); |
| if (equals < 0) { |
| // take the whole thing as a usable label |
| type = metadatum; |
| break; |
| } |
| if ("TYPE".equalsIgnoreCase(metadatum.substring(0, equals))) { |
| type = metadatum.substring(equals + 1); |
| break; |
| } |
| } |
| result.add(type); |
| } |
| return result.toArray(new String[lists.size()]); |
| } |
| |
| private static boolean isLikeVCardDate(CharSequence value) { |
| return value == null || VCARD_LIKE_DATE.matcher(value).matches(); |
| } |
| |
| /** |
| * Formats name fields of the form "Public;John;Q.;Reverend;III" into a form like |
| * "Reverend John Q. Public III". |
| * |
| * @param names name values to format, in place |
| */ |
| private static void formatNames(Iterable<List<String>> names) { |
| if (names != null) { |
| for (List<String> list : names) { |
| String name = list.get(0); |
| String[] components = new String[5]; |
| int start = 0; |
| int end; |
| int componentIndex = 0; |
| while ((end = name.indexOf(';', start)) > 0) { |
| components[componentIndex] = name.substring(start, end); |
| componentIndex++; |
| start = end + 1; |
| } |
| components[componentIndex] = name.substring(start); |
| StringBuilder newName = new StringBuilder(100); |
| maybeAppendComponent(components, 3, newName); |
| maybeAppendComponent(components, 1, newName); |
| maybeAppendComponent(components, 2, newName); |
| maybeAppendComponent(components, 0, newName); |
| maybeAppendComponent(components, 4, newName); |
| list.set(0, newName.toString().trim()); |
| } |
| } |
| } |
| |
| private static void maybeAppendComponent(String[] components, int i, StringBuilder newName) { |
| if (components[i] != null) { |
| newName.append(' '); |
| newName.append(components[i]); |
| } |
| } |
| |
| } |