core/java/com/google/android/util/AbstractMessageParser.java - platform/frameworks/base - Git at Google

 /*
  * Copyright (C) 2007 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.android.util;

 import android.compat.annotation.UnsupportedAppUsage;

 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  *
  * Logic for parsing a text message typed by the user, looking for smileys,
  * urls, acronyms, formatting (e.g., '*'s for bold), me commands
  * (e.g., "/me is asleep"), and punctuation.
  *
  * It constructs an array, which breaks the text up into its
  * constituent pieces, which we return to the client.
  *
  */
 public abstract class AbstractMessageParser {
 /**
  * Interface representing the set of resources needed by a message parser
  *
  * @author jessan (Jessan Hutchison-Quillian)
  */
   public static interface Resources {

     /**
      * Returns the set of URL schemes the parser knows about.
      */
     Set<String> getSchemes();

     /**
      * Returns the trie holding the possible values for the last part of a
      * domain name. The values are expected to be stored reversed.
      */
     TrieNode getDomainSuffixes();

     /**
      * Returns the trie of smileys the parser accepts.
      */
     TrieNode getSmileys();

     /**
      * Returns the trie of acronyms the parser accepts.
      */
     TrieNode getAcronyms();
   }

   /**
    * Subclasses must define the schemes, domains, smileys and acronyms
    * that are necessary for parsing.
    *
    * @return the resources consulted by this class when it looks for
    *         smileys, acronyms and urls
    */
   protected abstract Resources getResources();

   /**
    * Music note that indicates user is listening to a music track. The marker
    * is the note character followed by a single space.
    */
   public static final String musicNote = "\u266B ";

   // The message being parsed; parse() removes a leading "/me " from it.
   private String text;
   // Index into text of the next character that has not been consumed yet.
   private int nextChar;
   // Counter bumped by getCharClass() each time it classifies a punctuation
   // character, so that every such call yields a distinct class value.
   private int nextClass;
   // The parsed result, exposed through getPart()/getParts().
   private ArrayList<Part> parts;
   // Tokens collected while scanning; buildParts() groups them into parts.
   private ArrayList<Token> tokens;
   // Format characters whose opening Format token has not been matched yet.
   private HashMap<Character,Format> formatStart;
   // Switches selecting which kinds of content the parser looks for.
   private boolean parseSmilies;
   private boolean parseAcronyms;
   private boolean parseFormatting;
   private boolean parseUrls;
   private boolean parseMeText;
   private boolean parseMusic;

   /**
    * Creates a message parser that looks for everything it supports: smileys,
    * acronyms, formatting, urls, music and /me text.
    *
    * @param text the text to parse
    */
   public AbstractMessageParser(String text) {
     this(text,
         true,   // parseSmilies
         true,   // parseAcronyms
         true,   // parseFormatting
         true,   // parseUrls
         true,   // parseMusic
         true);  // parseMeText
   }

   /**
    * Creates a message parser that only looks for the requested kinds of text.
    *
    * @param text the text to parse
    * @param parseSmilies whether to look for smileys
    * @param parseAcronyms whether to look for acronyms
    * @param parseFormatting whether to look for formatting characters
    * @param parseUrls whether to look for urls
    * @param parseMusic whether to look for a leading music note
    * @param parseMeText whether to look for a leading "/me" command
    */
   public AbstractMessageParser(String text, boolean parseSmilies,
       boolean parseAcronyms, boolean parseFormatting, boolean parseUrls,
       boolean parseMusic, boolean parseMeText) {
     // Which kinds of content to look for.
     this.parseMeText = parseMeText;
     this.parseMusic = parseMusic;
     this.parseUrls = parseUrls;
     this.parseFormatting = parseFormatting;
     this.parseAcronyms = parseAcronyms;
     this.parseSmilies = parseSmilies;

     // Input and scanning position.
     this.text = text;
     this.nextChar = 0;
     this.nextClass = 10;

     // Containers filled in while parsing.
     this.formatStart = new HashMap<Character,Format>();
     this.tokens = new ArrayList<Token>();
     this.parts = new ArrayList<Part>();
   }

   /**
    * Returns the raw text being parsed. Once parse() has recognized a leading
    * "/me " command, that prefix is no longer part of the returned text.
    */
   public final String getRawText() {
     return this.text;
   }

   /**
    * Returns how many parts the parsed text consists of.
    */
   public final int getPartCount() {
     return this.parts.size();
   }

   /**
    * Returns the part stored at the given position.
    *
    * @param index zero-based position of the part
    * @throws IndexOutOfBoundsException if index is outside the list of parts
    */
   public final Part getPart(int index) {
     return this.parts.get(index);
   }

   /**
    * Returns the list of parts from the parsed text. This is the parser's own
    * list, not a copy.
    */
   public final List<Part> getParts() {
     return this.parts;
   }

   /**
    * Parses the text string into an internal representation.
    *
    * The text is first checked for a music track, then for a leading "/me"
    * command, and is then split into tokens which are finally grouped into
    * the parts available through getPartCount(), getPart() and getParts().
    *
    * @throws AssertionError if a token did not end at a word break (or, right
    *         after a smiley, at a smiley break)
    */
   public void parse() {
     // Look for music track (of which there would be only one and it'll be the
     // first token)
     if (parseMusicTrack()) {
       buildParts(null);
       return;
     }

     // Look for me commands.  The command is "/me" followed by one whitespace
     // character; those four characters are cut off the text being parsed.
     String meText = null;
     if (parseMeText && text.startsWith("/me") && (text.length() > 3) &&
         Character.isWhitespace(text.charAt(3))) {
       meText = text.substring(0, 4);
       text = text.substring(4);
     }

     // Break the text into tokens.
     boolean wasSmiley = false;
     while (nextChar < text.length()) {
       // Sanity check: the previous token must have stopped at a word break.
       // Directly after a smiley a smiley break is accepted as well.
       if (!isWordBreak(nextChar)) {
         if (!wasSmiley || !isSmileyBreak(nextChar)) {
           throw new AssertionError("last chunk did not end at word break");
         }
       }

       // Smileys are tried first; otherwise acronym, URL and formatting are
       // tried in that order, and plain text is the fallback.
       if (parseSmiley()) {
         wasSmiley = true;
       } else {
         wasSmiley = false;

         if (!parseAcronym() && !parseURL() && !parseFormatting()) {
           parseText();
         }
       }
     }

     // Trim the whitespace before and after media components.
     // NOTE(review): the token before the media gets its *leading* whitespace
     // trimmed and the token after it its *trailing* whitespace, which looks
     // inverted relative to the comment above -- confirm intent before changing.
     for (int i = 0; i < tokens.size(); ++i) {
       if (tokens.get(i).isMedia()) {
         if ((i > 0) && (tokens.get(i - 1) instanceof Html)) {
           ((Html)tokens.get(i - 1)).trimLeadingWhitespace();
         }
         if ((i + 1 < tokens.size()) && (tokens.get(i + 1) instanceof Html)) {
           ((Html)tokens.get(i + 1)).trimTrailingWhitespace();
         }
       }
     }

     // Remove any empty html tokens.
     for (int i = 0; i < tokens.size(); ++i) {
       if (tokens.get(i).isHtml() &&
           (tokens.get(i).toHtml(true).length() == 0)) {
         tokens.remove(i);
         --i;  // visit this index again
       }
     }

     buildParts(meText);
   }

   /**
    * Gets the appropriate Token for a given URL.
    *
    * The url is offered to each media type in turn (Google Video, YouTube,
    * Picasa photo, Flickr photo); the first one that recognizes it wins. A
    * url that no media type recognizes becomes a regular Link.
    *
    * @param url the url, may be null
    * @param text the anchor text
    * @return the token for the url, or null if url is null
    */
   public static Token tokenForUrl(String url, String text) {
     if (url == null) {
       return null;
     }

     // Video links first, then photo links.
     Token media = Video.matchURL(url, text);
     if (media == null) {
       media = YouTubeVideo.matchURL(url, text);
     }
     if (media == null) {
       media = Photo.matchURL(url, text);
     }
     if (media == null) {
       media = FlickrPhoto.matchURL(url, text);
     }

     // Not media, so must be a regular URL.
     return (media != null) ? media : new Link(url, text);
   }

   /**
    * Groups the collected tokens into parts. A media token always opens a new
    * part, and so does any token that follows a media part; all other tokens
    * are appended to the current part.
    *
    * @param meText any meText parsed from the message, may be null
    */
   private void buildParts(String meText) {
     int position = 0;
     while (position < tokens.size()) {
       final Token current = tokens.get(position);
       final boolean opensNewPart =
           current.isMedia() || (parts.size() == 0) || lastPart().isMedia();
       if (opensNewPart) {
         parts.add(new Part());
       }
       lastPart().add(current);
       position += 1;
     }

     // The meText of the line belongs to the first part, if there is one.
     if (!parts.isEmpty()) {
       parts.get(0).setMeText(meText);
     }
   }

   /**
    * Returns the part that was added most recently. The list of parts must
    * not be empty.
    */
   private Part lastPart() {
     final int lastIndex = parts.size() - 1;
     return parts.get(lastIndex);
   }

   /**
    * Looks for a music track: the text starts with the music note marker and
    * everything after the marker is track info. When found, the whole text is
    * consumed as a single MusicTrack token.
    *
    * @return true if a music track was found
    */
   private boolean parseMusicTrack() {
     if (!parseMusic || !text.startsWith(musicNote)) {
       return false;
     }

     final String trackInfo = text.substring(musicNote.length());
     addToken(new MusicTrack(trackInfo));
     nextChar = text.length();
     return true;
   }

   /**
    * Consumes all of the text in the next word, i.e. at least one character
    * and then everything up to the next word break. The word is added as an
    * Html token whose html form has markup characters escaped and newlines
    * turned into line breaks.
    */
   private void parseText() {
     final int wordStart = nextChar;
     final StringBuilder escaped = new StringBuilder();

     do {
       final char current = text.charAt(nextChar++);
       if (current == '<') {
         escaped.append("&lt;");
       } else if (current == '>') {
         escaped.append("&gt;");
       } else if (current == '&') {
         escaped.append("&amp;");
       } else if (current == '"') {
         escaped.append("&quot;");
       } else if (current == '\'') {
         escaped.append("&apos;");
       } else if (current == '\n') {
         escaped.append("<br>");
       } else {
         escaped.append(current);
       }
     } while (!isWordBreak(nextChar));

     final String rawWord = text.substring(wordStart, nextChar);
     addToken(new Html(rawWord, escaped.toString()));
   }

   /**
    * Looks for smileys (e.g., ":)") at the current position, using the trie
    * returned by getResources().getSmileys(). A match that has a letter or
    * digit on both sides is rejected.
    *
    * @return true if a smiley token was added and consumed
    */
   private boolean parseSmiley() {
     if (!parseSmilies) {
       return false;
     }

     final TrieNode smiley =
         longestMatch(getResources().getSmileys(), this, nextChar, true);
     if (smiley == null) {
       return false;
     }

     // Classes 2 and 3 are letters and digits respectively.
     final int classBefore = getCharClass(nextChar - 1);
     final int classAfter = getCharClass(nextChar + smiley.getText().length());
     final boolean alnumBefore = (classBefore == 2) || (classBefore == 3);
     final boolean alnumAfter = (classAfter == 2) || (classAfter == 3);
     if (alnumBefore && alnumAfter) {
       return false;
     }

     addToken(new Smiley(smiley.getText()));
     nextChar += smiley.getText().length();
     return true;
   }

   /**
    * Looks for acronyms (e.g., "lol") at the current position, using the trie
    * returned by getResources().getAcronyms().
    *
    * @return true if an acronym token was added and consumed
    */
   private boolean parseAcronym() {
     if (!parseAcronyms) {
       return false;
     }

     final TrieNode acronym =
         longestMatch(getResources().getAcronyms(), this, nextChar);
     if (acronym == null) {
       return false;
     }

     addToken(new Acronym(acronym.getText(), acronym.getValue()));
     nextChar += acronym.getText().length();
     return true;
   }

   /**
    * Determines if this is an allowable domain character: a hyphen, a letter
    * or a digit.
    */
   private boolean isDomainChar(char c) {
     if (c == '-') {
       return true;
     }
     return Character.isLetter(c) || Character.isDigit(c);
   }

   /**
    * Determines if the given string is a valid domain, meaning that it ends
    * with a known domain suffix.
    */
   private boolean isValidDomain(String domain) {
     // The candidate is reversed before it is looked up in the suffix trie.
     return matches(getResources().getDomainSuffixes(), reverse(domain));
   }

   /**
    * Looks for a URL in two possible forms:  either a proper URL with a known
    * scheme or a domain name optionally followed by a port, path, query, or
    * fragment.
    *
    * On success a token for the URL is added and the URL text is consumed.
    * A URL given as a bare domain name gets "http://" prepended to form the
    * url of the token, while the anchor text stays as typed.
    *
    * @return true if a URL was found at the current position
    */
   private boolean parseURL() {
     // Make sure this is a valid place to start a URL.
     if (!parseUrls || !isURLBreak(nextChar)) {
       return false;
     }

     int start = nextChar;

     // Search for the first block of letters.
     int index = start;
     while ((index < text.length()) && isDomainChar(text.charAt(index))) {
       index += 1;
     }

     // url holds the prefix to put in front of the typed text; done tells
     // whether the end of the URL has already been located.
     String url = "";
     boolean done = false;

     if (index == text.length()) {
       // Nothing follows the first block, so it is neither scheme nor domain.
       return false;
     } else if (text.charAt(index) == ':') {
       // Make sure this is a known scheme.
       String scheme = text.substring(nextChar, index);
       if (!getResources().getSchemes().contains(scheme)) {
         return false;
       }
       // done stays false here, so everything up to the next whitespace is
       // taken as the URL below.
     } else if (text.charAt(index) == '.') {
       // Search for the end of the domain name.
       while (index < text.length()) {
         char ch = text.charAt(index);
         if ((ch != '.') && !isDomainChar(ch)) {
           break;
         } else {
           index += 1;
         }
       }

       // Make sure the domain name has a valid suffix.  Since tries look for
       // prefix matches, we reverse all the strings to get suffix comparisons.
       String domain = text.substring(nextChar, index);
       if (!isValidDomain(domain)) {
         return false;
       }

       // Search for a port.  We deal with this specially because a colon can
       // also be a punctuation character.
       if ((index + 1 < text.length()) && (text.charAt(index) == ':')) {
         char ch = text.charAt(index + 1);
         if (Character.isDigit(ch)) {
           index += 1;
           while ((index < text.length()) &&
                  Character.isDigit(text.charAt(index))) {
             index += 1;
           }
         }
       }

       // The domain name should be followed by end of line, whitespace,
       // punctuation, or a colon, slash, question, or hash character.  The
       // tricky part here is that some URL characters are also punctuation, so
       // we need to distinguish them.  Since we looked for ports above, a colon
       // is always punctuation here.  To distinguish '?' cases, we look at the
       // character that follows it.
       if (index == text.length()) {
         done = true;
       } else {
         char ch = text.charAt(index);
         if (ch == '?') {
           // If the next character is whitespace or punctuation (or missing),
           // then this question mark looks like punctuation.
           if (index + 1 == text.length()) {
             done = true;
           } else {
             char ch2 = text.charAt(index + 1);
             if (Character.isWhitespace(ch2) || isPunctuation(ch2)) {
               done = true;
             }
           }
         } else if (isPunctuation(ch)) {
           done = true;
         } else if (Character.isWhitespace(ch)) {
           done = true;
         } else if ((ch == '/') || (ch == '#')) {
           // In this case, the URL is not done.  We will search for the end of
           // it below.
         } else {
           return false;
         }
       }

       // We will assume the user meant HTTP.  (One weird case is where they
       // type a port of 443.  That could mean HTTPS, but they might also want
       // HTTP.  We'll let them specify if they don't want HTTP.)
       url = "http://";
     } else {
       return false;
     }

     // If the URL is not done, search for the end, which is just before the
     // next whitespace character.
     if (!done) {
       while ((index < text.length()) &&
              !Character.isWhitespace(text.charAt(index))) {
         index += 1;
       }
     }

     // urlText is exactly what the user typed; url additionally carries the
     // assumed "http://" prefix when only a domain name was given.
     String urlText = text.substring(start, index);
     url += urlText;

     // Figure out the appropriate token type.
     addURLToken(url, urlText);

     nextChar = index;
     return true;
   }

   /**
    * Adds the token that tokenForUrl() picks for the given URL, which is
    * either a recognized media type or a simple link.
    *
    * @param url the url
    * @param text the anchor text
    */
   private void addURLToken(String url, String text) {
     final Token urlToken = tokenForUrl(url, text);
     addToken(urlToken);
   }

   /**
    * Deal with formatting characters.
    *
    * Parsing is as follows:
    *  - Treat all contiguous strings of formatting characters as one block.
    *    (This method processes one block.)
    *  - Only a single instance of a particular format character within a block
    *    is used to determine whether to turn on/off that type of formatting;
    *    other instances simply print the character itself.
    *  - If the format is to be turned on, we use the _first_ instance; if it
    *    is to be turned off, we use the _last_ instance (by appending the
    *    format.)
    *
    * Example:
    *   **string** turns into <b>*string*</b>
    *
    * @return true if a block of formatting characters was consumed; false if
    *         formatting is disabled, there is no formatting character at the
    *         current position, or the block does not end at a word break
    */
   private boolean parseFormatting() {
     if (!parseFormatting) {
       return false;
     }

     // Find the end of the contiguous run of formatting characters.
     int endChar = nextChar;
     while ((endChar < text.length()) && isFormatChar(text.charAt(endChar))) {
       endChar += 1;
     }

     if ((endChar == nextChar) || !isWordBreak(endChar)) {
       return false;
     }

     // Keeps track of whether we've seen a character (in map if we've seen it)
     // and whether we should append a closing format token (if value in
     // map is TRUE).  Linked hashmap for consistent ordering.
     LinkedHashMap<Character, Boolean> seenCharacters =
         new LinkedHashMap<Character, Boolean>();

     for (int index = nextChar; index < endChar; ++index) {
       char ch = text.charAt(index);
       Character key = Character.valueOf(ch);
       if (seenCharacters.containsKey(key)) {
         // Already seen this character, just append an unmatched token, which
         // will print plaintext character
         addToken(new Format(ch, false));
       } else {
         Format start = formatStart.get(key);
         if (start != null) {
           // Match the start token, and ask an end token to be appended
           start.setMatched(true);
           formatStart.remove(key);
           seenCharacters.put(key, Boolean.TRUE);
         } else {
           // Append start token
           start = new Format(ch, true);
           formatStart.put(key, start);
           addToken(start);
           seenCharacters.put(key, Boolean.FALSE);
         }
       }
     }

     // Append any necessary end tokens.  The flag is read by value rather than
     // compared by reference, and each entry is looked up only once.
     for (java.util.Map.Entry<Character, Boolean> seen
         : seenCharacters.entrySet()) {
       if (seen.getValue().booleanValue()) {
         Format end = new Format(seen.getKey().charValue(), false);
         end.setMatched(true);
         addToken(end);
       }
     }

     nextChar = endChar;
     return true;
   }

   /**
    * Determines whether the given index could be a possible word break, which
    * is the case when the characters on either side of it belong to different
    * character classes.
    */
   private boolean isWordBreak(int index) {
     final int classBefore = getCharClass(index - 1);
     final int classAt = getCharClass(index);
     return classBefore != classAt;
   }

   /**
    * Determines whether the given index could be a possible smiley break.
    * Positions at the very start or end of the text never are.
    */
   private boolean isSmileyBreak(int index) {
     if ((index <= 0) || (index >= text.length())) {
       return false;
     }
     return isSmileyBreak(text.charAt(index - 1), text.charAt(index));
   }

   /**
    * Verifies that the character before the given index is end of line,
    * whitespace, or punctuation, i.e. that it is not a letter (class 2), a
    * digit (class 3) or any other non-punctuation character (class 4).
    */
   private boolean isURLBreak(int index) {
     final int previousClass = getCharClass(index - 1);
     final boolean blocksUrl = (previousClass == 2)
         || (previousClass == 3)
         || (previousClass == 4);
     return !blocksUrl;
   }

   /**
    * Returns the class for the character at the given index:
    * 0 for a position outside the text, 1 for whitespace, 2 for a letter,
    * 3 for a digit, 4 for anything else that is not punctuation, and a fresh,
    * never before returned value for punctuation.
    */
   private int getCharClass(int index) {
     final boolean outsideText = (index < 0) || (index >= text.length());
     if (outsideText) {
       return 0;
     }

     final char current = text.charAt(index);
     if (Character.isWhitespace(current)) {
       return 1;
     }
     if (Character.isLetter(current)) {
       return 2;
     }
     if (Character.isDigit(current)) {
       return 3;
     }
     if (!isPunctuation(current)) {
       return 4;
     }

     // Punctuation gets a unique value on every call, so it always differs
     // from any other character and is always a possible word break.
     nextClass += 1;
     return nextClass;
   }

   /**
    * Returns true if <code>c1</code> could be the last character of
    * a smiley and <code>c2</code> could be the first character of
    * a different smiley, if {@link #isWordBreak} would not already
    * recognize that this is possible.
    */
   private static boolean isSmileyBreak(char c1, char c2) {
     // Characters that can end smileys, but don't normally end words.
     final String smileyEnders = "$&*+-/<=>@[\\]^|}~";
     // Characters that can begin smileys, but don't normally begin words.
     final String smileyStarters = "#$%*/<=>@[\\^~";

     return (smileyEnders.indexOf(c1) >= 0) && (smileyStarters.indexOf(c2) >= 0);
   }

   /**
    * Determines whether the given character is punctuation, i.e. one of
    * <code>. , " : ; ? ! ( )</code>.
    */
   private static boolean isPunctuation(char ch) {
     final String punctuation = ".,\":;?!()";
     return punctuation.indexOf(ch) >= 0;
   }

   /**
    * Determines whether the given character is the beginning or end of a
    * section with special formatting, i.e. one of <code>* _ ^</code>.
    */
   private static boolean isFormatChar(char ch) {
     return (ch == '*') || (ch == '_') || (ch == '^');
   }

   /** A single unit of parsed output. */
   public static abstract class Token {
     @UnsupportedAppUsage(implicitMember =
             "values()[Lcom/google/android/util/AbstractMessageParser$Token$Type;")
     public enum Type {

       @UnsupportedAppUsage HTML("html"),
       @UnsupportedAppUsage FORMAT("format"),  // subtype of HTML
       @UnsupportedAppUsage LINK("l"),
       @UnsupportedAppUsage SMILEY("e"),
       @UnsupportedAppUsage ACRONYM("a"),
       @UnsupportedAppUsage MUSIC("m"),
       @UnsupportedAppUsage GOOGLE_VIDEO("v"),
       @UnsupportedAppUsage YOUTUBE_VIDEO("yt"),
       @UnsupportedAppUsage PHOTO("p"),
       @UnsupportedAppUsage FLICKR("f");

       // The string forms of HTML and FORMAT don't really matter, because
       // those types don't define getInfo(), which is where the string form
       // is used. For the other types, code depends on the exact string.
       private final String stringRep;

       Type(String stringRep) {
         this.stringRep = stringRep;
       }

       /** {@inheritDoc} */
       @Override
       public String toString() {
         return stringRep;
       }
     }

     protected Type type;
     protected String text;

     /**
      * @param type the kind of token
      * @param text the raw text of the token
      */
     protected Token(Type type, String text) {
       this.text = text;
       this.type = type;
     }

     /** Returns the type of the token. */
     public Type getType() {
       return type;
     }

     /**
      * Gets the relevant information about a token.
      *
      * @return a new list of strings representing the token, not null.
      *         The first item is always a string representation of the type
      */
     public List<String> getInfo() {
       final List<String> details = new ArrayList<String>();
       details.add(getType().toString());
       return details;
     }

     /** Returns the raw text of the token. */
     public String getRawText() {
       return text;
     }

     /** Tells whether this token is a media component; false by default. */
     public boolean isMedia() {
       return false;
     }

     /** Tells whether this token is html. */
     public abstract boolean isHtml();

     /** Tells whether this token is an array, i.e. not html. */
     public boolean isArray() {
       return !isHtml();
     }

     /**
      * Returns the html form of the token. Only html tokens support this.
      *
      * @throws AssertionError always, unless overridden
      */
     public String toHtml(boolean caps) {
       throw new AssertionError("not html");
     }

     // The token can change the caps of the text after that point.
     public boolean controlCaps() {
       return false;
     }

     public boolean setCaps() {
       return false;
     }
   }

   /** Represents a simple string of html text. */
   public static class Html extends Token {
     private String html;

     public Html(String text, String html) {
       super(Type.HTML, text);
       this.html = html;
     }

     public boolean isHtml() { return true; }
     public String toHtml(boolean caps) {
       return caps ? html.toUpperCase() : html;
     }
     /**
      * Not supported. Info should not be needed for this type
      */
     public List<String> getInfo() {
       throw new UnsupportedOperationException();
     }

     public void trimLeadingWhitespace() {
       text = trimLeadingWhitespace(text);
       html = trimLeadingWhitespace(html);
     }

     public void trimTrailingWhitespace() {
       text = trimTrailingWhitespace(text);
       html = trimTrailingWhitespace(html);
     }

     private static String trimLeadingWhitespace(String text) {
       int index = 0;
       while ((index < text.length()) &&
              Character.isWhitespace(text.charAt(index))) {
         ++index;
       }
       return text.substring(index);
     }

     public static String trimTrailingWhitespace(String text) {
       int index = text.length();
       while ((index > 0) && Character.isWhitespace(text.charAt(index - 1))) {
         --index;
       }
       return text.substring(0, index);
     }
   }

   /** Represents a music track token at the beginning. */
   public static class MusicTrack extends Token {
     private String track;

     public MusicTrack(String track) {
       super(Type.MUSIC, track);
       this.track = track;
     }

     public String getTrack() { return track; }

     public boolean isHtml() { return false; }

     public List<String> getInfo() {
       List<String> info = super.getInfo();
       info.add(getTrack());
       return info;
     }
   }

   /** Represents a link that was found in the input. */
   public static class Link extends Token {
     private String url;

     public Link(String url, String text) {
       super(Type.LINK, text);
       this.url = url;
     }

     public String getURL() { return url; }

     public boolean isHtml() { return false; }

     public List<String> getInfo() {
       List<String> info = super.getInfo();
       info.add(getURL());
       info.add(getRawText());
       return info;
     }
   }

   /** Represents a link to a Google Video. */
   public static class Video extends Token {
     /** Pattern for a video URL. */
     private static final Pattern URL_PATTERN = Pattern.compile(
         "(?i)http://video\\.google\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/videoplay\\?"
         + ".*?\\bdocid=(-?\\d+).*");

     private String docid;

     public Video(String docid, String text) {
       super(Type.GOOGLE_VIDEO, text);
       this.docid = docid;
     }

     public String getDocID() { return docid; }

     public boolean isHtml() { return false; }
     public boolean isMedia() { return true; }

     /** Returns a Video object if the given url is to a video. */
     public static Video matchURL(String url, String text) {
       Matcher m = URL_PATTERN.matcher(url);
       if (m.matches()) {
         return new Video(m.group(1), text);
       } else {
         return null;
       }
     }

     public List<String> getInfo() {
       List<String> info = super.getInfo();
       info.add(getRssUrl(docid));
       info.add(getURL(docid));
       return info;
     }

     /** Returns the URL for the RSS description of the given video. */
     public static String getRssUrl(String docid) {
       return "http://video.google.com/videofeed"
              + "?type=docid&output=rss&sourceid=gtalk&docid=" + docid;
     }

     /** (For testing purposes:) Returns a video URL with the given parts.  */
     public static String getURL(String docid) {
       return getURL(docid, null);
     }

     /** (For testing purposes:) Returns a video URL with the given parts.  */
     public static String getURL(String docid, String extraParams) {
       if (extraParams == null) {
         extraParams = "";
       } else if (extraParams.length() > 0) {
         extraParams += "&";
       }
       return "http://video.google.com/videoplay?" + extraParams
              + "docid=" + docid;
     }
   }

   /** Represents a link to a YouTube video. */
   public static class YouTubeVideo extends Token {
     /** Pattern for a video URL. */
     private static final Pattern URL_PATTERN = Pattern.compile(
         "(?i)http://(?:[a-z0-9]+\\.)?youtube\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/watch\\?"
         + ".*\\bv=([-_a-zA-Z0-9=]+).*");

     private String docid;

     public YouTubeVideo(String docid, String text) {
       super(Type.YOUTUBE_VIDEO, text);
       this.docid = docid;
     }

     public String getDocID() { return docid; }

     public boolean isHtml() { return false; }
     public boolean isMedia() { return true; }

     /** Returns a Video object if the given url is to a video. */
     public static YouTubeVideo matchURL(String url, String text) {
       Matcher m = URL_PATTERN.matcher(url);
       if (m.matches()) {
         return new YouTubeVideo(m.group(1), text);
       } else {
         return null;
       }
     }

     public List<String> getInfo() {
       List<String> info = super.getInfo();
       info.add(getRssUrl(docid));
       info.add(getURL(docid));
       return info;
     }

     /** Returns the URL for the RSS description of the given video. */
     public static String getRssUrl(String docid) {
       return "http://youtube.com/watch?v=" + docid;
     }

     /** (For testing purposes:) Returns a video URL with the given parts.  */
     public static String getURL(String docid) {
       return getURL(docid, null);
     }

     /** (For testing purposes:) Returns a video URL with the given parts.  */
     public static String getURL(String docid, String extraParams) {
       if (extraParams == null) {
         extraParams = "";
       } else if (extraParams.length() > 0) {
         extraParams += "&";
       }
       return "http://youtube.com/watch?" + extraParams + "v=" + docid;
     }

     /** (For testing purposes:) Returns a video URL with the given parts.
       * @param http If true, includes http://
       * @param prefix If non-null/non-blank, adds to URL before youtube.com.
       *   (e.g., prefix="br." --> "br.youtube.com")
       */
     public static String getPrefixedURL(boolean http, String prefix,
                                         String docid, String extraParams) {
       String protocol = "";

       if (http) {
         protocol = "http://";
       }

       if (prefix == null) {
         prefix = "";
       }

       if (extraParams == null) {
         extraParams = "";
       } else if (extraParams.length() > 0) {
         extraParams += "&";
       }

       return protocol + prefix + "youtube.com/watch?" + extraParams + "v=" +
               docid;
     }
   }

   /** Represents a link to a Picasa photo or album. */
   public static class Photo extends Token {
     /**
      * Pattern for an album or photo URL. Group 1 is the user, group 2 the
      * album and group 3 the fragment after '#', which identifies the photo
      * and is absent for album links.
      *
      * The dots of the host name are escaped so that only the literal host
      * picasaweb.google.com matches; an unescaped '.' would accept any
      * character in its place.
      */
     // TODO (katyarogers) searchbrowse includes search lists and tags,
     // it follows a different pattern than albums - would be nice to add later
     private static final Pattern URL_PATTERN = Pattern.compile(
         "http://picasaweb\\.google\\.com/([^/?#&]+)/+((?!searchbrowse)[^/?#&]+)(?:/|/photo)?(?:\\?[^#]*)?(?:#(.*))?");

     private String user;
     private String album;
     private String photo;  // null for albums

     /**
      * @param user the user owning the album
      * @param album the album
      * @param photo the photo within the album, or null for an album link
      * @param text the raw text of the token
      */
     public Photo(String user, String album, String photo, String text) {
       super(Type.PHOTO, text);
       this.user = user;
       this.album = album;
       this.photo = photo;
     }

     /** Returns the user owning the album. */
     public String getUser() { return user; }
     /** Returns the album. */
     public String getAlbum() { return album; }
     /** Returns the photo, or null if this token refers to a whole album. */
     public String getPhoto() { return photo; }

     public boolean isHtml() { return false; }
     public boolean isMedia() { return true; }

     /**
      * Returns a Photo object if the given url is to a photo or album.
      *
      * @return the photo, or null if the url does not match
      */
     public static Photo matchURL(String url, String text) {
       Matcher m = URL_PATTERN.matcher(url);
       if (m.matches()) {
         return new Photo(m.group(1), m.group(2), m.group(3), text);
       } else {
         return null;
       }
     }

     /**
      * @return the type string, followed by the RSS url of the user's albums,
      *         the album url, and the photo url (null for album links)
      */
     public List<String> getInfo() {
       List<String> info = super.getInfo();
       info.add(getRssUrl(getUser()));
       info.add(getAlbumURL(getUser(), getAlbum()));
       if (getPhoto() != null) {
         info.add(getPhotoURL(getUser(), getAlbum(), getPhoto()));
       } else {
         // Keep the position of the photo url in the list even without one.
         info.add((String)null);
       }
       return info;
     }

     /** Returns the URL for the RSS description of the user's albums. */
     public static String getRssUrl(String user) {
       return "http://picasaweb.google.com/data/feed/api/user/" + user +
         "?category=album&alt=rss";
     }

     /** Returns the URL for an album. */
     public static String getAlbumURL(String user, String album) {
       return "http://picasaweb.google.com/" + user + "/" + album;
     }

     /** Returns the URL for a particular photo. */
     public static String getPhotoURL(String user, String album, String photo) {
       return "http://picasaweb.google.com/" + user + "/" + album + "/photo#"
              + photo;
     }
   }

   /**
    * Represents a link to a Flickr photo or album.
    *
    * <p>A token describes one of: a user's page, a single photo of a user, a
    * user's set, a user's tag, or a system-wide tag (in which case the user
    * is null).
    */
   public static class FlickrPhoto extends Token {
     /**
      * Pattern for a user album or photo URL. Group 1 is the user and the
      * optional group 2 is the photo. The dots of the host name are escaped so
      * that they only match a literal '.' rather than any character.
      */
     private static final Pattern URL_PATTERN = Pattern.compile(
         "http://(?:www\\.)?flickr\\.com/photos/([^/?#&]+)/?([^/?#&]+)?/?.*");

     /**
      * Pattern for a user's tag or set URL. Group 1 is the user, group 2 is
      * either "tags" or "sets" and group 3 is the tag or set identifier.
      */
     private static final Pattern GROUPING_PATTERN = Pattern.compile(
         "http://(?:www\\.)?flickr\\.com/photos/([^/?#&]+)/(tags|sets)/" +
         "([^/?#&]+)/?");

     private static final String SETS = "sets";
     private static final String TAGS = "tags";

     private final String user;
     private final String photo;      // null for user album
     private final String grouping;   // either "tags" or "sets"
     private final String groupingId; // sets or tags identifier

     /**
      * Creates a Flickr token.
      *
      * <p>If {@code user} is "tags" the arguments are reinterpreted as a
      * system-wide tag: the user and photo become null, the grouping becomes
      * "tags" and {@code photo} is used as the grouping identifier.
      *
      * @param user       the user segment of the URL
      * @param photo      the photo segment of the URL; "show" (a slide show
      *                   URL) is stored as null
      * @param grouping   "tags", "sets" or null
      * @param groupingId the tag or set identifier, or null
      * @param text       the raw text of the token
      */
     public FlickrPhoto(String user, String photo, String grouping,
                        String groupingId, String text) {
       super(Type.FLICKR, text);

       if (TAGS.equals(user)) {
         /* System wide tags look like the URL to a Flickr user. */
         this.user = null;
         this.photo = null;
         this.grouping = TAGS;
         this.groupingId = photo;
       } else {
         this.user = user;
         // Don't consider slide show URL a photo
         this.photo = "show".equals(photo) ? null : photo;
         this.grouping = grouping;
         this.groupingId = groupingId;
       }
     }

     /** Returns the user, or null for a system-wide tag. */
     public String getUser() { return user; }
     /** Returns the photo, or null if this token is not a single photo. */
     public String getPhoto() { return photo; }
     /** Returns "tags", "sets" or null. */
     public String getGrouping() { return grouping; }
     /** Returns the tag or set identifier, or null. */
     public String getGroupingId() { return groupingId; }

     /** Always false: a Flickr token does not supply its own HTML. */
     public boolean isHtml() { return false; }
     /** Always true: a Flickr token counts as media. */
     public boolean isMedia() { return true; }

     /**
      * Returns a FlickrPhoto object if the given url is to a photo or Flickr
      * user.
      *
      * <p>The tag/set pattern is tried first; the general user/photo pattern
      * is the fallback.
      *
      * @param url  the candidate URL, matched in full
      * @param text the raw text to attach to the resulting token
      * @return the new token, or null if neither pattern matches
      */
     public static FlickrPhoto matchURL(String url, String text) {
       Matcher m = GROUPING_PATTERN.matcher(url);
       if (m.matches()) {
         return new FlickrPhoto(m.group(1), null, m.group(2), m.group(3), text);
       }

       m = URL_PATTERN.matcher(url);
       if (m.matches()) {
         return new FlickrPhoto(m.group(1), m.group(2), null, null, text);
       }
       return null;
     }

     /**
      * Returns the superclass info followed by the URL, user, photo, grouping
      * and grouping identifier. Null values of the last four are reported as
      * empty strings.
      */
     public List<String> getInfo() {
       List<String> info = super.getInfo();
       info.add(getUrl());
       info.add(orEmpty(getUser()));
       info.add(orEmpty(getPhoto()));
       info.add(orEmpty(getGrouping()));
       info.add(orEmpty(getGroupingId()));
       return info;
     }

     /** Maps null to the empty string and returns anything else unchanged. */
     private static String orEmpty(String s) {
       return (s != null) ? s : "";
     }

     /**
      * Returns the canonical URL for this token: a user's set, a user's tag,
      * a system-wide tag, a single photo, or the user's page, in that order
      * of precedence.
      */
     public String getUrl() {
       if (SETS.equals(grouping)) {
         return getUserSetsURL(user, groupingId);
       } else if (TAGS.equals(grouping)) {
         if (user != null) {
           return getUserTagsURL(user, groupingId);
         } else {
           return getTagsURL(groupingId);
         }
       } else if (photo != null) {
         return getPhotoURL(user, photo);
       } else {
         return getUserURL(user);
       }
     }

     /** Returns the URL for the RSS description; currently always null. */
     public static String getRssUrl(String user) {
       return null;
     }

     /** Returns the URL for a particular tag. */
     public static String getTagsURL(String tag) {
       return "http://flickr.com/photos/tags/" + tag;
     }

     /** Returns the URL to the user's Flickr homepage. */
     public static String getUserURL(String user) {
       return "http://flickr.com/photos/" + user;
     }

     /** Returns the URL for a particular photo. */
     public static String getPhotoURL(String user, String photo) {
       return "http://flickr.com/photos/" + user + "/" + photo;
     }

     /** Returns the URL for a user tag photo set. */
     public static String getUserTagsURL(String user, String tagId) {
       return "http://flickr.com/photos/" + user + "/tags/" + tagId;
     }

     /** Returns the URL for user set. */
     public static String getUserSetsURL(String user, String setId) {
       return "http://flickr.com/photos/" + user + "/sets/" + setId;
     }
   }

   /** Token for a smiley recognized in the input. */
   public static class Smiley extends Token {
     // TODO: Pass the SWF URL down to the client.

     /**
      * Creates a SMILEY token.
      *
      * @param text the raw text of the smiley
      */
     public Smiley(String text) {
       super(Type.SMILEY, text);
     }

     /** Always false: a smiley does not supply its own HTML. */
     public boolean isHtml() {
       return false;
     }

     /** Returns the superclass info followed by the smiley's raw text. */
     public List<String> getInfo() {
       final List<String> fields = super.getInfo();
       fields.add(getRawText());
       return fields;
     }
   }

   /** Token for an acronym recognized in the input. */
   public static class Acronym extends Token {
     // TODO: SWF
     private final String value;

     /**
      * Creates an ACRONYM token.
      *
      * @param text  the raw text of the acronym
      * @param value the value associated with the acronym
      */
     public Acronym(String text, String value) {
       super(Type.ACRONYM, text);
       this.value = value;
     }

     /** Returns the value given to this acronym at construction. */
     public String getValue() {
       return this.value;
     }

     /** Always false: an acronym does not supply its own HTML. */
     public boolean isHtml() {
       return false;
     }

     /**
      * Returns the superclass info followed by the raw text and then the
      * value of this acronym.
      */
     public List<String> getInfo() {
       final List<String> fields = super.getInfo();
       fields.add(getRawText());
       fields.add(getValue());
       return fields;
     }
   }

   /**
    * Token for a single character that may switch formatting on or off.
    *
    * <p>The character only produces markup once it has been flagged as
    * matched via {@link #setMatched(boolean)}; until then it renders as the
    * plain character.
    */
   public static class Format extends Token {
     private final char ch;
     private final boolean start;
     private boolean matched;

     /**
      * Creates a FORMAT token whose raw text is the character itself.
      *
      * @param ch    the formatting character
      * @param start true if this character opens the formatting, false if it
      *              closes it
      */
     public Format(char ch, boolean start) {
       super(Type.FORMAT, String.valueOf(ch));
       this.start = start;
       this.ch = ch;
     }

     /** Records whether this character has been paired up. */
     public void setMatched(boolean matched) {
       this.matched = matched;
     }

     /** Always true: a format token renders itself via toHtml. */
     public boolean isHtml() {
       return true;
     }

     /**
      * Renders this token as HTML.
      *
      * @param caps not used by this implementation
      * @return the opening or closing markup if the token is matched;
      *         otherwise the character itself, with '"' escaped as &amp;quot;
      */
     public String toHtml(boolean caps) {
       if (!matched) {
         // An unmatched character is ordinary text, so it only needs the
         // usual HTML escaping.
         if (ch == '"') {
           return "&quot;";
         }
         return String.valueOf(ch);
       }
       if (start) {
         return getFormatStart(ch);
       }
       return getFormatEnd(ch);
     }

     /**
      * Not supported. Info should not be needed for this type
      *
      * @throws UnsupportedOperationException always
      */
     public List<String> getInfo() {
       throw new UnsupportedOperationException();
     }

     /** Returns true only for '^', the character that controls caps. */
     public boolean controlCaps() {
       return ch == '^';
     }

     /** Returns whether this is a start character. */
     public boolean setCaps() {
       return start;
     }

     /**
      * Returns the opening markup for the given format character.
      *
      * @throws AssertionError if the character is not a known format
      */
     private String getFormatStart(char ch) {
       if (ch == '*') {
         return "<b>";
       } else if (ch == '_') {
         return "<i>";
       } else if (ch == '^') {
         return "<b><font color=\"#005FFF\">"; // TODO: all caps
       } else if (ch == '"') {
         return "<font color=\"#999999\">\u201c";
       }
       throw new AssertionError("unknown format '" + ch + "'");
     }

     /**
      * Returns the closing markup for the given format character.
      *
      * @throws AssertionError if the character is not a known format
      */
     private String getFormatEnd(char ch) {
       if (ch == '*') {
         return "</b>";
       } else if (ch == '_') {
         return "</i>";
       } else if (ch == '^') {
         return "</font></b>"; // TODO: all caps
       } else if (ch == '"') {
         return "\u201d</font>";
       }
       throw new AssertionError("unknown format '" + ch + "'");
     }
   }

   /**
    * Appends the given token to the parser's token list.
    *
    * @param token the token to append
    */
   private void addToken(Token token) {
     this.tokens.add(token);
   }

   /**
    * Converts the entire message into a single HTML display string.
    *
    * <p>Each part is emitted as one {@code <p>...</p>} element followed by a
    * newline. Tokens that report {@code isHtml()} render themselves through
    * {@code toHtml(caps)}; every other token is rendered here according to
    * its type. The caps flag starts out false for each part and is updated by
    * any token whose {@code controlCaps()} returns true.
    *
    * <p>NOTE(review): raw text and URLs of non-HTML tokens are appended as-is,
    * without HTML escaping - confirm this is what callers expect.
    *
    * @return the HTML for all parts, in order
    * @throws AssertionError if a non-HTML token has a type not handled here
    */
   public String toHtml() {
     StringBuilder html = new StringBuilder();

     for (Part part : parts) {
       boolean caps = false;

       html.append("<p>");
       for (Token token : part.getTokens()) {
         if (token.isHtml()) {
           html.append(token.toHtml(caps));
         } else {
           switch (token.getType()) {
           case LINK:
             html.append("<a href=\"");
             html.append(((Link)token).getURL());
             html.append("\">");
             html.append(token.getRawText());
             html.append("</a>");
             break;

           case SMILEY:
             // TODO: link to an appropriate image
             html.append(token.getRawText());
             break;

           case ACRONYM:
             html.append(token.getRawText());
             break;

           case MUSIC:
             // TODO: include a music glyph
             html.append(((MusicTrack)token).getTrack());
             break;

           case GOOGLE_VIDEO:
             // TODO: include a Google Video icon
             html.append("<a href=\"");
             html.append(((Video)token).getURL(((Video)token).getDocID()));
             html.append("\">");
             html.append(token.getRawText());
             html.append("</a>");
             break;

           case YOUTUBE_VIDEO:
             // TODO: include a YouTube icon
             html.append("<a href=\"");
             html.append(((YouTubeVideo)token).getURL(
                 ((YouTubeVideo)token).getDocID()));
             html.append("\">");
             html.append(token.getRawText());
             html.append("</a>");
             break;

           case PHOTO: {
             // TODO: include a Picasa Web icon
             html.append("<a href=\"");
             html.append(Photo.getAlbumURL(
                 ((Photo)token).getUser(), ((Photo)token).getAlbum()));
             html.append("\">");
             html.append(token.getRawText());
             html.append("</a>");
             break;
           }

           case FLICKR: {
             // TODO: include a Flickr icon
             // A FLICKR token is a FlickrPhoto, which is not a Photo; it must
             // only ever be cast to FlickrPhoto here.
             html.append("<a href=\"");
             html.append(((FlickrPhoto)token).getUrl());
             html.append("\">");
             html.append(token.getRawText());
             html.append("</a>");
             break;
           }

           default:
             throw new AssertionError("unknown token type: " + token.getType());
           }
         }

         // Only caps-controlling tokens may change the caps state that is
         // passed to the tokens after them.
         if (token.controlCaps()) {
           caps = token.setCaps();
         }
       }
       html.append("</p>\n");
     }

     return html.toString();
   }

   /**
    * Returns the given string with its chars in reverse order.
    *
    * <p>The reversal is done one {@code char} at a time, so the two halves of
    * a surrogate pair are swapped as well.
    *
    * @param str the string to reverse; must not be null
    * @return a new string holding the chars of {@code str} back to front
    */
   protected static String reverse(String str) {
     StringBuilder reversed = new StringBuilder();
     int remaining = str.length();
     while (remaining > 0) {
       remaining--;
       reversed.append(str.charAt(remaining));
     }
     return reversed.toString();
   }

   /**
    * A node of a character trie.
    *
    * <p>Every node remembers the text that leads to it from the root and may
    * carry a value; a node counts as existing only when its value is
    * non-null.
    */
   public static class TrieNode {
     private final HashMap<Character,TrieNode> children =
         new HashMap<Character,TrieNode>();
     private String text;
     private String value;

     /** Creates a node whose text is the empty string. */
     public TrieNode() {
       this("");
     }

     /**
      * Creates a node with the given text and no value.
      *
      * @param text the text this node stands for
      */
     public TrieNode(String text) {
       this.text = text;
     }

     /** Returns true if a value has been set on this node. */
     public final boolean exists() {
       return this.value != null;
     }

     /** Returns the text this node stands for. */
     public final String getText() {
       return this.text;
     }

     /** Returns the value of this node, or null if none has been set. */
     public final String getValue() {
       return this.value;
     }

     /** Sets the value of this node. */
     public void setValue(String value) {
       this.value = value;
     }

     /**
      * Returns the child reached through the given character, or null if
      * there is no such child.
      */
     public TrieNode getChild(char ch) {
       return children.get(ch);
     }

     /**
      * Returns the child reached through the given character, creating it
      * first if it is missing. A new child's text is this node's text with
      * the character appended.
      */
     public TrieNode getOrCreateChild(char ch) {
       TrieNode child = children.get(ch);
       if (child != null) {
         return child;
       }
       child = new TrieNode(text + ch);
       children.put(ch, child);
       return child;
     }

     /**
      * Adds the given string into the trie.
      *
      * @param root  the node to start from
      * @param str   the string whose characters form the path below root
      * @param value the value to store on the node at the end of the path
      */
     public static void addToTrie(TrieNode root, String str, String value) {
       TrieNode node = root;
       for (int i = 0; i < str.length(); ++i) {
         node = node.getOrCreateChild(str.charAt(i));
       }
       node.setValue(value);
     }
   }


   /**
    * Determines whether some non-empty prefix of the given string is in the
    * given trie.
    *
    * <p>The trie is walked one character at a time; the walk succeeds as soon
    * as it reaches a node that exists and fails as soon as a character has no
    * child.
    *
    * @param root the trie to search
    * @param str  the string to look up
    * @return true if a node that exists was reached, false otherwise
    */
   private static boolean matches(TrieNode root, String str) {
     TrieNode node = root;
     for (int i = 0; i < str.length(); ++i) {
       node = node.getChild(str.charAt(i));
       if (node == null) {
         return false;
       }
       if (node.exists()) {
         return true;
       }
     }
     return false;
   }

   /**
    * Finds the longest run of the parser's raw text, starting at the given
    * index, that exists in the trie. Smiley breaks are not considered.
    *
    * @param root  the trie to search
    * @param p     the parser supplying the raw text and word breaks
    * @param start the index in the raw text at which to start matching
    * @return the node of the longest match, or null if there is none
    */
   private static TrieNode longestMatch(
       TrieNode root, AbstractMessageParser p, int start) {
     final boolean smiley = false;
     return longestMatch(root, p, start, smiley);
   }

   /**
    * Finds the longest run of the parser's raw text, starting at the given
    * index, that exists in the trie and ends at an acceptable break.
    *
    * <p>A match is accepted when the position just after it is a word break
    * or, if {@code smiley} is set, a smiley break.
    *
    * @param root   the trie to search
    * @param p      the parser supplying the raw text and the break tests
    * @param start  the index in the raw text at which to start matching
    * @param smiley whether smiley breaks also end a match
    * @return the node of the longest accepted match, or null if there is none
    */
   private static TrieNode longestMatch(
       TrieNode root, AbstractMessageParser p, int start, boolean smiley) {
     TrieNode longest = null;
     TrieNode node = root;
     for (int pos = start; pos < p.getRawText().length(); ) {
       node = node.getChild(p.getRawText().charAt(pos));
       // From here on pos is the index just past the consumed character.
       pos++;
       if (node == null) {
         break;
       }
       if (node.exists()
           && (p.isWordBreak(pos) || (smiley && p.isSmileyBreak(pos)))) {
         longest = node;
       }
     }
     return longest;
   }


   /**
    * A group of tokens that is delivered as a single message, optionally
    * preceded by "me" text.
    */
   public static class Part {
     private String meText;
     private ArrayList<Token> tokens;

     /** Creates a part with no tokens and no me text. */
     public Part() {
       tokens = new ArrayList<Token>();
     }

     /**
      * Returns the type string of this part: "s" when sending or "r"
      * otherwise, followed by "d" for a media part, "m" for a part with me
      * text, or nothing.
      */
     public String getType(boolean isSend) {
       String direction = isSend ? "s" : "r";
       return direction + getPartType();
     }

     /** Returns "d" for media, "m" if me text is set, otherwise "". */
     private String getPartType() {
       if (isMedia()) {
         return "d";
       }
       if (meText != null) {
         return "m";
       }
       return "";
     }

     /** Returns true if this part holds exactly one token and it is media. */
     public boolean isMedia() {
       if (tokens.size() != 1) {
         return false;
       }
       return tokens.get(0).isMedia();
     }

     /**
      * Convenience accessor for the single token of a media part.
      *
      * @return the media token if this.isMedia(), otherwise null
      */
     public Token getMediaToken() {
       return isMedia() ? tokens.get(0) : null;
     }

     /**
      * Adds the given token to this part.
      *
      * @throws AssertionError if this part is already a media part
      */
     public void add(Token token) {
       if (isMedia()) {
         throw new AssertionError("media ");
       }
       tokens.add(token);
     }

     /** Sets the me text that precedes the tokens of this part. */
     public void setMeText(String meText) {
       this.meText = meText;
     }

     /**
      * Returns the original text of this part: the me text, if any, followed
      * by the raw text of every token in order.
      */
     public String getRawText() {
       StringBuilder raw = new StringBuilder();
       if (meText != null) {
         raw.append(meText);
       }
       for (Token token : tokens) {
         raw.append(token.getRawText());
       }
       return raw.toString();
     }

     /** Returns the tokens in this part. */
     public ArrayList<Token> getTokens() {
       return tokens;
     }

     // Disabled code: adds the tokens into the given builder as an array.
 //    public void toArray(JSArrayBuilder array) {
 //      if (isMedia()) {
 //        // For media, we send its array (i.e., we don't wrap this in another
 //        // array as we do for non-media parts).
 //        tokens.get(0).toArray(array);
 //      } else {
 //        array.beginArray();
 //        addToArray(array);
 //        array.endArray();
 //      }
 //    }
   }
 }