Updating re2 to re2-20130115. Updating RE2 to latest from: https://re2.googlecode.com/files/re2-20130115.tgz Change-Id: I0b2527af4443bf8815db2c78ef14f6edfe3ecb37

commit: 0d4c52358a1af421705c54bd8a9fdd8a30558a2e [log] [tgz]
author: Alexander Gutkin <agutkin@google.com> Thu Feb 28 13:47:27 2013 +0000
committer: Alexander Gutkin <agutkin@google.com> Thu Feb 28 17:22:58 2013 +0000
tree: c21f8d71d3fbbb42fc377b0ebad51b06ab5667b0
parent: c94c4501fe83e3ad77ce597b55bbbfbf533c10ee [diff]
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 981ce02..7b44e04 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS

@@ -26,6 +26,8 @@
 
 # Please keep the list sorted.
 
+Dominic Battré <battre@chromium.org>
+John Millikin <jmillikin@gmail.com>
 Rob Pike <r@google.com>
 Russ Cox <rsc@swtch.com>
 Sanjay Ghemawat <sanjay@google.com>

diff --git a/NOTICE b/NOTICE
deleted file mode 100644
index 09e5ec1..0000000
--- a/NOTICE
+++ /dev/null

@@ -1,27 +0,0 @@
-// Copyright (c) 2009 The RE2 Authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//    * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//    * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//    * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

diff --git a/README.android b/README.android
index 453cd80..4911c3c 100644
--- a/README.android
+++ b/README.android

@@ -1,29 +1,28 @@
-Code obtained via
------------------
-hg clone https://re2.googlecode.com/hg re2
+Code obtained from
+------------------
+
+https://re2.googlecode.com/files/re2-20130115.tgz
 
 Version
 -------
 
-hg identify
-2d252384c5e8 tip
+re2-20130115.tgz
 
 Changes required to build using stlport on Android as follows (full diff)
 -------------------------------------------------------------------------
-diff -r ./re2/parse.cc /home/idh/temp9/re2/re2/parse.cc
-19d18
-< #include <ctype.h>
-diff -r ./re2/re2.cc /home/idh/temp9/re2/re2/re2.cc
-13d12
-< #include <ctype.h>
-Only in /home/idh/temp9/re2/re2: testing
-diff -r ./util/util.h /home/idh/temp9/re2/util/util.h
-43,48c43
+util/util.h:
+
+44,53c44
 < #if defined(ANDROID)
 < 
-< #include <unordered_set>
+< #if defined(_STLPORT_VERSION)
+< #include <unordered_set>      // using stlport
+< #else
+< #include <tr1/unordered_set>  // using gnustl
+< #endif
 < using std::tr1::unordered_set;
-< 
+<  
 < #elif defined(__GNUC__) && !defined(USE_CXX0X)
 ---
 > #if defined(__GNUC__) && !defined(USE_CXX0X)
+

diff --git a/doc/mksyntaxgo b/doc/mksyntaxgo
new file mode 100755
index 0000000..42e87d6
--- /dev/null
+++ b/doc/mksyntaxgo

@@ -0,0 +1,41 @@
+#!/bin/sh
+
+set -e
+out=$GOROOT/src/pkg/regexp/syntax/doc.go
+cp syntax.txt $out
+sam -d $out <<'!'
+,x g/NOT SUPPORTED/d
+/^Unicode character class/,$d
+,s/[«»]//g
+,x g/^Possessive repetitions:/d
+,x g/\\C/d
+,x g/Flag syntax/d
+,s/.=(true|false)/flag &/g
+,s/^Flags:/  Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:\n/
+,s/\n\n\n+/\n\n/g
+,x/(^.*	.*\n)+/ | awk -F'	' '{printf("  %-14s %s\n", $1, $2)}'
+1,2c
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution.
+
+/*
+Package syntax parses regular expressions into parse trees and compiles
+parse trees into programs. Most clients of regular expressions will use the
+facilities of package regexp (such as Compile and Match) instead of this package.
+
+Syntax
+
+The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
+Parts of the syntax can be disabled by passing alternate flags to Parse.
+
+.
+$a
+*/
+package syntax
+.
+w
+q
+!

diff --git a/doc/syntax.txt b/doc/syntax.txt
index 740e5ce..f940750 100644
--- a/doc/syntax.txt
+++ b/doc/syntax.txt

@@ -2,7 +2,7 @@
 -------------------------------------
 
 Single characters:
-.	any character, including newline (s=true)
+.	any character, possibly including newline (s=true)
 [xyz]	character class
 [^xyz]	negated character class
 \d	Perl character class
@@ -60,7 +60,7 @@
 
 Flags:
 i	case-insensitive (default false)
-m	multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false)
+m	multi-line mode: «^» and «$» match begin/end line in addition to begin/end text (default false)
 s	let «.» match «\n» (default false)
 U	ungreedy: swap meaning of «x*» and «x*?», «x+» and «x+?», etc (default false)
 Flag syntax is «xyz» (set) or «-xyz» (clear) or «xy-z» (set «xy», clear «z»).

diff --git a/lib/codereview/codereview.py b/lib/codereview/codereview.py
index b980929..d26df2a 100644
--- a/lib/codereview/codereview.py
+++ b/lib/codereview/codereview.py

@@ -22,7 +22,7 @@
 your repository's .hg/hgrc file.
 
 	[extensions]
-	codereview = path/to/codereview.py
+	codereview = /path/to/codereview.py
 
 	[codereview]
 	server = codereview.appspot.com
@@ -38,110 +38,60 @@
 "hg diff @123456" is equivalent to"hg diff x.go y.go".
 '''
 
-from mercurial import cmdutil, commands, hg, util, error, match, discovery
-from mercurial.node import nullrev, hex, nullid, short
-import os, re, time
-import stat
-import subprocess
-import threading
-from HTMLParser import HTMLParser
-
-# The standard 'json' package is new in Python 2.6.
-# Before that it was an external package named simplejson.
-try:
-	# Standard location in 2.6 and beyond.
-	import json
-except Exception, e:
-	try:
-		# Conventional name for earlier package.
-		import simplejson as json
-	except:
-		try:
-			# Was also bundled with django, which is commonly installed.
-			from django.utils import simplejson as json
-		except:
-			# We give up.
-			raise e
-
-try:
-	hgversion = util.version()
-except:
-	from mercurial.version import version as v
-	hgversion = v.get_version()
-
-# in Mercurial 1.9 the cmdutil.match and cmdutil.revpair moved to scmutil
-if hgversion >= '1.9':
-    from mercurial import scmutil
-else:
-    scmutil = cmdutil
-
-oldMessage = """
-The code review extension requires Mercurial 1.3 or newer.
-
-To install a new Mercurial,
-
-	sudo easy_install mercurial
-
-works on most systems.
-"""
-
-linuxMessage = """
-You may need to clear your current Mercurial installation by running:
-
-	sudo apt-get remove mercurial mercurial-common
-	sudo rm -rf /etc/mercurial
-"""
-
-if hgversion < '1.3':
-	msg = oldMessage
-	if os.access("/etc/mercurial", 0):
-		msg += linuxMessage
-	raise util.Abort(msg)
-
-def promptyesno(ui, msg):
-	# Arguments to ui.prompt changed between 1.3 and 1.3.1.
-	# Even so, some 1.3.1 distributions seem to have the old prompt!?!?
-	# What a terrible way to maintain software.
-	try:
-		return ui.promptchoice(msg, ["&yes", "&no"], 0) == 0
-	except AttributeError:
-		return ui.prompt(msg, ["&yes", "&no"], "y") != "n"
-
-def incoming(repo, other):
-	fui = FakeMercurialUI()
-	ret = commands.incoming(fui, repo, *[other.path], **{'bundle': '', 'force': False})
-	if ret and ret != 1:
-		raise util.Abort(ret)
-	out = fui.output
-	return out
-
-def outgoing(repo):
-	fui = FakeMercurialUI()
-	ret = commands.outgoing(fui, repo, *[], **{})
-	if ret and ret != 1:
-		raise util.Abort(ret)
-	out = fui.output
-	return out
-
-# To experiment with Mercurial in the python interpreter:
-#    >>> repo = hg.repository(ui.ui(), path = ".")
-
-#######################################################################
-# Normally I would split this into multiple files, but it simplifies
-# import path headaches to keep it all in one file.  Sorry.
-
 import sys
+
 if __name__ == "__main__":
 	print >>sys.stderr, "This is a Mercurial extension and should not be invoked directly."
 	sys.exit(2)
 
-server = "codereview.appspot.com"
-server_url_base = None
+# We require Python 2.6 for the json package.
+if sys.version < '2.6':
+	print >>sys.stderr, "The codereview extension requires Python 2.6 or newer."
+	print >>sys.stderr, "You are running Python " + sys.version
+	sys.exit(2)
+
+import json
+import os
+import re
+import stat
+import subprocess
+import threading
+import time
+
+from mercurial import commands as hg_commands
+from mercurial import util as hg_util
+
 defaultcc = None
-contributors = {}
-missing_codereview = None
+codereview_disabled = None
 real_rollback = None
 releaseBranch = None
+server = "codereview.appspot.com"
+server_url_base = None
+
+#######################################################################
+# Normally I would split this into multiple files, but it simplifies
+# import path headaches to keep it all in one file.  Sorry.
+# The different parts of the file are separated by banners like this one.
+
+#######################################################################
+# Helpers
+
+def RelativePath(path, cwd):
+	n = len(cwd)
+	if path.startswith(cwd) and path[n] == '/':
+		return path[n+1:]
+	return path
+
+def Sub(l1, l2):
+	return [l for l in l1 if l not in l2]
+
+def Add(l1, l2):
+	l = l1 + Sub(l2, l1)
+	l.sort()
+	return l
+
+def Intersect(l1, l2):
+	return [l for l in l1 if l in l2]
 
 #######################################################################
 # RE: UNICODE STRING HANDLING
@@ -168,7 +118,7 @@
 
 def typecheck(s, t):
 	if type(s) != t:
-		raise util.Abort("type check failed: %s has type %s != %s" % (repr(s), type(s), t))
+		raise hg_util.Abort("type check failed: %s has type %s != %s" % (repr(s), type(s), t))
 
 # If we have to pass unicode instead of str, ustr does that conversion clearly.
 def ustr(s):
@@ -200,6 +150,40 @@
 default_to_utf8()
 
 #######################################################################
+# Status printer for long-running commands
+
+global_status = None
+
+def set_status(s):
+	# print >>sys.stderr, "\t", time.asctime(), s
+	global global_status
+	global_status = s
+
+class StatusThread(threading.Thread):
+	def __init__(self):
+		threading.Thread.__init__(self)
+	def run(self):
+		# pause a reasonable amount of time before
+		# starting to display status messages, so that
+		# most hg commands won't ever see them.
+		time.sleep(30)
+
+		# now show status every 15 seconds
+		while True:
+			time.sleep(15 - time.time() % 15)
+			s = global_status
+			if s is None:
+				continue
+			if s == "":
+				s = "(unknown status)"
+			print >>sys.stderr, time.asctime(), s
+
+def start_status_thread():
+	t = StatusThread()
+	t.setDaemon(True)  # allowed to exit if t is still running
+	t.start()
+
+#######################################################################
 # Change list parsing.
 #
 # Change lists are stored in .hg/codereview/cl.nnnnnn
@@ -275,17 +259,18 @@
 		typecheck(s, str)
 		return s
 
-	def PendingText(self):
+	def PendingText(self, quick=False):
 		cl = self
 		s = cl.name + ":" + "\n"
 		s += Indent(cl.desc, "\t")
 		s += "\n"
 		if cl.copied_from:
 			s += "\tAuthor: " + cl.copied_from + "\n"
-		s += "\tReviewer: " + JoinComma(cl.reviewer) + "\n"
-		for (who, line) in cl.lgtm:
-			s += "\t\t" + who + ": " + line + "\n"
-		s += "\tCC: " + JoinComma(cl.cc) + "\n"
+		if not quick:
+			s += "\tReviewer: " + JoinComma(cl.reviewer) + "\n"
+			for (who, line) in cl.lgtm:
+				s += "\t\t" + who + ": " + line + "\n"
+			s += "\tCC: " + JoinComma(cl.cc) + "\n"
 		s += "\tFiles:\n"
 		for f in cl.files:
 			s += "\t\t" + f + "\n"
@@ -360,7 +345,7 @@
 			uploaded_diff_file = [("data", "data.diff", emptydiff)]
 		
 		if vcs and self.name != "new":
-			form_fields.append(("subject", "diff -r " + vcs.base_rev + " " + getremote(ui, repo, {}).path))
+			form_fields.append(("subject", "diff -r " + vcs.base_rev + " " + ui.expandpath("default")))
 		else:
 			# First upload sets the subject for the CL itself.
 			form_fields.append(("subject", self.Subject()))
@@ -379,7 +364,7 @@
 			ui.status(msg + "\n")
 		set_status("uploaded CL metadata + diffs")
 		if not response_body.startswith("Issue created.") and not response_body.startswith("Issue updated."):
-			raise util.Abort("failed to update issue: " + response_body)
+			raise hg_util.Abort("failed to update issue: " + response_body)
 		issue = msg[msg.rfind("/")+1:]
 		self.name = issue
 		if not self.url:
@@ -404,7 +389,7 @@
 			pmsg += " (cc: %s)" % (', '.join(self.cc),)
 		pmsg += ",\n"
 		pmsg += "\n"
-		repourl = getremote(ui, repo, {}).path
+		repourl = ui.expandpath("default")
 		if not self.mailed:
 			pmsg += "I'd like you to review this change to\n" + repourl + "\n"
 		else:
@@ -567,37 +552,6 @@
 	set_status("loaded CL " + name)
 	return cl, ''
 
-global_status = None
-
-def set_status(s):
-	# print >>sys.stderr, "\t", time.asctime(), s
-	global global_status
-	global_status = s
-
-class StatusThread(threading.Thread):
-	def __init__(self):
-		threading.Thread.__init__(self)
-	def run(self):
-		# pause a reasonable amount of time before
-		# starting to display status messages, so that
-		# most hg commands won't ever see them.
-		time.sleep(30)
-
-		# now show status every 15 seconds
-		while True:
-			time.sleep(15 - time.time() % 15)
-			s = global_status
-			if s is None:
-				continue
-			if s == "":
-				s = "(unknown status)"
-			print >>sys.stderr, time.asctime(), s
-
-def start_status_thread():
-	t = StatusThread()
-	t.setDaemon(True)  # allowed to exit if t is still running
-	t.start()
-
 class LoadCLThread(threading.Thread):
 	def __init__(self, ui, repo, dir, f, web):
 		threading.Thread.__init__(self)
@@ -735,101 +689,6 @@
 # Multi-line values should be indented.
 """
 
-#######################################################################
-# Mercurial helper functions
-
-# Get effective change nodes taking into account applied MQ patches
-def effective_revpair(repo):
-    try:
-	return scmutil.revpair(repo, ['qparent'])
-    except:
-	return scmutil.revpair(repo, None)
-
-# Return list of changed files in repository that match pats.
-# Warn about patterns that did not match.
-def matchpats(ui, repo, pats, opts):
-	matcher = scmutil.match(repo, pats, opts)
-	node1, node2 = effective_revpair(repo)
-	modified, added, removed, deleted, unknown, ignored, clean = repo.status(node1, node2, matcher, ignored=True, clean=True, unknown=True)
-	return (modified, added, removed, deleted, unknown, ignored, clean)
-
-# Return list of changed files in repository that match pats.
-# The patterns came from the command line, so we warn
-# if they have no effect or cannot be understood.
-def ChangedFiles(ui, repo, pats, opts, taken=None):
-	taken = taken or {}
-	# Run each pattern separately so that we can warn about
-	# patterns that didn't do anything useful.
-	for p in pats:
-		modified, added, removed, deleted, unknown, ignored, clean = matchpats(ui, repo, [p], opts)
-		redo = False
-		for f in unknown:
-			promptadd(ui, repo, f)
-			redo = True
-		for f in deleted:
-			promptremove(ui, repo, f)
-			redo = True
-		if redo:
-			modified, added, removed, deleted, unknown, ignored, clean = matchpats(ui, repo, [p], opts)
-		for f in modified + added + removed:
-			if f in taken:
-				ui.warn("warning: %s already in CL %s\n" % (f, taken[f].name))
-		if not modified and not added and not removed:
-			ui.warn("warning: %s did not match any modified files\n" % (p,))
-
-	# Again, all at once (eliminates duplicates)
-	modified, added, removed = matchpats(ui, repo, pats, opts)[:3]
-	l = modified + added + removed
-	l.sort()
-	if taken:
-		l = Sub(l, taken.keys())
-	return l
-
-# Return list of changed files in repository that match pats and still exist.
-def ChangedExistingFiles(ui, repo, pats, opts):
-	modified, added = matchpats(ui, repo, pats, opts)[:2]
-	l = modified + added
-	l.sort()
-	return l
-
-# Return list of files claimed by existing CLs
-def Taken(ui, repo):
-	all = LoadAllCL(ui, repo, web=False)
-	taken = {}
-	for _, cl in all.items():
-		for f in cl.files:
-			taken[f] = cl
-	return taken
-
-# Return list of changed files that are not claimed by other CLs
-def DefaultFiles(ui, repo, pats, opts):
-	return ChangedFiles(ui, repo, pats, opts, taken=Taken(ui, repo))
-
-def Sub(l1, l2):
-	return [l for l in l1 if l not in l2]
-
-def Add(l1, l2):
-	l = l1 + Sub(l2, l1)
-	l.sort()
-	return l
-
-def Intersect(l1, l2):
-	return [l for l in l1 if l in l2]
-
-def getremote(ui, repo, opts):
-	# save $http_proxy; creating the HTTP repo object will
-	# delete it in an attempt to "help"
-	proxy = os.environ.get('http_proxy')
-	source = hg.parseurl(ui.expandpath("default"), None)[0]
-	try:
-		remoteui = hg.remoteui # hg 1.6
-	except:
-		remoteui = cmdutil.remoteui
-	other = hg.repository(remoteui(repo, opts), source)
-	if proxy is not None:
-		os.environ['http_proxy'] = proxy
-	return other
-
 desc_re = '^(.+: |(tag )?(release|weekly)\.|fix build|undo CL)'
 
 desc_msg = '''Your CL description appears not to use the standard form.
@@ -851,15 +710,17 @@
 
 '''
 
+def promptyesno(ui, msg):
+	return ui.promptchoice(msg, ["&yes", "&no"], 0) == 0
 
 def promptremove(ui, repo, f):
 	if promptyesno(ui, "hg remove %s (y/n)?" % (f,)):
-		if commands.remove(ui, repo, 'path:'+f) != 0:
+		if hg_commands.remove(ui, repo, 'path:'+f) != 0:
 			ui.warn("error removing %s" % (f,))
 
 def promptadd(ui, repo, f):
 	if promptyesno(ui, "hg add %s (y/n)?" % (f,)):
-		if commands.add(ui, repo, 'path:'+f) != 0:
+		if hg_commands.add(ui, repo, 'path:'+f) != 0:
 			ui.warn("error adding %s" % (f,))
 
 def EditCL(ui, repo, cl):
@@ -899,10 +760,14 @@
 		# Check file list for files that need to be hg added or hg removed
 		# or simply aren't understood.
 		pats = ['path:'+f for f in clx.files]
-		modified, added, removed, deleted, unknown, ignored, clean = matchpats(ui, repo, pats, {})
+		changed = hg_matchPattern(ui, repo, *pats, modified=True, added=True, removed=True)
+		deleted = hg_matchPattern(ui, repo, *pats, deleted=True)
+		unknown = hg_matchPattern(ui, repo, *pats, unknown=True)
+		ignored = hg_matchPattern(ui, repo, *pats, ignored=True)
+		clean = hg_matchPattern(ui, repo, *pats, clean=True)
 		files = []
 		for f in clx.files:
-			if f in modified or f in added or f in removed:
+			if f in changed:
 				files.append(f)
 				continue
 			if f in deleted:
@@ -954,7 +819,7 @@
 	else:
 		cl = CL("new")
 		cl.local = True
-		cl.files = ChangedFiles(ui, repo, pats, opts, taken=Taken(ui, repo))
+		cl.files = ChangedFiles(ui, repo, pats, taken=Taken(ui, repo))
 		if not cl.files:
 			return None, "no files changed"
 	if opts.get('reviewer'):
@@ -972,42 +837,56 @@
 				return None, err
 	return cl, ""
 
-# reposetup replaces cmdutil.match with this wrapper,
-# which expands the syntax @clnumber to mean the files
-# in that CL.
-original_match = None
-global_repo = None
-global_ui = None
-def ReplacementForCmdutilMatch(ctx, pats=None, opts=None, globbed=False, default='relpath'):
-	taken = []
-	files = []
-	pats = pats or []
-	opts = opts or {}
-	
+#######################################################################
+# Change list file management
+
+# Return list of changed files in repository that match pats.
+# The patterns came from the command line, so we warn
+# if they have no effect or cannot be understood.
+def ChangedFiles(ui, repo, pats, taken=None):
+	taken = taken or {}
+	# Run each pattern separately so that we can warn about
+	# patterns that didn't do anything useful.
 	for p in pats:
-		if p.startswith('@'):
-			taken.append(p)
-			clname = p[1:]
-			if not GoodCLName(clname):
-				raise util.Abort("invalid CL name " + clname)
-			cl, err = LoadCL(global_repo.ui, global_repo, clname, web=False)
-			if err != '':
-				raise util.Abort("loading CL " + clname + ": " + err)
-			if not cl.files:
-				raise util.Abort("no files in CL " + clname)
-			files = Add(files, cl.files)
-	pats = Sub(pats, taken) + ['path:'+f for f in files]
+		for f in hg_matchPattern(ui, repo, p, unknown=True):
+			promptadd(ui, repo, f)
+		for f in hg_matchPattern(ui, repo, p, removed=True):
+			promptremove(ui, repo, f)
+		files = hg_matchPattern(ui, repo, p, modified=True, added=True, removed=True)
+		for f in files:
+			if f in taken:
+				ui.warn("warning: %s already in CL %s\n" % (f, taken[f].name))
+		if not files:
+			ui.warn("warning: %s did not match any modified files\n" % (p,))
 
-	# work-around for http://selenic.com/hg/rev/785bbc8634f8
-	if hgversion >= '1.9' and not hasattr(ctx, 'match'):
-		ctx = ctx[None]
-	return original_match(ctx, pats=pats, opts=opts, globbed=globbed, default=default)
+	# Again, all at once (eliminates duplicates)
+	l = hg_matchPattern(ui, repo, *pats, modified=True, added=True, removed=True)
+	l.sort()
+	if taken:
+		l = Sub(l, taken.keys())
+	return l
 
-def RelativePath(path, cwd):
-	n = len(cwd)
-	if path.startswith(cwd) and path[n] == '/':
-		return path[n+1:]
-	return path
+# Return list of changed files in repository that match pats and still exist.
+def ChangedExistingFiles(ui, repo, pats, opts):
+	l = hg_matchPattern(ui, repo, *pats, modified=True, added=True)
+	l.sort()
+	return l
+
+# Return list of files claimed by existing CLs
+def Taken(ui, repo):
+	all = LoadAllCL(ui, repo, web=False)
+	taken = {}
+	for _, cl in all.items():
+		for f in cl.files:
+			taken[f] = cl
+	return taken
+
+# Return list of changed files that are not claimed by other CLs
+def DefaultFiles(ui, repo, pats):
+	return ChangedFiles(ui, repo, pats, taken=Taken(ui, repo))
+
+#######################################################################
+# File format checking.
 
 def CheckFormat(ui, repo, files, just_warn=False):
 	set_status("running gofmt")
@@ -1016,7 +895,7 @@
 
 # Check that gofmt run on the list of files does not change them
 def CheckGofmt(ui, repo, files, just_warn):
-	files = [f for f in files if (f.startswith('src/') or f.startswith('test/bench/')) and f.endswith('.go')]
+	files = gofmt_required(files)
 	if not files:
 		return
 	cwd = os.getcwd()
@@ -1028,7 +907,7 @@
 		cmd = subprocess.Popen(["gofmt", "-l"] + files, shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=sys.platform != "win32")
 		cmd.stdin.close()
 	except:
-		raise util.Abort("gofmt: " + ExceptionDetail())
+		raise hg_util.Abort("gofmt: " + ExceptionDetail())
 	data = cmd.stdout.read()
 	errors = cmd.stderr.read()
 	cmd.wait()
@@ -1041,12 +920,12 @@
 		if just_warn:
 			ui.warn("warning: " + msg + "\n")
 		else:
-			raise util.Abort(msg)
+			raise hg_util.Abort(msg)
 	return
 
 # Check that *.[chys] files indent using tabs.
 def CheckTabfmt(ui, repo, files, just_warn):
-	files = [f for f in files if f.startswith('src/') and re.search(r"\.[chys]$", f)]
+	files = [f for f in files if f.startswith('src/') and re.search(r"\.[chys]$", f) and not re.search(r"\.tab\.[ch]$", f)]
 	if not files:
 		return
 	cwd = os.getcwd()
@@ -1070,20 +949,327 @@
 		if just_warn:
 			ui.warn("warning: " + msg + "\n")
 		else:
-			raise util.Abort(msg)
+			raise hg_util.Abort(msg)
 	return
 
 #######################################################################
-# Mercurial commands
+# CONTRIBUTORS file parsing
 
-# every command must take a ui and and repo as arguments.
-# opts is a dict where you can find other command line flags
-#
-# Other parameters are taken in order from items on the command line that
-# don't start with a dash.  If no default value is given in the parameter list,
-# they are required.
-#
+contributorsCache = None
+contributorsURL = None
 
+def ReadContributors(ui, repo):
+	global contributorsCache
+	if contributorsCache is not None:
+		return contributorsCache
+
+	try:
+		if contributorsURL is not None:
+			opening = contributorsURL
+			f = urllib2.urlopen(contributorsURL)
+		else:
+			opening = repo.root + '/CONTRIBUTORS'
+			f = open(repo.root + '/CONTRIBUTORS', 'r')
+	except:
+		ui.write("warning: cannot open %s: %s\n" % (opening, ExceptionDetail()))
+		return
+
+	contributors = {}
+	for line in f:
+		# CONTRIBUTORS is a list of lines like:
+		#	Person <email>
+		#	Person <email> <alt-email>
+		# The first email address is the one used in commit logs.
+		if line.startswith('#'):
+			continue
+		m = re.match(r"([^<>]+\S)\s+(<[^<>\s]+>)((\s+<[^<>\s]+>)*)\s*$", line)
+		if m:
+			name = m.group(1)
+			email = m.group(2)[1:-1]
+			contributors[email.lower()] = (name, email)
+			for extra in m.group(3).split():
+				contributors[extra[1:-1].lower()] = (name, email)
+
+	contributorsCache = contributors
+	return contributors
+
+def CheckContributor(ui, repo, user=None):
+	set_status("checking CONTRIBUTORS file")
+	user, userline = FindContributor(ui, repo, user, warn=False)
+	if not userline:
+		raise hg_util.Abort("cannot find %s in CONTRIBUTORS" % (user,))
+	return userline
+
+def FindContributor(ui, repo, user=None, warn=True):
+	if not user:
+		user = ui.config("ui", "username")
+		if not user:
+			raise hg_util.Abort("[ui] username is not configured in .hgrc")
+	user = user.lower()
+	m = re.match(r".*<(.*)>", user)
+	if m:
+		user = m.group(1)
+
+	contributors = ReadContributors(ui, repo)
+	if user not in contributors:
+		if warn:
+			ui.warn("warning: cannot find %s in CONTRIBUTORS\n" % (user,))
+		return user, None
+	
+	user, email = contributors[user]
+	return email, "%s <%s>" % (user, email)
+
+#######################################################################
+# Mercurial helper functions.
+# Read http://mercurial.selenic.com/wiki/MercurialApi before writing any of these.
+# We use the ui.pushbuffer/ui.popbuffer + hg_commands.xxx tricks for all interaction
+# with Mercurial.  It has proved the most stable as they make changes.
+
+hgversion = hg_util.version()
+
+# We require Mercurial 1.9 and suggest Mercurial 2.0.
+# The details of the scmutil package changed then,
+# so allowing earlier versions would require extra band-aids below.
+# Ubuntu 11.10 ships with Mercurial 1.9.1 as the default version.
+hg_required = "1.9"
+hg_suggested = "2.0"
+
+old_message = """
+
+The code review extension requires Mercurial """+hg_required+""" or newer.
+You are using Mercurial """+hgversion+""".
+
+To install a new Mercurial, use
+
+	sudo easy_install mercurial=="""+hg_suggested+"""
+
+or visit http://mercurial.selenic.com/downloads/.
+"""
+
+linux_message = """
+You may need to clear your current Mercurial installation by running:
+
+	sudo apt-get remove mercurial mercurial-common
+	sudo rm -rf /etc/mercurial
+"""
+
+if hgversion < hg_required:
+	msg = old_message
+	if os.access("/etc/mercurial", 0):
+		msg += linux_message
+	raise hg_util.Abort(msg)
+
+from mercurial.hg import clean as hg_clean
+from mercurial import cmdutil as hg_cmdutil
+from mercurial import error as hg_error
+from mercurial import match as hg_match
+from mercurial import node as hg_node
+
+class uiwrap(object):
+	def __init__(self, ui):
+		self.ui = ui
+		ui.pushbuffer()
+		self.oldQuiet = ui.quiet
+		ui.quiet = True
+		self.oldVerbose = ui.verbose
+		ui.verbose = False
+	def output(self):
+		ui = self.ui
+		ui.quiet = self.oldQuiet
+		ui.verbose = self.oldVerbose
+		return ui.popbuffer()
+
+def to_slash(path):
+	if sys.platform == "win32":
+		return path.replace('\\', '/')
+	return path
+
+def hg_matchPattern(ui, repo, *pats, **opts):
+	w = uiwrap(ui)
+	hg_commands.status(ui, repo, *pats, **opts)
+	text = w.output()
+	ret = []
+	prefix = to_slash(os.path.realpath(repo.root))+'/'
+	for line in text.split('\n'):
+		f = line.split()
+		if len(f) > 1:
+			if len(pats) > 0:
+				# Given patterns, Mercurial shows relative to cwd
+				p = to_slash(os.path.realpath(f[1]))
+				if not p.startswith(prefix):
+					print >>sys.stderr, "File %s not in repo root %s.\n" % (p, prefix)
+				else:
+					ret.append(p[len(prefix):])
+			else:
+				# Without patterns, Mercurial shows relative to root (what we want)
+				ret.append(to_slash(f[1]))
+	return ret
+
+def hg_heads(ui, repo):
+	w = uiwrap(ui)
+	hg_commands.heads(ui, repo)
+	return w.output()
+
+noise = [
+	"",
+	"resolving manifests",
+	"searching for changes",
+	"couldn't find merge tool hgmerge",
+	"adding changesets",
+	"adding manifests",
+	"adding file changes",
+	"all local heads known remotely",
+]
+
+def isNoise(line):
+	line = str(line)
+	for x in noise:
+		if line == x:
+			return True
+	return False
+
+def hg_incoming(ui, repo):
+	w = uiwrap(ui)
+	ret = hg_commands.incoming(ui, repo, force=False, bundle="")
+	if ret and ret != 1:
+		raise hg_util.Abort(ret)
+	return w.output()
+
+def hg_log(ui, repo, **opts):
+	for k in ['date', 'keyword', 'rev', 'user']:
+		if not opts.has_key(k):
+			opts[k] = ""
+	w = uiwrap(ui)
+	ret = hg_commands.log(ui, repo, **opts)
+	if ret:
+		raise hg_util.Abort(ret)
+	return w.output()
+
+def hg_outgoing(ui, repo, **opts):
+	w = uiwrap(ui)
+	ret = hg_commands.outgoing(ui, repo, **opts)
+	if ret and ret != 1:
+		raise hg_util.Abort(ret)
+	return w.output()
+
+def hg_pull(ui, repo, **opts):
+	w = uiwrap(ui)
+	ui.quiet = False
+	ui.verbose = True  # for file list
+	err = hg_commands.pull(ui, repo, **opts)
+	for line in w.output().split('\n'):
+		if isNoise(line):
+			continue
+		if line.startswith('moving '):
+			line = 'mv ' + line[len('moving '):]
+		if line.startswith('getting ') and line.find(' to ') >= 0:
+			line = 'mv ' + line[len('getting '):]
+		if line.startswith('getting '):
+			line = '+ ' + line[len('getting '):]
+		if line.startswith('removing '):
+			line = '- ' + line[len('removing '):]
+		ui.write(line + '\n')
+	return err
+
+def hg_push(ui, repo, **opts):
+	w = uiwrap(ui)
+	ui.quiet = False
+	ui.verbose = True
+	err = hg_commands.push(ui, repo, **opts)
+	for line in w.output().split('\n'):
+		if not isNoise(line):
+			ui.write(line + '\n')
+	return err
+
+def hg_commit(ui, repo, *pats, **opts):
+	return hg_commands.commit(ui, repo, *pats, **opts)
+
+#######################################################################
+# Mercurial precommit hook to disable commit except through this interface.
+
+commit_okay = False
+
+def precommithook(ui, repo, **opts):
+	if commit_okay:
+		return False  # False means okay.
+	ui.write("\ncodereview extension enabled; use mail, upload, or submit instead of commit\n\n")
+	return True
+
+#######################################################################
+# @clnumber file pattern support
+
+# We replace scmutil.match with the MatchAt wrapper to add the @clnumber pattern.
+
+match_repo = None
+match_ui = None
+match_orig = None
+
+def InstallMatch(ui, repo):
+	global match_repo
+	global match_ui
+	global match_orig
+
+	match_ui = ui
+	match_repo = repo
+
+	from mercurial import scmutil
+	match_orig = scmutil.match
+	scmutil.match = MatchAt
+
+def MatchAt(ctx, pats=None, opts=None, globbed=False, default='relpath'):
+	taken = []
+	files = []
+	pats = pats or []
+	opts = opts or {}
+	
+	for p in pats:
+		if p.startswith('@'):
+			taken.append(p)
+			clname = p[1:]
+			if clname == "default":
+				files = DefaultFiles(match_ui, match_repo, [])
+			else:
+				if not GoodCLName(clname):
+					raise hg_util.Abort("invalid CL name " + clname)
+				cl, err = LoadCL(match_repo.ui, match_repo, clname, web=False)
+				if err != '':
+					raise hg_util.Abort("loading CL " + clname + ": " + err)
+				if not cl.files:
+					raise hg_util.Abort("no files in CL " + clname)
+				files = Add(files, cl.files)
+	pats = Sub(pats, taken) + ['path:'+f for f in files]
+
+	# work-around for http://selenic.com/hg/rev/785bbc8634f8
+	if not hasattr(ctx, 'match'):
+		ctx = ctx[None]
+	return match_orig(ctx, pats=pats, opts=opts, globbed=globbed, default=default)
+
+#######################################################################
+# Commands added by code review extension.
+
+# As of Mercurial 2.1 the commands are all required to return integer
+# exit codes, whereas earlier versions allowed returning arbitrary strings
+# to be printed as errors.  We wrap the old functions to make sure we
+# always return integer exit codes now.  Otherwise Mercurial dies
+# with a TypeError traceback (unsupported operand type(s) for &: 'str' and 'int').
+# Introduce a Python decorator to convert old functions to the new
+# stricter convention.
+
+def hgcommand(f):
+	def wrapped(ui, repo, *pats, **opts):
+		err = f(ui, repo, *pats, **opts)
+		if type(err) is int:
+			return err
+		if not err:
+			return 0
+		raise hg_util.Abort(err)
+	wrapped.__doc__ = f.__doc__
+	return wrapped
+
+#######################################################################
+# hg change
+
+@hgcommand
 def change(ui, repo, *pats, **opts):
 	"""create, edit or delete a change list
 
@@ -1106,8 +1292,8 @@
 	before running hg change -d 123456.
 	"""
 
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 	
 	dirty = {}
 	if len(pats) > 0 and GoodCLName(pats[0]):
@@ -1121,12 +1307,12 @@
 		if not cl.local and (opts["stdin"] or not opts["stdout"]):
 			return "cannot change non-local CL " + name
 	else:
-		if repo[None].branch() != "default":
-			return "cannot run hg change outside default branch"
 		name = "new"
 		cl = CL("new")
+		if repo[None].branch() != "default":
+			return "cannot create CL outside default branch; switch with 'hg update default'"
 		dirty[cl] = True
-		files = ChangedFiles(ui, repo, pats, opts, taken=Taken(ui, repo))
+		files = ChangedFiles(ui, repo, pats, taken=Taken(ui, repo))
 
 	if opts["delete"] or opts["deletelocal"]:
 		if opts["delete"] and opts["deletelocal"]:
@@ -1194,17 +1380,26 @@
 			ui.write("CL created: " + cl.url + "\n")
 	return
 
+#######################################################################
+# hg code-login (broken?)
+
+@hgcommand
 def code_login(ui, repo, **opts):
 	"""log in to code review server
 
 	Logs in to the code review server, saving a cookie in
 	a file in your home directory.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	# A non-empty codereview_disabled string is returned as the error;
+	# the hgcommand wrapper turns it into an hg_util.Abort.
+	if codereview_disabled:
+		return codereview_disabled
 
+	# The only visible work is this MySend(None) call.  Nothing is returned
+	# afterwards, so the hgcommand wrapper reports exit code 0.
 	MySend(None)
 
+#######################################################################
+# hg clpatch / undo / release-apply / download
+# All concerned with applying or unapplying patches to the repository.
+
+@hgcommand
 def clpatch(ui, repo, clname, **opts):
 	"""import a patch from the code review server
 
@@ -1219,6 +1414,7 @@
 		return "cannot run hg clpatch outside default branch"
 	return clpatch_or_undo(ui, repo, clname, opts, mode="clpatch")
 
+@hgcommand
 def undo(ui, repo, clname, **opts):
 	"""undo the effect of a CL
 	
@@ -1230,6 +1426,7 @@
 		return "cannot run hg undo outside default branch"
 	return clpatch_or_undo(ui, repo, clname, opts, mode="undo")
 
+@hgcommand
 def release_apply(ui, repo, clname, **opts):
 	"""apply a CL to the release branch
 
@@ -1274,16 +1471,16 @@
 		return "no active release branches"
 	if c.branch() != releaseBranch:
 		if c.modified() or c.added() or c.removed():
-			raise util.Abort("uncommitted local changes - cannot switch branches")
-		err = hg.clean(repo, releaseBranch)
+			raise hg_util.Abort("uncommitted local changes - cannot switch branches")
+		err = hg_clean(repo, releaseBranch)
 		if err:
 			return err
 	try:
 		err = clpatch_or_undo(ui, repo, clname, opts, mode="backport")
 		if err:
-			raise util.Abort(err)
+			raise hg_util.Abort(err)
 	except Exception, e:
-		hg.clean(repo, "default")
+		hg_clean(repo, "default")
 		raise e
 	return None
 
@@ -1318,14 +1515,10 @@
 
 # Implementation of clpatch/undo.
 def clpatch_or_undo(ui, repo, clname, opts, mode):
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	if mode == "undo" or mode == "backport":
-		if hgversion < '1.4':
-			# Don't have cmdutil.match (see implementation of sync command).
-			return "hg is too old to run hg %s - update to 1.4 or newer" % mode
-
 		# Find revision in Mercurial repository.
 		# Assume CL number is 7+ decimal digits.
 		# Otherwise is either change log sequence number (fewer decimal digits),
@@ -1334,11 +1527,8 @@
 		# sequence numbers get to be 7 digits long.
 		if re.match('^[0-9]{7,}$', clname):
 			found = False
-			matchfn = scmutil.match(repo, [], {'rev': None})
-			def prep(ctx, fns):
-				pass
-			for ctx in cmdutil.walkchangerevs(repo, matchfn, {'rev': None}, prep):
-				rev = repo[ctx.rev()]
+			for r in hg_log(ui, repo, keyword="codereview.appspot.com/"+clname, limit=100, template="{node}\n").split():
+				rev = repo[r]
 				# Last line with a code review URL is the actual review URL.
 				# Earlier ones might be part of the CL description.
 				n = rev2clname(rev)
@@ -1356,7 +1546,7 @@
 				return "cannot find CL name in revision description"
 		
 		# Create fresh CL and start with patch that would reverse the change.
-		vers = short(rev.node())
+		vers = hg_node.short(rev.node())
 		cl = CL("new")
 		desc = str(rev.description())
 		if mode == "undo":
@@ -1364,7 +1554,7 @@
 		else:
 			cl.desc = (backportHeader % (releaseBranch, line1(desc), clname, vers)) + desc + undoFooter
 		v1 = vers
-		v0 = short(rev.parents()[0].node())
+		v0 = hg_node.short(rev.parents()[0].node())
 		if mode == "undo":
 			arg = v1 + ":" + v0
 		else:
@@ -1382,7 +1572,7 @@
 	# find current hg version (hg identify)
 	ctx = repo[None]
 	parents = ctx.parents()
-	id = '+'.join([short(p.node()) for p in parents])
+	id = '+'.join([hg_node.short(p.node()) for p in parents])
 
 	# if version does not match the patch version,
 	# try to update the patch line numbers.
@@ -1406,7 +1596,7 @@
 	try:
 		cmd = subprocess.Popen(argv, shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None, close_fds=sys.platform != "win32")
 	except:
-		return "hgpatch: " + ExceptionDetail()
+		return "hgpatch: " + ExceptionDetail() + "\nInstall hgpatch with:\n$ go get code.google.com/p/go.codereview/cmd/hgpatch\n"
 
 	out, err = cmd.communicate(patch)
 	if cmd.returncode != 0 and not opts["ignore_hgpatch_failure"]:
@@ -1415,7 +1605,7 @@
 	cl.files = out.strip().split()
 	if not cl.files and not opts["ignore_hgpatch_failure"]:
 		return "codereview issue %s has no changed files" % clname
-	files = ChangedFiles(ui, repo, [], opts)
+	files = ChangedFiles(ui, repo, [])
 	extra = Sub(cl.files, files)
 	if extra:
 		ui.warn("warning: these files were listed in the patch but not changed:\n\t" + "\n\t".join(extra) + "\n")
@@ -1489,14 +1679,15 @@
 		d = newdelta
 	return d, ""
 
+@hgcommand
 def download(ui, repo, clname, **opts):
 	"""download a change from the code review server
 
 	Download prints a description of the given change list
 	followed by its diff, downloaded from the code review server.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	cl, vers, patch, err = DownloadCL(ui, repo, clname)
 	if err != "":
@@ -1505,6 +1696,10 @@
 	ui.write(patch + "\n")
 	return
 
+#######################################################################
+# hg file
+
+@hgcommand
 def file(ui, repo, clname, pat, *pats, **opts):
 	"""assign files to or remove files from a change list
 
@@ -1513,8 +1708,8 @@
 	The -d option only removes files from the change list.
 	It does not edit them or remove them from the repository.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	pats = tuple([pat] + list(pats))
 	if not GoodCLName(clname):
@@ -1527,7 +1722,7 @@
 	if not cl.local:
 		return "cannot change non-local CL " + clname
 
-	files = ChangedFiles(ui, repo, pats, opts)
+	files = ChangedFiles(ui, repo, pats)
 
 	if opts["delete"]:
 		oldfiles = Intersect(files, cl.files)
@@ -1567,17 +1762,21 @@
 		d.Flush(ui, repo)
 	return
 
+#######################################################################
+# hg gofmt
+
+@hgcommand
 def gofmt(ui, repo, *pats, **opts):
 	"""apply gofmt to modified files
 
 	Applies gofmt to the modified files in the repository that match
 	the given patterns.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	files = ChangedExistingFiles(ui, repo, pats, opts)
-	files = [f for f in files if f.endswith(".go")]
+	files = gofmt_required(files)
 	if not files:
 		return "no modified go files"
 	cwd = os.getcwd()
@@ -1587,21 +1786,28 @@
 		if not opts["list"]:
 			cmd += ["-w"]
 		if os.spawnvp(os.P_WAIT, "gofmt", cmd + files) != 0:
-			raise util.Abort("gofmt did not exit cleanly")
-	except error.Abort, e:
+			raise hg_util.Abort("gofmt did not exit cleanly")
+	except hg_error.Abort, e:
 		raise
 	except:
-		raise util.Abort("gofmt: " + ExceptionDetail())
+		raise hg_util.Abort("gofmt: " + ExceptionDetail())
 	return
 
+def gofmt_required(files):
+	# Returns the names in files that end in '.go', excluding those under
+	# 'test/' unless they are under 'test/bench/'.
+	return [f for f in files if (not f.startswith('test/') or f.startswith('test/bench/')) and f.endswith('.go')]
+
+#######################################################################
+# hg mail
+
+@hgcommand
 def mail(ui, repo, *pats, **opts):
 	"""mail a change for review
 
 	Uploads a patch to the code review server and then sends mail
 	to the reviewer and CC list asking for a review.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	cl, err = CommandLineCL(ui, repo, pats, opts, defaultcc=defaultcc)
 	if err != "":
@@ -1623,80 +1829,74 @@
 
 	cl.Mail(ui, repo)		
 
+#######################################################################
+# hg p / hg pq / hg ps / hg pending
+
+@hgcommand
+def ps(ui, repo, *pats, **opts):
+	"""alias for hg p --short
+	"""
+	# Force the 'short' option on and delegate to pending.
+	opts['short'] = True
+	return pending(ui, repo, *pats, **opts)
+
+@hgcommand
+def pq(ui, repo, *pats, **opts):
+	"""alias for hg p --quick
+	"""
+	# Force the 'quick' option on and delegate to pending.
+	opts['quick'] = True
+	return pending(ui, repo, *pats, **opts)
+
+@hgcommand
 def pending(ui, repo, *pats, **opts):
 	"""show pending changes
 
 	Lists pending changes followed by a list of unassigned but modified files.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
-	m = LoadAllCL(ui, repo, web=True)
+	# Both options default to False when absent from opts (the ps and pq
+	# aliases set them directly).
+	quick = opts.get('quick', False)
+	short = opts.get('short', False)
+	# web is True only when neither --quick nor --short was requested.
+	m = LoadAllCL(ui, repo, web=not quick and not short)
 	names = m.keys()
 	names.sort()
 	for name in names:
 		cl = m[name]
-		ui.write(cl.PendingText() + "\n")
+		if short:
+			# Short form: CL name, a tab, then line1 of the description.
+			ui.write(name + "\t" + line1(cl.desc) + "\n")
+		else:
+			ui.write(cl.PendingText(quick=quick) + "\n")
 
-	files = DefaultFiles(ui, repo, [], opts)
+	# The short form stops here and omits the list of files not in any CL.
+	if short:
+		return
+	files = DefaultFiles(ui, repo, [])
 	if len(files) > 0:
 		s = "Changed files not in any CL:\n"
 		for f in files:
 			s += "\t" + f + "\n"
 		ui.write(s)
 
-def reposetup(ui, repo):
-	global original_match
-	if original_match is None:
-		global global_repo, global_ui
-		global_repo = repo
-		global_ui = ui
-		start_status_thread()
-		original_match = scmutil.match
-		scmutil.match = ReplacementForCmdutilMatch
-		RietveldSetup(ui, repo)
+#######################################################################
+# hg submit
 
-def CheckContributor(ui, repo, user=None):
-	set_status("checking CONTRIBUTORS file")
-	user, userline = FindContributor(ui, repo, user, warn=False)
-	if not userline:
-		raise util.Abort("cannot find %s in CONTRIBUTORS" % (user,))
-	return userline
+def need_sync():
+	# Always raises hg_util.Abort with the out-of-date message; never returns.
+	raise hg_util.Abort("local repository out of date; must sync before submit")
 
-def FindContributor(ui, repo, user=None, warn=True):
-	if not user:
-		user = ui.config("ui", "username")
-		if not user:
-			raise util.Abort("[ui] username is not configured in .hgrc")
-	user = user.lower()
-	m = re.match(r".*<(.*)>", user)
-	if m:
-		user = m.group(1)
-
-	if user not in contributors:
-		if warn:
-			ui.warn("warning: cannot find %s in CONTRIBUTORS\n" % (user,))
-		return user, None
-	
-	user, email = contributors[user]
-	return email, "%s <%s>" % (user, email)
-
+@hgcommand
 def submit(ui, repo, *pats, **opts):
 	"""submit change to remote repository
 
 	Submits change to remote repository.
 	Bails out if the local repository is not in sync with the remote one.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	# We already called this on startup but sometimes Mercurial forgets.
 	set_mercurial_encoding_to_utf8()
 
-	other = getremote(ui, repo, opts)
-	repo.ui.quiet = True
-	if not opts["no_incoming"] and incoming(repo, other):
-		return "local repository out of date; must sync before submit"
+	if not opts["no_incoming"] and hg_incoming(ui, repo):
+		need_sync()
 
 	cl, err = CommandLineCL(ui, repo, pats, opts, defaultcc=defaultcc)
 	if err != "":
@@ -1742,64 +1942,59 @@
 		cl.Mail(ui, repo)
 
 	# submit changes locally
-	date = opts.get('date')
-	if date:
-		opts['date'] = util.parsedate(date)
-		typecheck(opts['date'], str)
-	opts['message'] = cl.desc.rstrip() + "\n\n" + about
-	typecheck(opts['message'], str)
-
-	if opts['dryrun']:
-		print "NOT SUBMITTING:"
-		print "User: ", userline
-		print "Message:"
-		print Indent(opts['message'], "\t")
-		print "Files:"
-		print Indent('\n'.join(cl.files), "\t")
-		return "dry run; not submitted"
+	message = cl.desc.rstrip() + "\n\n" + about
+	typecheck(message, str)
 
 	set_status("pushing " + cl.name + " to remote server")
 
-	other = getremote(ui, repo, opts)
-	if outgoing(repo):
-		raise util.Abort("local repository corrupt or out-of-phase with remote: found outgoing changes")
+	if hg_outgoing(ui, repo):
+		raise hg_util.Abort("local repository corrupt or out-of-phase with remote: found outgoing changes")
+	
+	old_heads = len(hg_heads(ui, repo).split())
 
-	m = match.exact(repo.root, repo.getcwd(), cl.files)
-	node = repo.commit(ustr(opts['message']), ustr(userline), opts.get('date'), m)
-	if not node:
+	global commit_okay
+	commit_okay = True
+	ret = hg_commit(ui, repo, *['path:'+f for f in cl.files], message=message, user=userline)
+	commit_okay = False
+	if ret:
 		return "nothing changed"
-
+	node = repo["-1"].node()
 	# push to remote; if it fails for any reason, roll back
 	try:
-		log = repo.changelog
-		rev = log.rev(node)
-		parents = log.parentrevs(rev)
-		if (rev-1 not in parents and
-				(parents == (nullrev, nullrev) or
-				len(log.heads(log.node(parents[0]))) > 1 and
-				(parents[1] == nullrev or len(log.heads(log.node(parents[1]))) > 1))):
-			# created new head
-			raise util.Abort("local repository out of date; must sync before submit")
+		new_heads = len(hg_heads(ui, repo).split())
+		if old_heads != new_heads and not (old_heads == 0 and new_heads == 1):
+			# Created new head, so we weren't up to date.
+			need_sync()
 
-		# push changes to remote.
-		# if it works, we're committed.
-		# if not, roll back
-		r = repo.push(other, False, None)
-		if r == 0:
-			raise util.Abort("local repository out of date; must sync before submit")
+		# Push changes to remote.  If it works, we're committed.  If not, roll back.
+		try:
+			hg_push(ui, repo)
+		except hg_error.Abort, e:
+			if e.message.find("push creates new heads") >= 0:
+				# Remote repository had changes we missed.
+				need_sync()
+			raise
 	except:
 		real_rollback()
 		raise
 
-	# we're committed. upload final patch, close review, add commit message
-	changeURL = short(node)
-	url = other.url()
-	m = re.match("^https?://([^@/]+@)?([^.]+)\.googlecode\.com/hg/?", url)
+	# We're committed. Upload final patch, close review, add commit message.
+	changeURL = hg_node.short(node)
+	url = ui.expandpath("default")
+	m = re.match("(^https?://([^@/]+@)?([^.]+)\.googlecode\.com/hg/?)" + "|" +
+		"(^https?://([^@/]+@)?code\.google\.com/p/([^/.]+)(\.[^./]+)?/?)", url)
 	if m:
-		changeURL = "http://code.google.com/p/%s/source/detail?r=%s" % (m.group(2), changeURL)
+		if m.group(1): # prj.googlecode.com/hg/ case
+			changeURL = "http://code.google.com/p/%s/source/detail?r=%s" % (m.group(3), changeURL)
+		elif m.group(4) and m.group(7): # code.google.com/p/prj.subrepo/ case
+			changeURL = "http://code.google.com/p/%s/source/detail?r=%s&repo=%s" % (m.group(6), changeURL, m.group(7)[1:])
+		elif m.group(4): # code.google.com/p/prj/ case
+			changeURL = "http://code.google.com/p/%s/source/detail?r=%s" % (m.group(6), changeURL)
+		else:
+			print >>sys.stderr, "URL: ", url
 	else:
 		print >>sys.stderr, "URL: ", url
-	pmsg = "*** Submitted as " + changeURL + " ***\n\n" + opts['message']
+	pmsg = "*** Submitted as " + changeURL + " ***\n\n" + message
 
 	# When posting, move reviewers to CC line,
 	# so that the issue stops showing up in their "My Issues" page.
@@ -1808,53 +2003,39 @@
 	if not cl.copied_from:
 		EditDesc(cl.name, closed=True, private=cl.private)
 	cl.Delete(ui, repo)
-	
+
 	c = repo[None]
 	if c.branch() == releaseBranch and not c.modified() and not c.added() and not c.removed():
 		ui.write("switching from %s to default branch.\n" % releaseBranch)
-		err = hg.clean(repo, "default")
+		err = hg_clean(repo, "default")
 		if err:
 			return err
 	return None
 
+#######################################################################
+# hg sync
+
+@hgcommand
 def sync(ui, repo, **opts):
 	"""synchronize with remote repository
 
 	Incorporates recent changes from the remote repository
 	into the local repository.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	if not opts["local"]:
-		ui.status = sync_note
-		ui.note = sync_note
-		other = getremote(ui, repo, opts)
-		modheads = repo.pull(other)
-		err = commands.postincoming(ui, repo, modheads, True, "tip")
+		# Unless the "local" option is set, pull via hg_pull with update=True;
+		# a truthy result is returned as the error.
+		err = hg_pull(ui, repo, update=True)
 		if err:
 			return err
-	commands.update(ui, repo, rev="default")
+	# sync_changes runs in both the local and non-local case.
 	sync_changes(ui, repo)
 
-def sync_note(msg):
-	# we run sync (pull -u) in verbose mode to get the
-	# list of files being updated, but that drags along
-	# a bunch of messages we don't care about.
-	# omit them.
-	if msg == 'resolving manifests\n':
-		return
-	if msg == 'searching for changes\n':
-		return
-	if msg == "couldn't find merge tool hgmerge\n":
-		return
-	sys.stdout.write(msg)
-
 def sync_changes(ui, repo):
 	# Look through recent change log descriptions to find
 	# potential references to http://.*/our-CL-number.
 	# Double-check them by looking at the Rietveld log.
-	def Rev(rev):
+	for rev in hg_log(ui, repo, limit=100, template="{node}\n").split():
 		desc = repo[rev].description().strip()
 		for clname in re.findall('(?m)^http://(?:[^\n]+)/([0-9]+)$', desc):
 			if IsLocalCL(ui, repo, clname) and IsRietveldSubmitted(ui, clname, repo[rev].hex()):
@@ -1867,28 +2048,10 @@
 					EditDesc(cl.name, closed=True, private=cl.private)
 				cl.Delete(ui, repo)
 
-	if hgversion < '1.4':
-		get = util.cachefunc(lambda r: repo[r].changeset())
-		changeiter, matchfn = cmdutil.walkchangerevs(ui, repo, [], get, {'rev': None})
-		n = 0
-		for st, rev, fns in changeiter:
-			if st != 'iter':
-				continue
-			n += 1
-			if n > 100:
-				break
-			Rev(rev)
-	else:
-		matchfn = scmutil.match(repo, [], {'rev': None})
-		def prep(ctx, fns):
-			pass
-		for ctx in cmdutil.walkchangerevs(repo, matchfn, {'rev': None}, prep):
-			Rev(ctx.rev())
-
 	# Remove files that are not modified from the CLs in which they appear.
 	all = LoadAllCL(ui, repo, web=False)
-	changed = ChangedFiles(ui, repo, [], {})
-	for _, cl in all.items():
+	changed = ChangedFiles(ui, repo, [])
+	for cl in all.values():
 		extra = Sub(cl.files, changed)
 		if extra:
 			ui.warn("Removing unmodified files from CL %s:\n" % (cl.name,))
@@ -1903,13 +2066,17 @@
 				ui.warn("CL %s has no files; delete locally with hg change -D %s\n" % (cl.name, cl.name))
 	return
 
+#######################################################################
+# hg upload
+
+@hgcommand
 def upload(ui, repo, name, **opts):
 	"""upload diffs to the code review server
 
 	Uploads the current modifications for a given change to the server.
 	"""
-	if missing_codereview:
-		return missing_codereview
+	if codereview_disabled:
+		return codereview_disabled
 
 	repo.ui.quiet = True
 	cl, err = LoadCL(ui, repo, name, web=True)
@@ -1921,6 +2088,9 @@
 	print "%s%s\n" % (server_url_base, cl.name)
 	return
 
+#######################################################################
+# Table of commands, supplied to Mercurial for installation.
+
 review_opts = [
 	('r', 'reviewer', '', 'add reviewer'),
 	('', 'cc', '', 'add cc'),
@@ -1979,13 +2149,26 @@
 	),
 	"^pending|p": (
 		pending,
+		[
+			('s', 'short', False, 'show short result form'),
+			('', 'quick', False, 'do not consult codereview server'),
+		],
+		"[FILE ...]"
+	),
+	"^ps": (
+		ps,
+		[],
+		"[FILE ...]"
+	),
+	"^pq": (
+		pq,
 		[],
 		"[FILE ...]"
 	),
 	"^mail": (
 		mail,
 		review_opts + [
-		] + commands.walkopts,
+		] + hg_commands.walkopts,
 		"[-r reviewer] [--cc cc] [change# | file ...]"
 	),
 	"^release-apply": (
@@ -2001,8 +2184,7 @@
 		submit,
 		review_opts + [
 			('', 'no_incoming', None, 'disable initial incoming check (for testing)'),
-			('n', 'dryrun', None, 'make change only locally (for testing)'),
-		] + commands.walkopts + commands.commitopts + commands.commitopts2,
+		] + hg_commands.walkopts + hg_commands.commitopts + hg_commands.commitopts2,
 		"[-r reviewer] [--cc cc] [change# | file ...]"
 	),
 	"^sync": (
@@ -2027,10 +2209,77 @@
 	),
 }
 
+#######################################################################
+# Mercurial extension initialization
+
+def norollback(*pats, **opts):
+	"""(disabled when using this extension)"""
+	# Assigned to repo.rollback by reposetup; any call aborts with the
+	# message below.
+	raise hg_util.Abort("codereview extension enabled; use undo instead of rollback")
+
+codereview_init = False
+
+def reposetup(ui, repo):
+	# One-time initialization of the extension for a repository.
+	# Reads codereview.cfg, checks that the default path is a URL, calls
+	# InstallMatch and RietveldSetup, registers the precommit hook, and
+	# replaces repo.rollback with norollback.
+	# On a missing repo root or unreadable config it sets codereview_disabled
+	# to an explanatory string and returns without installing anything.
+	global codereview_disabled
+	global defaultcc
+	
+	# reposetup gets called both for the local repository
+	# and also for any repository we are pulling or pushing to.
+	# Only initialize the first time.
+	global codereview_init
+	if codereview_init:
+		return
+	codereview_init = True
+
+	# Read repository-specific options from lib/codereview/codereview.cfg or codereview.cfg.
+	root = ''
+	try:
+		root = repo.root
+	except:
+		# Yes, repo might not have root; see issue 959.
+		codereview_disabled = 'codereview disabled: repository has no root'
+		return
+	
+	# Use lib/codereview/codereview.cfg when it exists; otherwise fall back
+	# to codereview.cfg at the root without checking that it exists (the
+	# open below fails if it does not).
+	repo_config_path = ''
+	p1 = root + '/lib/codereview/codereview.cfg'
+	p2 = root + '/codereview.cfg'
+	if os.access(p1, os.F_OK):
+		repo_config_path = p1
+	else:
+		repo_config_path = p2
+	try:
+		# NOTE(review): f is never explicitly closed in this function.
+		f = open(repo_config_path)
+		for line in f:
+			# Recognized keys are 'defaultcc:' and 'contributors:'; other
+			# lines are ignored.
+			if line.startswith('defaultcc:'):
+				defaultcc = SplitCommaSpace(line[len('defaultcc:'):])
+			if line.startswith('contributors:'):
+				global contributorsURL
+				contributorsURL = line[len('contributors:'):].strip()
+	except:
+		# NOTE(review): the bare except also catches failures while reading
+		# or parsing lines and reports them all as "cannot open".
+		codereview_disabled = 'codereview disabled: cannot open ' + repo_config_path
+		return
+
+	# The [paths] default setting must contain "://"; otherwise abort.
+	remote = ui.config("paths", "default", "")
+	if remote.find("://") < 0:
+		raise hg_util.Abort("codereview: default path '%s' is not a URL" % (remote,))
+
+	InstallMatch(ui, repo)
+	RietveldSetup(ui, repo)
+
+	# Disable the Mercurial commands that might change the repository.
+	# Only commands in this extension are supposed to do that.
+	ui.setconfig("hooks", "precommit.codereview", precommithook)
+
+	# Rollback removes an existing commit.  Don't do that either.
+	# The original method is saved in real_rollback before being replaced.
+	global real_rollback
+	real_rollback = repo.rollback
+	repo.rollback = norollback
+	
 
 #######################################################################
 # Wrappers around upload.py for interacting with Rietveld
 
+from HTMLParser import HTMLParser
+
 # HTML form parser
 class FormParser(HTMLParser):
 	def __init__(self):
@@ -2106,7 +2355,7 @@
 		for k in todel:
 			del x[k]
 	else:
-		raise util.Abort("unknown type " + str(type(x)) + " in fix_json")
+		raise hg_util.Abort("unknown type " + str(type(x)) + " in fix_json")
 	if type(x) is str:
 		x = x.replace('\r\n', '\n')
 	return x
@@ -2309,68 +2558,13 @@
 class opt(object):
 	pass
 
-def nocommit(*pats, **opts):
-	"""(disabled when using this extension)"""
-	raise util.Abort("codereview extension enabled; use mail, upload, or submit instead of commit")
-
-def nobackout(*pats, **opts):
-	"""(disabled when using this extension)"""
-	raise util.Abort("codereview extension enabled; use undo instead of backout")
-
-def norollback(*pats, **opts):
-	"""(disabled when using this extension)"""
-	raise util.Abort("codereview extension enabled; use undo instead of rollback")
-
 def RietveldSetup(ui, repo):
-	global defaultcc, upload_options, rpc, server, server_url_base, force_google_account, verbosity, contributors
-	global missing_codereview
-
-	repo_config_path = ''
-	# Read repository-specific options from lib/codereview/codereview.cfg
-	try:
-		repo_config_path = repo.root + '/lib/codereview/codereview.cfg'
-		f = open(repo_config_path)
-		for line in f:
-			if line.startswith('defaultcc: '):
-				defaultcc = SplitCommaSpace(line[10:])
-	except:
-		# If there are no options, chances are good this is not
-		# a code review repository; stop now before we foul
-		# things up even worse.  Might also be that repo doesn't
-		# even have a root.  See issue 959.
-		if repo_config_path == '':
-			missing_codereview = 'codereview disabled: repository has no root'
-		else:
-			missing_codereview = 'codereview disabled: cannot open ' + repo_config_path
-		return
-
-	# Should only modify repository with hg submit.
-	# Disable the built-in Mercurial commands that might
-	# trip things up.
-	cmdutil.commit = nocommit
-	global real_rollback
-	real_rollback = repo.rollback
-	repo.rollback = norollback
-	# would install nobackout if we could; oh well
-
-	try:
-		f = open(repo.root + '/CONTRIBUTORS', 'r')
-	except:
-		raise util.Abort("cannot open %s: %s" % (repo.root+'/CONTRIBUTORS', ExceptionDetail()))
-	for line in f:
-		# CONTRIBUTORS is a list of lines like:
-		#	Person <email>
-		#	Person <email> <alt-email>
-		# The first email address is the one used in commit logs.
-		if line.startswith('#'):
-			continue
-		m = re.match(r"([^<>]+\S)\s+(<[^<>\s]+>)((\s+<[^<>\s]+>)*)\s*$", line)
-		if m:
-			name = m.group(1)
-			email = m.group(2)[1:-1]
-			contributors[email.lower()] = (name, email)
-			for extra in m.group(3).split():
-				contributors[extra[1:-1].lower()] = (name, email)
+	global force_google_account
+	global rpc
+	global server
+	global server_url_base
+	global upload_options
+	global verbosity
 
 	if not ui.verbose:
 		verbosity = 0
@@ -2416,15 +2610,14 @@
 	
 	global releaseBranch
 	tags = repo.branchtags().keys()
-	if 'release-branch.r100' in tags:
+	if 'release-branch.go10' in tags:
 		# NOTE(rsc): This tags.sort is going to get the wrong
-		# answer when comparing release-branch.r99 with
-		# release-branch.r100.  If we do ten releases a year
-		# that gives us 4 years before we have to worry about this.
-		raise util.Abort('tags.sort needs to be fixed for release-branch.r100')
+		# answer when comparing release-branch.go9 with
+		# release-branch.go10.  It will be a while before we care.
+		raise hg_util.Abort('tags.sort needs to be fixed for release-branch.go10')
 	tags.sort()
 	for t in tags:
-		if t.startswith('release-branch.'):
+		if t.startswith('release-branch.go'):
 			releaseBranch = t			
 
 #######################################################################
@@ -3030,7 +3223,7 @@
 				unused, filename = line.split(':', 1)
 				# On Windows if a file has property changes its filename uses '\'
 				# instead of '/'.
-				filename = filename.strip().replace('\\', '/')
+				filename = to_slash(filename.strip())
 				files[filename] = self.GetBaseFile(filename)
 		return files
 
@@ -3150,6 +3343,10 @@
 		return self
 	def status(self, *args, **opts):
 		pass
+
+	def formatter(self, topic, opts):
+		from mercurial.formatter import plainformatter
+		return plainformatter(self, topic, opts)
 	
 	def readconfig(self, *args, **opts):
 		pass
@@ -3183,7 +3380,11 @@
 			if not err and mqparent != "":
 				self.base_rev = mqparent
 			else:
-				self.base_rev = RunShell(["hg", "parents", "-q"]).split(':')[1].strip()
+				out = RunShell(["hg", "parents", "-q"], silent_ok=True).strip()
+				if not out:
+					# No revisions; use 0 to mean a repository with nothing.
+					out = "0:0"
+				self.base_rev = out.split(':')[1].strip()
 	def _GetRelPath(self, filename):
 		"""Get relative path of a file according to the current directory,
 		given its logical path in the repo."""
@@ -3238,9 +3439,9 @@
 				out = RunShell(["hg", "status", "-C", "--rev", rev])
 			else:
 				fui = FakeMercurialUI()
-				ret = commands.status(fui, self.repo, *[], **{'rev': [rev], 'copies': True})
+				ret = hg_commands.status(fui, self.repo, *[], **{'rev': [rev], 'copies': True})
 				if ret:
-					raise util.Abort(ret)
+					raise hg_util.Abort(ret)
 				out = fui.output
 			self.status = out.splitlines()
 		for i in range(len(self.status)):
@@ -3248,12 +3449,12 @@
 			#	A path
 			#	M path
 			# etc
-			line = self.status[i].replace('\\', '/')
+			line = to_slash(self.status[i])
 			if line[2:] == path:
 				if i+1 < len(self.status) and self.status[i+1][:2] == '  ':
 					return self.status[i:i+2]
 				return self.status[i:i+1]
-		raise util.Abort("no status for " + path)
+		raise hg_util.Abort("no status for " + path)
 	
 	def GetBaseFile(self, filename):
 		set_status("inspecting " + filename)
@@ -3315,7 +3516,7 @@
 			# When a file is modified, paths use '/' between directories, however
 			# when a property is modified '\' is used on Windows.  Make them the same
 			# otherwise the file shows up twice.
-			temp_filename = temp_filename.strip().replace('\\', '/')
+			temp_filename = to_slash(temp_filename.strip())
 			if temp_filename != filename:
 				# File has property changes but no modifications, create a new diff.
 				new_filename = temp_filename

diff --git a/re2/compile.cc b/re2/compile.cc
index 67c4c2c..9cddb71 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc

@@ -44,7 +44,7 @@
   static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
 };
 
-static PatchList nullPatchList = { 0 };
+static PatchList nullPatchList;
 
 // Returns patch list containing just p.
 PatchList PatchList::Mk(uint32 p) {
@@ -106,11 +106,12 @@
   uint32 begin;
   PatchList end;
 
+  explicit Frag(LinkerInitialized) {}
   Frag() : begin(0) { end.p = 0; }  // needed so Frag can go in vector
   Frag(uint32 begin, PatchList end) : begin(begin), end(end) {}
 };
 
-static Frag kNullFrag;
+static Frag kNullFrag(LINKER_INITIALIZED);
 
 // Input encodings.
 enum Encoding {
@@ -458,7 +459,7 @@
 // A Rune is a name for a Unicode code point.
 // Returns maximum rune encoded by UTF-8 sequence of length len.
 static int MaxRune(int len) {
-  int b;  // number of Rune blents lenn len-byte UTF-8 sequence (len < UTFmax)
+  int b;  // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax)
   if (len == 1)
     b = 7;
   else
@@ -588,7 +589,7 @@
 };
 
 void Compiler::Add_80_10ffff() {
-  int inst[arraysize(prog_80_10ffff)];
+  int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning
   for (int i = 0; i < arraysize(prog_80_10ffff); i++) {
     const ByteRangeProg& p = prog_80_10ffff[i];
     int next = 0;
@@ -732,7 +733,7 @@
       Frag f = Match(re->match_id());
       // Remember unanchored match to end of string.
       if (anchor_ != RE2::ANCHOR_BOTH)
-        f = Cat(DotStar(), f);
+        f = Cat(DotStar(), Cat(EmptyWidth(kEmptyEndText), f));
       return f;
     }
 

diff --git a/re2/dfa.cc b/re2/dfa.cc
index 7d206fb..2556c0f 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc

@@ -115,6 +115,7 @@
     kFlagNeedShift = 16,        // needed kEmpty bits are or'ed in shifted left
   };
 
+#ifndef STL_MSVC
   // STL function structures for use with unordered_set.
   struct StateEqual {
     bool operator()(const State* a, const State* b) const {
@@ -132,6 +133,7 @@
       return true;  // they're equal
     }
   };
+#endif  // STL_MSVC
   struct StateHash {
     size_t operator()(const State* a) const {
       if (a == NULL)
@@ -143,9 +145,34 @@
       else
         return Hash64StringWithSeed(s, len, a->flag_);
     }
+#ifdef STL_MSVC
+    // Less than operator.
+    bool operator()(const State* a, const State* b) const {
+      if (a == b)
+        return false;
+      if (a == NULL || b == NULL)
+        return a == NULL;
+      if (a->ninst_ != b->ninst_)
+        return a->ninst_ < b->ninst_;
+      if (a->flag_ != b->flag_)
+        return a->flag_ < b->flag_;
+      for (int i = 0; i < a->ninst_; ++i)
+        if (a->inst_[i] != b->inst_[i])
+          return a->inst_[i] < b->inst_[i];
+      return false;  // they're equal
+    }
+    // The two public members are required by msvc. 4 and 8 are default values.
+    // Reference: http://msdn.microsoft.com/en-us/library/1s1byw77.aspx
+    static const size_t bucket_size = 4;
+    static const size_t min_buckets = 8;
+#endif  // STL_MSVC
   };
 
+#ifdef STL_MSVC
+  typedef unordered_set<State*, StateHash> StateSet;
+#else  // !STL_MSVC
   typedef unordered_set<State*, StateHash, StateEqual> StateSet;
+#endif  // STL_MSVC
 
 
  private:
@@ -441,8 +468,8 @@
   // At minimum, the search requires room for two states in order
   // to limp along, restarting frequently.  We'll get better performance
   // if there is room for a larger number of states, say 20.
-  int one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
-                  (prog_->bytemap_range()+1)*sizeof(State*);
+  int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
+                    (prog_->bytemap_range()+1)*sizeof(State*);
   if (state_budget_ < 20*one_state) {
     LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
                               prog_->size(), max_mem);
@@ -962,8 +989,10 @@
 
   // If someone else already computed this, return it.
   MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
-  if (state->next_[ByteMap(c)])
-    return state->next_[ByteMap(c)];
+  State* ns = state->next_[ByteMap(c)];
+  ANNOTATE_HAPPENS_AFTER(ns);
+  if (ns != NULL)
+    return ns;
 
   // Convert state into Workq.
   StateToWorkq(state, q0_);
@@ -1006,7 +1035,17 @@
   }
   bool ismatch = false;
   RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_);
-  swap(q0_, q1_);
+  
+  // Most of the time, we build the state from the output of
+  // RunWorkqOnByte, so swap q0_ and q1_ here.  However, so that
+  // RE2::Set can tell exactly which match instructions
+  // contributed to the match, don't swap if c is kByteEndText.
+  // The resulting state wouldn't be correct for further processing
+  // of the string, but we're at the end of the text so that's okay.
+  // Leaving q0_ alone preserves the match instructions that led to
+  // the current setting of ismatch.
+  if (c != kByteEndText || kind_ != Prog::kManyMatch)
+    swap(q0_, q1_);
 
   // Save afterflag along with ismatch and isword in new state.
   uint flag = afterflag;
@@ -1015,7 +1054,7 @@
   if (isword)
     flag |= kFlagLastWord;
 
-  State* ns = WorkqToCachedState(q0_, flag);
+  ns = WorkqToCachedState(q0_, flag);
 
   // Write barrier before updating state->next_ so that the
   // main search loop can proceed without any locking, for speed.
@@ -1024,9 +1063,9 @@
   //   a) the access to next_ should be ignored,
   //   b) 'ns' is properly published.
   WriteMemoryBarrier();  // Flush ns before linking to it.
-  ANNOTATE_PUBLISH_MEMORY_RANGE(ns, sizeof(*ns));
 
   ANNOTATE_IGNORE_WRITES_BEGIN();
+  ANNOTATE_HAPPENS_BEFORE(ns);
   state->next_[ByteMap(c)] = ns;
   ANNOTATE_IGNORE_WRITES_END();
   return ns;
@@ -1351,6 +1390,7 @@
 
     MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
     State* ns = s->next_[bytemap[c]];
+    ANNOTATE_HAPPENS_AFTER(ns);
     if (ns == NULL) {
       ns = RunStateOnByteUnlocked(s, c);
       if (ns == NULL) {
@@ -1422,20 +1462,6 @@
     }
   }
 
-  // Peek in state to see if a match is coming up.
-  if (params->matches && kind_ == Prog::kManyMatch) {
-    vector<int>* v = params->matches;
-    v->clear();
-    if (s > SpecialStateMax) {
-      for (int i = 0; i < s->ninst_; i++) {
-        Prog::Inst* ip = prog_->inst(s->inst_[i]);
-        if (ip->opcode() == kInstMatch)
-          v->push_back(ip->match_id());
-      }
-    }
-  }
-
-
   // Process one more byte to see if it triggers a match.
   // (Remember, matches are delayed one byte.)
   int lastbyte;
@@ -1453,6 +1479,7 @@
 
   MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
   State* ns = s->next_[ByteMap(lastbyte)];
+  ANNOTATE_HAPPENS_AFTER(ns);
   if (ns == NULL) {
     ns = RunStateOnByteUnlocked(s, lastbyte);
     if (ns == NULL) {
@@ -1480,6 +1507,15 @@
   if (s > SpecialStateMax && s->IsMatch()) {
     matched = true;
     lastmatch = p;
+    if (params->matches && kind_ == Prog::kManyMatch) {
+      vector<int>* v = params->matches;
+      v->clear();
+      for (int i = 0; i < s->ninst_; i++) {
+        Prog::Inst* ip = prog_->inst(s->inst_[i]);
+        if (ip->opcode() == kInstMatch)
+          v->push_back(ip->match_id());
+      }
+    }
     if (DebugDFA)
       fprintf(stderr, "match @%d! [%s]\n", static_cast<int>(lastmatch - bp),
               DumpState(s).c_str());
@@ -1637,7 +1673,7 @@
             DumpState(info->start).c_str(), info->firstbyte);
 
   params->start = info->start;
-  params->firstbyte = info->firstbyte;
+  params->firstbyte = ANNOTATE_UNPROTECTED_READ(info->firstbyte);
 
   return true;
 }
@@ -1646,12 +1682,16 @@
 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
                               uint flags) {
   // Quick check; okay because of memory barriers below.
-  if (info->firstbyte != kFbUnknown)
+  if (ANNOTATE_UNPROTECTED_READ(info->firstbyte) != kFbUnknown) {
+    ANNOTATE_HAPPENS_AFTER(&info->firstbyte);
     return true;
+  }
 
   MutexLock l(&mutex_);
-  if (info->firstbyte != kFbUnknown)
+  if (info->firstbyte != kFbUnknown) {
+    ANNOTATE_HAPPENS_AFTER(&info->firstbyte);
     return true;
+  }
 
   q0_->clear();
   AddToQueue(q0_,
@@ -1662,12 +1702,14 @@
     return false;
 
   if (info->start == DeadState) {
+    ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
     WriteMemoryBarrier();  // Synchronize with "quick check" above.
     info->firstbyte = kFbNone;
     return true;
   }
 
   if (info->start == FullMatchState) {
+    ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
     WriteMemoryBarrier();  // Synchronize with "quick check" above.
     info->firstbyte = kFbNone;	// will be ignored
     return true;
@@ -1680,6 +1722,7 @@
   for (int i = 0; i < 256; i++) {
     State* s = RunStateOnByte(info->start, i);
     if (s == NULL) {
+      ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
       WriteMemoryBarrier();  // Synchronize with "quick check" above.
       info->firstbyte = firstbyte;
       return false;
@@ -1694,6 +1737,7 @@
       break;
     }
   }
+  ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
   WriteMemoryBarrier();  // Synchronize with "quick check" above.
   info->firstbyte = firstbyte;
   return true;
@@ -1734,7 +1778,7 @@
     return false;
   }
   if (params.start == DeadState)
-    return NULL;
+    return false;
   if (params.start == FullMatchState) {
     if (run_forward == want_earliest_match)
       *epp = text.begin();
@@ -1776,7 +1820,7 @@
   }
 
   // Quick check; okay because of memory barrier below.
-  DFA *dfa = *pdfa;
+  DFA *dfa = ANNOTATE_UNPROTECTED_READ(*pdfa);
   if (dfa != NULL) {
     ANNOTATE_HAPPENS_AFTER(dfa);
     return dfa;
@@ -1784,8 +1828,10 @@
 
   MutexLock l(&dfa_mutex_);
   dfa = *pdfa;
-  if (dfa != NULL)
+  if (dfa != NULL) {
+    ANNOTATE_HAPPENS_AFTER(dfa);
     return dfa;
+  }
 
   // For a forward DFA, half the memory goes to each DFA.
   // For a reverse DFA, all the memory goes to the

diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
index 9269cee..f576258 100644
--- a/re2/filtered_re2.cc
+++ b/re2/filtered_re2.cc

@@ -27,8 +27,10 @@
   RE2::ErrorCode code = re->error_code();
 
   if (!re->ok()) {
-    LOG(ERROR) << "Couldn't compile regular expression, skipping: "
-               << re << " due to error " << re->error();
+    if (options.log_errors()) {
+      LOG(ERROR) << "Couldn't compile regular expression, skipping: "
+                 << re << " due to error " << re->error();
+    }
     delete re;
   } else {
     *id = re2_vec_.size();

diff --git a/re2/nfa.cc b/re2/nfa.cc
index 61a4ecf..8c4f761 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc

@@ -84,7 +84,7 @@
   inline Thread* AllocThread();
   inline void FreeThread(Thread*);
 
-  // Add r (or its children, following unlabeled arrows)
+  // Add id (or its children, following unlabeled arrows)
   // to the workqueue q with associated capture info.
   void AddToThreadq(Threadq* q, int id, int flag,
                     const char* p, const char** capture);
@@ -179,7 +179,7 @@
   }
 }
 
-// Follows all empty arrows from r and enqueues all the states reached.
+// Follows all empty arrows from id0 and enqueues all the states reached.
 // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
 // The pointer p is the current input position, and m is the
 // current set of match boundaries.
@@ -214,7 +214,7 @@
 
     // Create entry in q no matter what.  We might fill it in below,
     // or we might not.  Even if not, it is necessary to have it,
-    // so that we don't revisit r during the recursion.
+    // so that we don't revisit id0 during the recursion.
     q->set_new(id, NULL);
 
     Thread** tp = &q->find(id)->second;

diff --git a/re2/parse.cc b/re2/parse.cc
index 4f4ef89..0cf4ab4 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc

@@ -16,7 +16,6 @@
 // and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W.
 // See regexp.h for rationale.
 
-#include <ctype.h>
 #include "util/util.h"
 #include "re2/regexp.h"
 #include "re2/stringpiece.h"
@@ -1454,6 +1453,13 @@
       // to what's already missing.  Too hard, so do in two steps.
       CharClassBuilder ccb1;
       AddUGroup(&ccb1, g, +1, parse_flags);
+      // If the flags say to take out \n, put it in, so that negating will take it out.
+      // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags.
+      bool cutnl = !(parse_flags & Regexp::ClassNL) ||
+                   (parse_flags & Regexp::NeverNL);
+      if (cutnl) {
+        ccb1.AddRange('\n', '\n');
+      }
       ccb1.Negate();
       cc->AddCharClass(&ccb1);
       return;
@@ -1996,8 +2002,13 @@
             return NULL;
           break;
         }
-        if (!ps.DoLeftParen(NULL))
-          return NULL;
+        if (ps.flags() & NeverCapture) {
+          if (!ps.DoLeftParenNoCapture())
+            return NULL;
+        } else {
+          if (!ps.DoLeftParen(NULL))
+            return NULL;
+        }
         t.remove_prefix(1);  // '('
         break;
 

diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index 30e4c01..4b9c35d 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc

@@ -181,6 +181,12 @@
   return ApplyFold(f, r);
 }
 
+static Rune ToLowerRuneLatin1(Rune r) {
+  if ('A' <= r && r <= 'Z')
+    r += 'a' - 'A';
+  return r;
+}
+
 Prefilter* Prefilter::FromString(const string& str) {
   Prefilter* m = new Prefilter(Prefilter::ATOM);
   m->atom_ = str;
@@ -205,8 +211,9 @@
   static Info* EmptyString();
   static Info* NoMatch();
   static Info* AnyChar();
-  static Info* CClass(CharClass* cc);
+  static Info* CClass(CharClass* cc, bool latin1);
   static Info* Literal(Rune r);
+  static Info* LiteralLatin1(Rune r);
   static Info* AnyMatch();
 
   // Format Info as a string.
@@ -390,6 +397,11 @@
   return string(buf, n);
 }
 
+static string RuneToStringLatin1(Rune r) {
+  char c = r & 0xff;
+  return string(&c, 1);
+}
+
 // Constructs Info for literal rune.
 Prefilter::Info* Prefilter::Info::Literal(Rune r) {
   Info* info = new Info();
@@ -398,6 +410,14 @@
   return info;
 }
 
+// Constructs Info for literal rune for Latin1 encoded string.
+Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
+  Info* info = new Info();
+  info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
+  info->is_exact_ = true;
+  return info;
+}
+
 // Constructs Info for dot (any character).
 Prefilter::Info* Prefilter::Info::AnyChar() {
   Prefilter::Info* info = new Prefilter::Info();
@@ -432,7 +452,8 @@
 
 // Constructs Prefilter::Info for a character class.
 typedef CharClass::iterator CCIter;
-Prefilter::Info* Prefilter::Info::CClass(CharClass *cc) {
+Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
+                                         bool latin1) {
   if (Trace) {
     VLOG(0) << "CharClassInfo:";
     for (CCIter i = cc->begin(); i != cc->end(); ++i)
@@ -445,8 +466,14 @@
 
   Prefilter::Info *a = new Prefilter::Info();
   for (CCIter i = cc->begin(); i != cc->end(); ++i)
-    for (Rune r = i->lo; r <= i->hi; r++)
-      a->exact_.insert(RuneToString(ToLowerRune(r)));
+    for (Rune r = i->lo; r <= i->hi; r++) {
+      if (latin1) {
+        a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
+      } else {
+        a->exact_.insert(RuneToString(ToLowerRune(r)));
+      }
+    }
+
 
   a->is_exact_ = true;
 
@@ -459,7 +486,7 @@
 
 class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
  public:
-  Walker() {}
+  Walker(bool latin1) : latin1_(latin1) {}
 
   virtual Info* PostVisit(
       Regexp* re, Info* parent_arg,
@@ -470,7 +497,9 @@
       Regexp* re,
       Info* parent_arg);
 
+  bool latin1() { return latin1_; }
  private:
+  bool latin1_;
   DISALLOW_EVIL_CONSTRUCTORS(Walker);
 };
 
@@ -478,7 +507,9 @@
   if (Trace) {
     LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
   }
-  Prefilter::Info::Walker w;
+
+  bool latin1 = re->parse_flags() & Regexp::Latin1;
+  Prefilter::Info::Walker w(latin1);
   Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
 
   if (w.stopped_early()) {
@@ -524,7 +555,12 @@
       break;
 
     case kRegexpLiteral:
-      info = Literal(re->rune());
+      if (latin1()) {
+        info = LiteralLatin1(re->rune());
+      }
+      else {
+        info = Literal(re->rune());
+      }
       break;
 
     case kRegexpLiteralString:
@@ -532,9 +568,17 @@
         info = NoMatch();
         break;
       }
-      info = Literal(re->runes()[0]);
-      for (int i = 1; i < re->nrunes(); i++)
-        info = Concat(info, Literal(re->runes()[i]));
+      if (latin1()) {
+        info = LiteralLatin1(re->runes()[0]);
+        for (int i = 1; i < re->nrunes(); i++) {
+          info = Concat(info, LiteralLatin1(re->runes()[i]));
+        }
+      } else {
+        info = Literal(re->runes()[0]);
+        for (int i = 1; i < re->nrunes(); i++) {
+          info = Concat(info, Literal(re->runes()[i]));
+        }
+      }
       break;
 
     case kRegexpConcat: {
@@ -585,7 +629,7 @@
       break;
 
     case kRegexpCharClass:
-      info = CClass(re->cc());
+      info = CClass(re->cc(), latin1());
       break;
 
     case kRegexpCapture:

diff --git a/re2/re2.cc b/re2/re2.cc
index 448f28e..8d1d468 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc

@@ -10,7 +10,6 @@
 #include "re2/re2.h"
 
 #include <stdio.h>
-#include <ctype.h>
 #include <string>
 #include <pthread.h>
 #include <errno.h>
@@ -32,20 +31,42 @@
 const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume;
 const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume;
 
+// The definition below triggers an LNK2005 error in MSVC, so it is skipped there.
+#ifndef COMPILER_MSVC
 const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
+#endif  // COMPILER_MSVC
 
-// Commonly-used option sets; arguments to constructor are:
-//   utf8 input
-//   posix syntax
-//   longest match
-//   log errors
-const RE2::Options RE2::DefaultOptions;  // EncodingUTF8, false, false, true
-const RE2::Options RE2::Latin1(RE2::Options::EncodingLatin1, false, false, true);
-const RE2::Options RE2::POSIX(RE2::Options::EncodingUTF8, true, true, true);
-const RE2::Options RE2::Quiet(RE2::Options::EncodingUTF8, false, false, false);
+RE2::Options::Options(RE2::CannedOptions opt)
+  : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
+    posix_syntax_(opt == RE2::POSIX),
+    longest_match_(opt == RE2::POSIX),
+    log_errors_(opt != RE2::Quiet),
+    max_mem_(kDefaultMaxMem),
+    literal_(false),
+    never_nl_(false),
+    never_capture_(false),
+    case_sensitive_(true),
+    perl_classes_(false),
+    word_boundary_(false),
+    one_line_(false) {
+}
 
-// If a regular expression has no error, its error_ field points here
-static const string empty_string;
+// static empty things for use as const references.
+// To avoid global constructors, initialized on demand.
+GLOBAL_MUTEX(empty_mutex);
+static const string *empty_string;
+static const map<string, int> *empty_named_groups;
+static const map<int, string> *empty_group_names;
+
+static void InitEmpty() {
+  GLOBAL_MUTEX_LOCK(empty_mutex);
+  if (empty_string == NULL) {
+    empty_string = new string;
+    empty_named_groups = new map<string, int>;
+    empty_group_names = new map<int, string>;
+  }
+  GLOBAL_MUTEX_UNLOCK(empty_mutex);
+}
 
 // Converts from Regexp error code to RE2 error code.
 // Maybe some day they will diverge.  In any event, this
@@ -111,7 +132,8 @@
   int flags = Regexp::ClassNL;
   switch (encoding()) {
     default:
-      LOG(ERROR) << "Unknown encoding " << encoding();
+      if (log_errors())
+        LOG(ERROR) << "Unknown encoding " << encoding();
       break;
     case RE2::Options::EncodingUTF8:
       break;
@@ -129,6 +151,9 @@
   if (never_nl())
     flags |= Regexp::NeverNL;
 
+  if (never_capture())
+    flags |= Regexp::NeverCapture;
+
   if (!case_sensitive())
     flags |= Regexp::FoldCase;
 
@@ -148,7 +173,8 @@
   mutex_ = new Mutex;
   pattern_ = pattern.as_string();
   options_.Copy(options);
-  error_ = &empty_string;
+  InitEmpty();
+  error_ = empty_string;
   error_code_ = NoError;
   suffix_regexp_ = NULL;
   entire_regexp_ = NULL;
@@ -164,7 +190,7 @@
     static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
     &status);
   if (entire_regexp_ == NULL) {
-    if (error_ == &empty_string)
+    if (error_ == empty_string)
       error_ = new string(status.Text());
     if (options_.log_errors()) {
       LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
@@ -206,7 +232,7 @@
 // Returns rprog_, computing it if needed.
 re2::Prog* RE2::ReverseProg() const {
   MutexLock l(mutex_);
-  if (rprog_ == NULL && error_ == &empty_string) {
+  if (rprog_ == NULL && error_ == empty_string) {
     rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
     if (rprog_ == NULL) {
       if (options_.log_errors())
@@ -219,9 +245,6 @@
   return rprog_;
 }
 
-static const map<string, int> empty_named_groups;
-static const map<int, string> empty_group_names;
-
 RE2::~RE2() {
   if (suffix_regexp_)
     suffix_regexp_->Decref();
@@ -230,11 +253,11 @@
   delete mutex_;
   delete prog_;
   delete rprog_;
-  if (error_ != &empty_string)
+  if (error_ != empty_string)
     delete error_;
-  if (named_groups_ != NULL && named_groups_ != &empty_named_groups)
+  if (named_groups_ != NULL && named_groups_ != empty_named_groups)
     delete named_groups_;
-  if (group_names_ != NULL &&  group_names_ != &empty_group_names)
+  if (group_names_ != NULL &&  group_names_ != empty_group_names)
     delete group_names_;
 }
 
@@ -248,11 +271,11 @@
 const map<string, int>&  RE2::NamedCapturingGroups() const {
   MutexLock l(mutex_);
   if (!ok())
-    return empty_named_groups;
+    return *empty_named_groups;
   if (named_groups_ == NULL) {
     named_groups_ = suffix_regexp_->NamedCaptures();
     if (named_groups_ == NULL)
-      named_groups_ = &empty_named_groups;
+      named_groups_ = empty_named_groups;
   }
   return *named_groups_;
 }
@@ -261,11 +284,11 @@
 const map<int, string>&  RE2::CapturingGroupNames() const {
   MutexLock l(mutex_);
   if (!ok())
-    return empty_group_names;
+    return *empty_group_names;
   if (group_names_ == NULL) {
     group_names_ = suffix_regexp_->CaptureNames();
     if (group_names_ == NULL)
-      group_names_ = &empty_group_names;
+      group_names_ = empty_group_names;
   }
   return *group_names_;
 }
@@ -306,7 +329,7 @@
 
 // Returns the maximum submatch needed for the rewrite to be done by Replace().
 // E.g. if rewrite == "foo \\2,\\1", returns 2.
-static int MaxSubmatch(const StringPiece& rewrite) {
+int RE2::MaxSubmatch(const StringPiece& rewrite) {
   int max = 0;
   for (const char *s = rewrite.data(), *end = s + rewrite.size();
        s < end; s++) {
@@ -512,10 +535,11 @@
   }
 
   if (startpos < 0 || startpos > endpos || endpos > text.size()) {
-    LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
+    if (options_.log_errors())
+      LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
     return false;
   }
-  
+
   StringPiece subtext = text;
   subtext.remove_prefix(startpos);
   subtext.remove_suffix(text.size() - endpos);
@@ -631,7 +655,8 @@
           LOG(INFO) << "Match " << trunc(pattern_)
                     << " [" << CEscape(subtext) << "]"
                     << " DFA inconsistency.";
-        LOG(ERROR) << "DFA inconsistency";
+        if (options_.log_errors())
+          LOG(ERROR) << "DFA inconsistency";
         return false;
       }
       if (FLAGS_trace_re2)
@@ -715,7 +740,7 @@
                   << " [" << CEscape(subtext) << "]"
                   << " using OnePass.";
       if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
-        if (!skipped_test)
+        if (!skipped_test && options_.log_errors())
           LOG(ERROR) << "SearchOnePass inconsistency";
         return false;
       }
@@ -726,7 +751,7 @@
                   << " using BitState.";
       if (!prog_->SearchBitState(subtext1, text, anchor,
                                  kind, submatch, ncap)) {
-        if (!skipped_test)
+        if (!skipped_test && options_.log_errors())
           LOG(ERROR) << "SearchBitState inconsistency";
         return false;
       }
@@ -736,7 +761,7 @@
                   << " [" << CEscape(subtext) << "]"
                   << " using NFA.";
       if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
-        if (!skipped_test)
+        if (!skipped_test && options_.log_errors())
           LOG(ERROR) << "SearchNFA inconsistency";
         return false;
       }
@@ -835,8 +860,10 @@
       if (isdigit(c)) {
         int n = (c - '0');
         if (n >= veclen) {
-          LOG(ERROR) << "requested group " << n
-                     << " in regexp " << rewrite.data();
+          if (options_.log_errors()) {
+            LOG(ERROR) << "requested group " << n
+                       << " in regexp " << rewrite.data();
+          }
           return false;
         }
         StringPiece snip = vec[n];
@@ -845,7 +872,8 @@
       } else if (c == '\\') {
         out->push_back('\\');
       } else {
-        LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
+        if (options_.log_errors())
+          LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
         return false;
       }
     } else {

diff --git a/re2/re2.h b/re2/re2.h
index 9dbc99c..272028b 100644
--- a/re2/re2.h
+++ b/re2/re2.h

@@ -187,12 +187,28 @@
 #include "re2/variadic_function.h"
 
 namespace re2 {
+
 using std::string;
 using std::map;
 class Mutex;
 class Prog;
 class Regexp;
 
+// The following enum should be used only as a constructor argument to indicate
+// that the variable has static storage class, and that the constructor should
+// do nothing to its state.  It indicates to the reader that it is legal to
+// declare a static instance of the class, provided the constructor is given
+// the LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
+// static variable that has a constructor or a destructor because invocation
+// order is undefined.  However, IF the type can be initialized by filling with
+// zeroes (which the loader does for static variables), AND the type's
+// destructor does nothing to the storage, then a constructor for static
+// initialization can be declared as
+//       explicit MyClass(LinkerInitialized x) {}
+// and invoked as
+//       static MyClass my_variable_name(LINKER_INITIALIZED);
+enum LinkerInitialized { LINKER_INITIALIZED };
+
 // Interface for regular expression matching.  Also corresponds to a
 // pre-compiled regular expression.  An "RE2" object is safe for
 // concurrent use by multiple threads.
@@ -229,12 +245,15 @@
 
   // Predefined common options.
   // If you need more complicated things, instantiate
-  // an Option class, change the settings, and pass it to the
-  // RE2 constructor.
-  static const Options DefaultOptions;
-  static const Options Latin1; // treat input as Latin-1 (default UTF-8)
-  static const Options POSIX;  // POSIX syntax, leftmost-longest match
-  static const Options Quiet;  // do not log about regexp parse errors
+  // an Options object, possibly passing one of these to
+  // the Options constructor, change the settings, and pass that
+  // Options object to the RE2 constructor.
+  enum CannedOptions {
+    DefaultOptions = 0,
+    Latin1, // treat input as Latin-1 (default UTF-8)
+    POSIX, // POSIX syntax, leftmost-longest match
+    Quiet // do not log about regexp parse errors
+  };
 
   // Need to have the const char* and const string& forms for implicit
   // conversions when passing string literals to FullMatch and PartialMatch.
@@ -467,6 +486,20 @@
   // fail because of a bad rewrite string.
   bool CheckRewriteString(const StringPiece& rewrite, string* error) const;
 
+  // Returns the maximum submatch needed for the rewrite to be done by
+  // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
+  static int MaxSubmatch(const StringPiece& rewrite);
+
+  // Append the "rewrite" string, with backslash substitutions from "vec",
+  // to string "out".
+  // Returns true on success.  This method can fail because of a malformed
+  // rewrite string.  CheckRewriteString guarantees that the rewrite will
+  // be successful.
+  bool Rewrite(string *out,
+               const StringPiece &rewrite,
+               const StringPiece* vec,
+               int veclen) const;
+
   // Constructor options
   class Options {
    public:
@@ -479,6 +512,7 @@
     //   max_mem          (see below)  approx. max memory footprint of RE2
     //   literal          (false) interpret string as literal, not regexp
     //   never_nl         (false) never match \n, even if it is in regexp
+    //   never_capture    (false) parse all parens as non-capturing
     //   case_sensitive   (true)  match is case-sensitive (regexp can override
     //                              with (?i) unless in posix_syntax mode)
     //
@@ -533,11 +567,14 @@
       max_mem_(kDefaultMaxMem),
       literal_(false),
       never_nl_(false),
+      never_capture_(false),
       case_sensitive_(true),
       perl_classes_(false),
       word_boundary_(false),
       one_line_(false) {
     }
+    
+    /*implicit*/ Options(CannedOptions);
 
     Encoding encoding() const { return encoding_; }
     void set_encoding(Encoding encoding) { encoding_ = encoding; }
@@ -571,6 +608,9 @@
     bool never_nl() const { return never_nl_; }
     void set_never_nl(bool b) { never_nl_ = b; }
 
+    bool never_capture() const { return never_capture_; }
+    void set_never_capture(bool b) { never_capture_ = b; }
+
     bool case_sensitive() const { return case_sensitive_; }
     void set_case_sensitive(bool b) { case_sensitive_ = b; }
 
@@ -591,6 +631,7 @@
       max_mem_ = src.max_mem_;
       literal_ = src.literal_;
       never_nl_ = src.never_nl_;
+      never_capture_ = src.never_capture_;
       case_sensitive_ = src.case_sensitive_;
       perl_classes_ = src.perl_classes_;
       word_boundary_ = src.word_boundary_;
@@ -600,25 +641,6 @@
     int ParseFlags() const;
 
    private:
-    // Private constructor for defining constants like RE2::Latin1.
-    friend class RE2;
-    Options(Encoding encoding,
-            bool posix_syntax,
-            bool longest_match,
-            bool log_errors) :
-      encoding_(encoding),
-      posix_syntax_(posix_syntax),
-      longest_match_(longest_match),
-      log_errors_(log_errors),
-      max_mem_(kDefaultMaxMem),
-      literal_(false),
-      never_nl_(false),
-      case_sensitive_(true),
-      perl_classes_(false),
-      word_boundary_(false),
-      one_line_(false) {
-    }
-
     Encoding encoding_;
     bool posix_syntax_;
     bool longest_match_;
@@ -626,6 +648,7 @@
     int64_t max_mem_;
     bool literal_;
     bool never_nl_;
+    bool never_capture_;
     bool case_sensitive_;
     bool perl_classes_;
     bool word_boundary_;
@@ -670,11 +693,6 @@
  private:
   void Init(const StringPiece& pattern, const Options& options);
 
-  bool Rewrite(string *out,
-               const StringPiece &rewrite,
-               const StringPiece* vec,
-               int veclen) const;
-
   bool DoMatch(const StringPiece& text,
                    Anchor anchor,
                    int* consumed,

diff --git a/re2/regexp.cc b/re2/regexp.cc
index 9486b3c..a74ceec 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc

@@ -59,29 +59,39 @@
   return false;
 }
 
-static map<Regexp*, int> ref_map;
-static Mutex ref_mutex;
+static map<Regexp*, int> *ref_map;
+GLOBAL_MUTEX(ref_mutex);
 
 int Regexp::Ref() {
   if (ref_ < kMaxRef)
     return ref_;
 
-  MutexLock l(&ref_mutex);
-  return ref_map[this];
+  GLOBAL_MUTEX_LOCK(ref_mutex);
+  int r = 0;
+  if (ref_map != NULL) {
+    r = (*ref_map)[this];
+  }
+  GLOBAL_MUTEX_UNLOCK(ref_mutex);
+  return r;
 }
 
 // Increments reference count, returns object as convenience.
 Regexp* Regexp::Incref() {
   if (ref_ >= kMaxRef-1) {
     // Store ref count in overflow map.
-    MutexLock l(&ref_mutex);
-    if (ref_ == kMaxRef) {  // already overflowed
-      ref_map[this]++;
-      return this;
+    GLOBAL_MUTEX_LOCK(ref_mutex);
+    if (ref_map == NULL) {
+      ref_map = new map<Regexp*, int>;
     }
-    // overflowing now
-    ref_map[this] = kMaxRef;
-    ref_ = kMaxRef;
+    if (ref_ == kMaxRef) {
+      // already overflowed
+      (*ref_map)[this]++;
+    } else {
+      // overflowing now
+      (*ref_map)[this] = kMaxRef;
+      ref_ = kMaxRef;
+    }
+    GLOBAL_MUTEX_UNLOCK(ref_mutex);
     return this;
   }
 
@@ -93,14 +103,15 @@
 void Regexp::Decref() {
   if (ref_ == kMaxRef) {
     // Ref count is stored in overflow map.
-    MutexLock l(&ref_mutex);
-    int r = ref_map[this] - 1;
+    GLOBAL_MUTEX_LOCK(ref_mutex);
+    int r = (*ref_map)[this] - 1;
     if (r < kMaxRef) {
       ref_ = r;
-      ref_map.erase(this);
+      ref_map->erase(this);
     } else {
-      ref_map[this] = r;
+      (*ref_map)[this] = r;
     }
+    GLOBAL_MUTEX_UNLOCK(ref_mutex);
     return;
   }
   ref_--;
@@ -447,7 +458,7 @@
 }
 
 // Keep in sync with enum RegexpStatusCode in regexp.h
-static const string kErrorStrings[] = {
+static const char *kErrorStrings[] = {
   "no error",
   "unexpected error",
   "invalid escape sequence",
@@ -464,7 +475,7 @@
   "invalid named capture group",
 };
 
-const string& RegexpStatus::CodeText(enum RegexpStatusCode code) {
+string RegexpStatus::CodeText(enum RegexpStatusCode code) {
   if (code < 0 || code >= arraysize(kErrorStrings))
     code = kRegexpInternalError;
   return kErrorStrings[code];

diff --git a/re2/regexp.h b/re2/regexp.h
index 1aebc16..331c017 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h

@@ -197,7 +197,7 @@
 
   // Returns text equivalent of code, e.g.:
   //   "Bad character class"
-  static const string& CodeText(enum RegexpStatusCode code);
+  static string CodeText(enum RegexpStatusCode code);
 
   // Returns text describing error, e.g.:
   //   "Bad character class: [z-a]"
@@ -299,6 +299,7 @@
                            //   and \P{Han} for its negation.
     NeverNL      = 1<<11,  // Never match NL, even if the regexp mentions
                            //   it explicitly.
+    NeverCapture = 1<<12,  // Parse all parens as non-capturing.
 
     // As close to Perl as we can get.
     LikePerl     = ClassNL | OneLine | PerlClasses | PerlB | PerlX |

diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
new file mode 100644
index 0000000..b2dd6db
--- /dev/null
+++ b/re2/testing/backtrack.cc

@@ -0,0 +1,254 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tested by search_test.cc, exhaustive_test.cc, tester.cc
+//
+// Prog::UnsafeSearchBacktrack is a backtracking regular expression search,
+// except that it remembers where it has been, trading a lot of
+// memory for a lot of time. It exists only for testing purposes.
+//
+// Let me repeat that.
+//
+// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
+//   - It uses a ton of memory.
+//   - It uses a ton of stack.
+//   - It uses CHECK and LOG(FATAL).
+//   - It implements unanchored search by repeated anchored search.
+//
+// On the other hand, it is very simple and a good reference
+// implementation for the more complicated regexp packages.
+//
+// In BUILD, this file is linked into the ":testing" library,
+// not the main library, in order to make it harder to pick up
+// accidentally.
+
+#include "util/util.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// Backtracker holds the state for a backtracking search.
+//
+// Excluding the search parameters, the main search state
+// is just the "capture registers", which record, for the
+// current execution, the string position at which each
+// parenthesis was passed.  cap_[0] and cap_[1] are the
+// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.
+//
+// To avoid infinite loops during backtracking on expressions
+// like (a*)*, the visited_[] bitmap marks the (state, string-position)
+// pairs that have already been explored and are thus not worth
+// re-exploring if we get there via another path.  Modern backtracking
+// libraries engineer their program representation differently, to make
+// such infinite loops possible to avoid without keeping a giant visited_
+// bitmap, but visited_ works fine for a reference implementation
+// and it has the nice benefit of making the search run in linear time.
+class Backtracker {
+ public:
+  explicit Backtracker(Prog* prog);
+  ~Backtracker();
+
+  // Searches text for a match of the program.  context is the text
+  // consulted for empty-width assertions; if context.begin() is NULL,
+  // text itself is used.  Returns true if a match was found, in which
+  // case up to nsubmatch entries of submatch have been filled in.
+  // Requires 2*nsubmatch < arraysize(cap_).
+  bool Search(const StringPiece& text, const StringPiece& context,
+              bool anchored, bool longest,
+              StringPiece* submatch, int nsubmatch);
+
+ private:
+  // Explores from instruction id at string position p looking for a match.
+  // Returns true if found (so that caller can stop trying other possibilities).
+  bool Visit(int id, const char* p);
+
+  // Search parameters
+  Prog* prog_;              // program being run
+  StringPiece text_;        // text being searched
+  StringPiece context_;     // greater context of text being searched
+  bool anchored_;           // whether search is anchored at text.begin()
+  bool longest_;            // whether search wants leftmost-longest match
+  bool endmatch_;           // whether search must end at text.end()
+  StringPiece *submatch_;   // submatches to fill in
+  int nsubmatch_;           //   # of submatches to fill in
+
+  // Search state
+  const char* cap_[64];     // capture registers
+  uint32 *visited_;         // bitmap: (Inst*, char*) pairs already backtracked
+  int nvisited_;            //   # of words in bitmap
+};
+
+// Remembers the program to run.  All per-search parameters start out
+// cleared and no visited_ bitmap is allocated until Search() is called.
+Backtracker::Backtracker(Prog* prog)
+    : prog_(prog), anchored_(false), longest_(false), endmatch_(false),
+      submatch_(NULL), nsubmatch_(0),
+      visited_(NULL), nvisited_(0) {}
+
+// Frees the visited_ bitmap left over from the most recent Search().
+// visited_ is NULL if Search() never ran, which delete[] accepts.
+Backtracker::~Backtracker() {
+  delete[] visited_;
+}
+
+// Runs a backtracking search.
+// Records the search parameters, allocates a fresh visited_ bitmap and
+// then calls Visit() from the program's start instruction: once at
+// text.begin() for an anchored search, otherwise at each position of
+// text in turn (text.end() included) until one of them matches.
+// Returns true if a match was found.
+bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
+                         bool anchored, bool longest,
+                         StringPiece* submatch, int nsubmatch) {
+  text_ = text;
+  context_ = context;
+  // A context without data means the text is its own context.
+  if (context_.begin() == NULL)
+    context_ = text;
+  // A program anchored at the start (end) cannot match unless text
+  // begins (ends) where the context does.
+  if (prog_->anchor_start() && text.begin() > context_.begin())
+    return false;
+  if (prog_->anchor_end() && text.end() < context_.end())
+    return false;
+  // Anchoring requested by either the caller or the program applies.
+  anchored_ = anchored | prog_->anchor_start();
+  longest_ = longest | prog_->anchor_end();
+  endmatch_ = prog_->anchor_end();
+  submatch_ = submatch;
+  nsubmatch_ = nsubmatch;
+  // Each submatch uses two capture registers in cap_.
+  CHECK(2*nsubmatch_ < arraysize(cap_));
+  memset(cap_, 0, sizeof cap_);
+
+  // We use submatch_[0] for our own bookkeeping,
+  // so it had better exist.
+  // When the caller supplies none, submatch_ points at the local sp0,
+  // which is only valid until this function returns.
+  StringPiece sp0;
+  if (nsubmatch < 1) {
+    submatch_ = &sp0;
+    nsubmatch_ = 1;
+  }
+  // A NULL data pointer in submatch_[0] marks "no match recorded yet";
+  // Visit() tests for it when it reaches a match instruction.
+  submatch_[0] = NULL;
+
+  // Allocate new visited_ bitmap -- size is proportional
+  // to text, so have to reallocate on each call to Search.
+  // One bit per (instruction, position) pair, where position ranges
+  // over 0..text.size() inclusive, rounded up to whole 32-bit words.
+  delete[] visited_;
+  nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
+  visited_ = new uint32[nvisited_];
+  memset(visited_, 0, nvisited_*sizeof visited_[0]);
+
+  // Anchored search must start at text.begin().
+  if (anchored_) {
+    cap_[0] = text.begin();
+    return Visit(prog_->start(), text.begin());
+  }
+
+  // Unanchored search, starting from each possible text position.
+  // Notice that we have to try the empty string at the end of
+  // the text, so the loop condition is p <= text.end(), not p < text.end().
+  for (const char* p = text.begin(); p <= text.end(); p++) {
+    cap_[0] = p;
+    if (Visit(prog_->start(), p))  // Match must be leftmost; done.
+      return true;
+  }
+  return false;
+}
+
+// Explores from instruction id at string position p looking for a match.
+// Return true if found (so that caller can stop trying other possibilities).
+// Each (id, p) pair is explored at most once per Search(): its bit in
+// visited_ is set on first entry and any later visit returns false at once.
+bool Backtracker::Visit(int id, const char* p) {
+  // Check bitmap.  If we've already explored from here,
+  // either it didn't match or it did but we're hoping for a better match.
+  // Either way, don't go down that road again.
+  CHECK(p <= text_.end());
+  int n = id*(text_.size()+1) + (p - text_.begin());
+  CHECK_LT(n/32, nvisited_);
+  // Build the mask from an unsigned 1 so that selecting bit 31 does not
+  // shift into the sign bit of a signed int.
+  const uint32 bit = 1U << (n&31);
+  if (visited_[n/32] & bit)
+    return false;
+  visited_[n/32] |= bit;
+
+  // Pick out byte at current position.  If at end of string,
+  // have to explore in hope of finishing a match.  Use impossible byte -1.
+  int c = -1;
+  if (p < text_.end())
+    c = *p & 0xFF;
+
+  Prog::Inst* ip = prog_->inst(id);
+  switch (ip->opcode()) {
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
+      return false;  // not reached
+
+    case kInstAlt:
+    case kInstAltMatch:
+      // Try both possible next states: out is preferred to out1.
+      // In longest mode the second branch is explored even after the
+      // first one matched, because it might record a longer match.
+      if (Visit(ip->out(), p)) {
+        if (longest_)
+          Visit(ip->out1(), p);
+        return true;
+      }
+      return Visit(ip->out1(), p);
+
+    case kInstByteRange:
+      if (ip->Matches(c))
+        return Visit(ip->out(), p+1);
+      return false;
+
+    case kInstCapture:
+      if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) {
+        // Capture p to register, but save old value.
+        const char* q = cap_[ip->cap()];
+        cap_[ip->cap()] = p;
+        bool ret = Visit(ip->out(), p);
+        // Restore old value as we backtrack.
+        cap_[ip->cap()] = q;
+        return ret;
+      }
+      // Register index outside cap_: continue without recording it.
+      return Visit(ip->out(), p);
+
+    case kInstEmptyWidth:
+      // Fail if the instruction needs any flag that does not hold at p.
+      if (ip->empty() & ~Prog::EmptyFlags(context_, p))
+        return false;
+      return Visit(ip->out(), p);
+
+    case kInstNop:
+      return Visit(ip->out(), p);
+
+    case kInstMatch:
+      // We found a match.  If it's the best so far, record the
+      // parameters in the caller's submatch_ array.
+      if (endmatch_ && p != context_.end())
+        return false;
+      cap_[1] = p;
+      if (submatch_[0].data() == NULL ||           // First match so far ...
+          (longest_ && p > submatch_[0].end())) {  // ... or better match
+        for (int i = 0; i < nsubmatch_; i++)
+          submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
+      }
+      return true;
+
+    case kInstFail:
+      return false;
+  }
+}
+
+// Entry point for the reference backtracker: translates the Anchor and
+// MatchKind arguments into the flags Backtracker::Search() expects,
+// runs the search, and applies the extra end-of-text test that a full
+// match requires.
+bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
+                                 const StringPiece& context,
+                                 Anchor anchor,
+                                 MatchKind kind,
+                                 StringPiece* match,
+                                 int nmatch) {
+  const bool full_match = (kind == kFullMatch);
+
+  // A full match is done as an anchored longest match followed by a
+  // check that match[0] reaches text.end(), so match[0] has to exist
+  // even if the caller did not ask for it.
+  StringPiece scratch;
+  if (full_match) {
+    anchor = kAnchored;
+    if (nmatch < 1) {
+      match = &scratch;
+      nmatch = 1;
+    }
+  }
+
+  Backtracker b(this);
+  if (!b.Search(text, context,
+                anchor == kAnchored,   // anchored
+                kind != kFirstMatch,   // longest
+                match, nmatch))
+    return false;
+
+  // For a full match the match found must extend to the end of text.
+  return !(full_match && match[0].end() != text.end());
+}
+
+}  // namespace re2

diff --git a/re2/testing/charclass_test.cc b/re2/testing/charclass_test.cc
new file mode 100644
index 0000000..a3764d4
--- /dev/null
+++ b/re2/testing/charclass_test.cc

@@ -0,0 +1,223 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test character class manipulations.
+
+#include "util/test.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// One character-class test case.
+struct CCTest {
+  // Ranges to add to the class, in order.  The list ends at the first
+  // entry whose lo is negative.
+  struct {
+    Rune lo;
+    Rune hi;
+  } add[10];
+  // If >= 0, RemoveAbove(remove) is applied after the ranges are added.
+  int remove;
+  // Ranges the class is expected to contain afterwards, in iteration
+  // order.  The list ends at the first entry whose lo is negative.
+  struct {
+    Rune lo;
+    Rune hi;
+  } final[10];
+};
+
+// Test cases, each laid out as
+//   { ranges to add..., {-1} }, limit for RemoveAbove (-1 for none),
+//   { expected ranges..., {-1} }
+static CCTest tests[] = {
+  { { { 10, 20 }, {-1} }, -1,
+    { { 10, 20 }, {-1} } },
+
+  { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
+    { { 10, 30 }, {-1} } },
+
+  { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
+    { { 10, 40 }, {-1} } },
+
+  { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
+    { { 0, 50 }, {-1} } },
+
+  { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
+    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
+
+  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
+    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
+
+  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
+    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
+
+  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
+    { { 5, 25 }, {-1} } },
+
+  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
+    { { 10, 23 }, {-1} } },
+
+  // These check boundary cases during negation.
+  { { { 0, Runemax }, {-1} }, -1,
+    { { 0, Runemax }, {-1} } },
+
+  { { { 0, 50 }, {-1} }, -1,
+    { { 0, 50 }, {-1} } },
+
+  { { { 50, Runemax }, {-1} }, -1,
+    { { 50, Runemax }, {-1} } },
+
+  // Check RemoveAbove.
+  { { { 50, Runemax }, {-1} }, 255,
+    { { 50, 255 }, {-1} } },
+
+  { { { 50, Runemax }, {-1} }, 65535,
+    { { 50, 65535 }, {-1} } },
+
+  { { { 50, Runemax }, {-1} }, Runemax,
+    { { 50, Runemax }, {-1} } },
+
+  { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
+    { { 50, 60 }, { 250, 255 }, {-1} } },
+
+  { { { 50, 60 }, {-1} }, 255,
+    { { 50, 60 }, {-1} } },
+
+  { { { 350, 360 }, {-1} }, 255,
+    { {-1} } },
+
+  { { {-1} }, 255,
+    { {-1} } },
+};
+
+// Prints a failure report to stdout.  With a test case t, the report
+// shows the ranges that were added, the RemoveAbove limit (if any) and
+// the expected ranges before the ranges actually found in cc.  With
+// t == NULL only a line labelled desc listing the ranges of cc is
+// printed.
+template<class CharClass>
+static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
+  if (t != NULL) {
+    printf("\n");
+    printf("CharClass added: [%s]", desc);
+    int a = 0;
+    while (t->add[a].lo >= 0) {
+      printf(" %d-%d", t->add[a].lo, t->add[a].hi);
+      a++;
+    }
+    printf("\n");
+    if (t->remove >= 0)
+      printf("Removed > %d\n", t->remove);
+    printf("\twant:");
+    int f = 0;
+    while (t->final[f].lo >= 0) {
+      printf(" %d-%d", t->final[f].lo, t->final[f].hi);
+      f++;
+    }
+    printf("\n");
+    printf("\thave:");
+  } else {
+    printf("\t%s:", desc);
+  }
+
+  // The ranges actually present in cc.
+  typename CharClass::iterator range = cc->begin();
+  while (range != cc->end()) {
+    printf(" %d-%d", range->lo, range->hi);
+    ++range;
+  }
+  printf("\n");
+}
+
+// Reports whether x lies inside one of the expected ranges of t.
+bool ShouldContain(CCTest *t, int x) {
+  int j = 0;
+  while (t->final[j].lo >= 0) {
+    const bool at_or_above_lo = t->final[j].lo <= x;
+    const bool at_or_below_hi = x <= t->final[j].hi;
+    if (at_or_above_lo && at_or_below_hi)
+      return true;
+    j++;
+  }
+  return false;
+}
+
+// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
+// Each Negate() overload hands back a separate negated object and leaves
+// its argument alone from the caller's point of view; the matching
+// Delete() overload disposes of that object.
+
+// Returns the result of cc->Negate().
+CharClass* Negate(CharClass *cc) {
+  return cc->Negate();
+}
+
+// Disposes of a CharClass through its own Delete() method.
+void Delete(CharClass* cc) {
+  cc->Delete();
+}
+
+// Copies the builder first and negates the copy.
+CharClassBuilder* Negate(CharClassBuilder* cc) {
+  CharClassBuilder* ncc = cc->Copy();
+  ncc->Negate();
+  return ncc;
+}
+
+// Disposes of a CharClassBuilder with plain delete.
+void Delete(CharClassBuilder* cc) {
+  delete cc;
+}
+
+// Checks cc against the expectations in t and returns true if all hold:
+//   - iterating cc yields exactly the ranges in t->final, in order;
+//   - cc->size() equals the total number of runes in those ranges;
+//   - cc->Contains(j) agrees with t->final for j in 0..99 and Runemax;
+//   - the negation of cc disagrees with t->final on each of those j
+//     and its size is Runemax+1 - cc->size().
+// On the first violation a report is printed (via Broke and printf)
+// and false is returned.  desc labels the report.
+template<class CharClass>
+bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
+  typename CharClass::iterator it = cc->begin();
+  int size = 0;
+  // Walk the expected ranges and the actual ranges in lock step.
+  for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
+    if (it == cc->end() ||
+        it->lo != t->final[j].lo ||
+        it->hi != t->final[j].hi) {
+      Broke(desc, t, cc);
+      return false;
+    }
+    size += it->hi - it->lo + 1;
+  }
+  // cc must not hold any ranges beyond the expected ones.
+  if (it != cc->end()) {
+    Broke(desc, t, cc);
+    return false;
+  }
+  if (cc->size() != size) {
+    Broke(desc, t, cc);
+    printf("wrong size: want %d have %d\n", size, cc->size());
+    return false;
+  }
+
+  // Probe 0..99 and then Runemax: the iteration that would test 100
+  // tests Runemax instead, after which j++ ends the loop.
+  for (int j = 0; j < 101; j++) {
+    if (j == 100)
+      j = Runemax;
+    if (ShouldContain(t, j) != cc->Contains(j)) {
+      Broke(desc, t, cc);
+      printf("want contains(%d)=%d, got %d\n",
+             j, ShouldContain(t, j), cc->Contains(j));
+      return false;
+    }
+  }
+
+  // The negated class must give the opposite answer at every probe.
+  CharClass* ncc = Negate(cc);
+  for (int j = 0; j < 101; j++) {
+    if (j == 100)
+      j = Runemax;
+    if (ShouldContain(t, j) == ncc->Contains(j)) {
+      Broke(desc, t, cc);
+      Broke("ncc", NULL, ncc);
+      printf("want ncc contains(%d)!=%d, got %d\n",
+             j, ShouldContain(t, j), ncc->Contains(j));
+      Delete(ncc);
+      return false;
+    }
+    // This size test does not depend on j; it is repeated on every
+    // iteration.
+    if (ncc->size() != Runemax+1 - cc->size()) {
+      Broke(desc, t, cc);
+      Broke("ncc", NULL, ncc);
+      printf("ncc size should be %d is %d\n",
+             Runemax+1 - cc->size(), ncc->size());
+      Delete(ncc);
+      return false;
+    }
+  }
+  Delete(ncc);
+  return true;
+}
+
+// For every entry of tests: builds a CharClassBuilder from the add
+// ranges, applies RemoveAbove if requested, and verifies the builder,
+// the CharClass made from it, and a copy of the builder with CorrectCC.
+TEST(TestCharClassBuilder, Adds) {
+  int nfail = 0;
+  for (int i = 0; i < arraysize(tests); i++) {
+    CharClassBuilder ccb;
+    CCTest* t = &tests[i];
+    for (int j = 0; t->add[j].lo >= 0; j++)
+      ccb.AddRange(t->add[j].lo, t->add[j].hi);
+    if (t->remove >= 0)
+      ccb.RemoveAbove(t->remove);
+    if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
+      nfail++;
+    CharClass* cc = ccb.GetCharClass();
+    if (!CorrectCC(cc, t, "before copy (CharClass)"))
+      nfail++;
+    cc->Delete();
+
+    CharClassBuilder *ccb1 = ccb.Copy();
+    if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
+      nfail++;
+    // NOTE(review): this takes the CharClass from the original builder
+    // ccb, not from the copy ccb1 -- confirm whether that is intended.
+    cc = ccb.GetCharClass();
+    if (!CorrectCC(cc, t, "after copy (CharClass)"))
+      nfail++;
+    cc->Delete();
+    delete ccb1;
+  }
+  EXPECT_EQ(nfail, 0);
+}
+
+}  // namespace re2

diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
new file mode 100644
index 0000000..8d92105
--- /dev/null
+++ b/re2/testing/compile_test.cc

@@ -0,0 +1,171 @@
+// Copyright 2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test prog.cc, compile.cc
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/regexp.h"
+#include "re2/prog.h"
+
+DEFINE_string(show, "", "regular expression to compile and dump");
+
+namespace re2 {
+
+// Simple input/output tests checking that
+// the regexp compiles to the expected code.
+// These are just to sanity check the basic implementation.
+// The real confidence tests happen by testing the NFA/DFA
+// that run the compiled code.
+
+// One compile test: a regular expression and the program listing it is
+// expected to compile to.
+struct Test {
+  const char* regexp;  // expression handed to Regexp::Parse
+  const char* code;    // expected text of Prog::Dump() for that expression
+};
+
+// Expected program listings.  The test below parses each regexp with
+// Regexp::PerlX|Regexp::Latin1 and compares Prog::Dump() against the
+// listing byte for byte.
+static Test tests[] = {
+  { "a",
+    "1. byte [61-61] -> 2\n"
+    "2. match! 0\n" },
+  { "ab",
+    "1. byte [61-61] -> 2\n"
+    "2. byte [62-62] -> 3\n"
+    "3. match! 0\n" },
+  { "a|c",
+    "3. alt -> 1 | 2\n"
+    "1. byte [61-61] -> 4\n"
+    "2. byte [63-63] -> 4\n"
+    "4. match! 0\n" },
+  { "a|b",
+    "1. byte [61-62] -> 2\n"
+    "2. match! 0\n" },
+  { "[ab]",
+    "1. byte [61-62] -> 2\n"
+    "2. match! 0\n" },
+  { "a+",
+    "1. byte [61-61] -> 2\n"
+    "2. alt -> 1 | 3\n"
+    "3. match! 0\n" },
+  { "a+?",
+    "1. byte [61-61] -> 2\n"
+    "2. alt -> 3 | 1\n"
+    "3. match! 0\n" },
+  { "a*",
+    "2. alt -> 1 | 3\n"
+    "1. byte [61-61] -> 2\n"
+    "3. match! 0\n" },
+  { "a*?",
+    "2. alt -> 3 | 1\n"
+    "3. match! 0\n"
+    "1. byte [61-61] -> 2\n" },
+  { "a?",
+    "2. alt -> 1 | 3\n"
+    "1. byte [61-61] -> 3\n"
+    "3. match! 0\n" },
+  { "a??",
+    "2. alt -> 3 | 1\n"
+    "3. match! 0\n"
+    "1. byte [61-61] -> 3\n" },
+  { "a{4}",
+    "1. byte [61-61] -> 2\n"
+    "2. byte [61-61] -> 3\n"
+    "3. byte [61-61] -> 4\n"
+    "4. byte [61-61] -> 5\n"
+    "5. match! 0\n" },
+  { "(a)",
+    "2. capture 2 -> 1\n"
+    "1. byte [61-61] -> 3\n"
+    "3. capture 3 -> 4\n"
+    "4. match! 0\n" },
+  { "(?:a)",
+    "1. byte [61-61] -> 2\n"
+    "2. match! 0\n" },
+  { "",
+    "2. match! 0\n" },
+  { ".",
+    "3. alt -> 1 | 2\n"
+    "1. byte [00-09] -> 4\n"
+    "2. byte [0b-ff] -> 4\n"
+    "4. match! 0\n" },
+  { "[^ab]",
+    "5. alt -> 3 | 4\n"
+    "3. alt -> 1 | 2\n"
+    "4. byte [63-ff] -> 6\n"
+    "1. byte [00-09] -> 6\n"
+    "2. byte [0b-60] -> 6\n"
+    "6. match! 0\n" },
+  { "[Aa]",
+    "1. byte/i [61-61] -> 2\n"
+    "2. match! 0\n" },
+};
+
+// Compiles every regexp in tests and compares the dumped program with
+// the expected listing.  All mismatches are logged before the test
+// fails on the final count.
+TEST(TestRegexpCompileToProg, Simple) {
+  int failed = 0;
+  for (int idx = 0; idx < arraysize(tests); ++idx) {
+    const re2::Test& tc = tests[idx];
+    Regexp* re = Regexp::Parse(tc.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
+    if (re == NULL) {
+      LOG(ERROR) << "Cannot parse: " << tc.regexp;
+      failed++;
+      continue;
+    }
+
+    Prog* compiled = re->CompileToProg(0);
+    if (compiled != NULL) {
+      // A memory budget of 1 must make compilation fail.
+      CHECK(re->CompileToProg(1) == NULL);
+      const string dump = compiled->Dump();
+      if (dump != tc.code) {
+        LOG(ERROR) << "Incorrect compiled code for: " << tc.regexp;
+        LOG(ERROR) << "Want:\n" << tc.code;
+        LOG(ERROR) << "Got:\n" << dump;
+        failed++;
+      }
+      delete compiled;
+    } else {
+      LOG(ERROR) << "Cannot compile: " << tc.regexp;
+      failed++;
+    }
+    re->Decref();
+  }
+  EXPECT_EQ(failed, 0);
+}
+
+// The distinct byte ranges involved in the UTF-8 dot ([^\n]).
+// Once, erroneously split between 0x3f and 0x40 because it is
+// a 6-bit boundary.
+// The ranges are contiguous and together cover 0x00-0xFF, so the test
+// below checks the byte map entry of every byte value.
+static struct UTF8ByteRange {
+  int lo;
+  int hi;
+} utf8ranges[] = {
+  { 0x00, 0x09 },
+  { 0x0A, 0x0A },
+  { 0x0B, 0x7F },  // starts right after '\n'; no byte may be skipped
+  { 0x80, 0x8F },
+  { 0x90, 0x9F },
+  { 0xA0, 0xBF },
+  { 0xC0, 0xC1 },
+  { 0xC2, 0xDF },
+  { 0xE0, 0xE0 },
+  { 0xE1, 0xEF },
+  { 0xF0, 0xF0 },
+  { 0xF1, 0xF3 },
+  { 0xF4, 0xF4 },
+  { 0xF5, 0xFF },
+};
+
+// Compiles "." (parsed with Regexp::PerlX only) and checks that the
+// program's byte map has one class per entry of utf8ranges, numbered in
+// table order.
+TEST(TestCompile, ByteRanges) {
+  Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL);
+  EXPECT_TRUE(re != NULL);
+  Prog* prog = re->CompileToProg(0);
+  EXPECT_TRUE(prog != NULL);
+  EXPECT_EQ(prog->bytemap_range(), arraysize(utf8ranges));
+  // Every byte listed in range i must map to class i.
+  for (int i = 0; i < arraysize(utf8ranges); i++)
+    for (int j = utf8ranges[i].lo; j <= utf8ranges[i].hi; j++)
+      EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j;
+  delete prog;
+  re->Decref();
+}
+
+}  // namespace re2

diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
new file mode 100644
index 0000000..8e95ae4
--- /dev/null
+++ b/re2/testing/dfa_test.cc

@@ -0,0 +1,344 @@
+// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "util/test.h"
+#include "util/thread.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "re2/testing/regexp_generator.h"
+#include "re2/testing/string_generator.h"
+
+DECLARE_bool(re2_dfa_bail_when_slow);
+
+DEFINE_int32(size, 8, "log2(number of DFA nodes)");
+DEFINE_int32(repeat, 2, "Repetition count.");
+DEFINE_int32(threads, 4, "number of threads");
+
+namespace re2 {
+
+// Check that multithreaded access to DFA class works.
+
+// Helper thread: builds entire DFA for prog.
+// Thread whose Run() builds the whole kFirstMatch DFA for prog and
+// CHECK-fails if BuildEntireDFA reports failure.  The Prog is borrowed,
+// not owned; the tests below delete it after joining the thread.
+class BuildThread : public Thread {
+ public:
+  // explicit: a Prog* should never convert to a thread implicitly.
+  explicit BuildThread(Prog* prog) : prog_(prog) {}
+  virtual void Run() {
+    CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch));
+  }
+
+ private:
+  Prog* prog_;
+};
+
+// Builds the complete DFA for one program, first from a single helper
+// thread and then from FLAGS_threads helper threads at once, repeated
+// FLAGS_repeat times.
+TEST(Multithreaded, BuildEntireDFA) {
+  // Create regexp with 2^FLAGS_size states in DFA.
+  string pattern("a");
+  for (int k = FLAGS_size; k > 0; k--)
+    pattern += "[ab]";
+  pattern += "b";
+
+  // Check that single-threaded code works.
+  {
+    Regexp* re = Regexp::Parse(pattern.c_str(), Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    BuildThread* solo = new BuildThread(prog);
+    solo->SetJoinable(true);
+    solo->Start();
+    solo->Join();
+    delete solo;
+    delete prog;
+    re->Decref();
+  }
+
+  // Build the DFA simultaneously in a bunch of threads.
+  for (int round = 0; round < FLAGS_repeat; round++) {
+    Regexp* re = Regexp::Parse(pattern.c_str(), Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+
+    // Create every worker before starting any of them.
+    vector<BuildThread*> workers;
+    for (int j = 0; j < FLAGS_threads; j++) {
+      BuildThread* worker = new BuildThread(prog);
+      worker->SetJoinable(true);
+      workers.push_back(worker);
+    }
+    for (int j = 0; j < FLAGS_threads; j++) {
+      workers[j]->Start();
+    }
+    for (int j = 0; j < FLAGS_threads; j++) {
+      BuildThread* done = workers[j];
+      done->Join();
+      delete done;
+    }
+
+    // One more compile, to make sure everything is okay.
+    prog->BuildEntireDFA(Prog::kFirstMatch);
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Check that DFA size requirements are followed.
+// BuildEntireDFA will, like SearchDFA, stop building out
+// the DFA once the memory limits are reached.
+// For each limit 2^17 .. 2^23 passed to CompileToProg, the heap growth
+// measured around compiling and building both DFAs must be more than
+// 90% of the limit and less than the limit plus 16kB.
+TEST(SingleThreaded, BuildEntireDFA) {
+  // Create regexp with 2^30 states in DFA.
+  string s = "a";
+  for (int i = 0; i < 30; i++)
+    s += "[ab]";
+  s += "b";
+
+  //LOG(INFO) << s;
+  Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
+  CHECK(re);
+  int max = 24;
+  for (int i = 17; i < max; i++) {
+    int limit = 1<<i;
+    int usage;
+    //int progusage, dfamem;
+    {
+      // m measures heap growth from here to the HeapGrowth() call below.
+      testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
+      Prog* prog = re->CompileToProg(limit);
+      CHECK(prog);
+      //progusage = m.HeapGrowth();
+      //dfamem = prog->dfa_mem();
+      prog->BuildEntireDFA(Prog::kFirstMatch);
+      prog->BuildEntireDFA(Prog::kLongestMatch);
+      usage = m.HeapGrowth();
+      delete prog;
+    }
+    // The bounds below are only checked when UsingMallocCounter is set.
+    if (!UsingMallocCounter)
+      continue;
+    //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n",
+    //                          limit, progusage, dfamem, usage);
+    CHECK_GT(usage, limit*9/10);
+    CHECK_LT(usage, limit + (16<<10));  // 16kB of slop okay
+  }
+  re->Decref();
+}
+
+// Generates and returns a string over binary alphabet {0,1} that contains
+// all possible binary sequences of length n as substrings.  The obvious
+// brute force method would generate a string of length n * 2^n, but this
+// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
+// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
+// Such a string is useful for testing a DFA.  If you have a DFA
+// where distinct last n bytes implies distinct states, then running on a
+// DeBruijn string causes the DFA to need to create a new state at every
+// position in the input, never reusing any states until it gets to the
+// end of the string.  This is the worst possible case for DFA execution.
+//
+// n must be positive and small enough that 1<<n fits in a positive int.
+static string DeBruijnString(int n) {
+  // 1<<n is computed in int, so n has to leave room for the sign bit.
+  CHECK_LT(n, static_cast<int>(8*sizeof(int)) - 1);
+  CHECK_GT(n, 0);
+
+  // did[x] records whether the n-bit window x has been produced yet.
+  // The vector's elements are value-initialized, i.e. all false.
+  vector<bool> did(1<<n);
+
+  // Start with n-1 zeros so that the first appended bit completes the
+  // first n-bit window.
+  string s(n-1, '0');
+  int bits = 0;
+  int mask = (1<<n) - 1;
+  for (int i = 0; i < (1<<n); i++) {
+    // Slide the window one bit to the left, dropping the oldest bit.
+    bits <<= 1;
+    bits &= mask;
+    // Prefer appending a 1 whenever that yields a window not seen yet.
+    if (!did[bits|1]) {
+      bits |= 1;
+      s.append("1");
+    } else {
+      s.append("0");
+    }
+    // Each of the 2^n windows must appear exactly once.
+    CHECK(!did[bits]);
+    did[bits] = true;
+  }
+  return s;
+}
+
+// Test that the DFA gets the right result even if it runs
+// out of memory during a search.  The regular expression
+// 0[01]{n}$ matches a binary string of 0s and 1s only if
+// the (n+1)th-to-last character is a 0.  Matching this in
+// a single forward pass (as done by the DFA) requires
+// keeping one bit for each of the last n+1 characters
+// (whether each was a 0), or 2^(n+1) possible states.
+// If we run this regexp to search in a string that contains
+// every possible n-character binary string as a substring,
+// then it will have to run through at least 2^n states.
+// States are big data structures -- certainly more than 1 byte --
+// so if the DFA can search correctly while staying within a
+// 2^n byte limit, it must be handling out-of-memory conditions
+// gracefully.
+TEST(SingleThreaded, SearchDFA) {
+  // Choice of n is mostly arbitrary, except that:
+  //   * making n too big makes the test run for too long.
+  //   * making n too small makes the DFA refuse to run,
+  //     because it has so little memory compared to the program size.
+  // Empirically, n = 18 is a good compromise between the two.
+  const int n = 18;
+
+  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
+                             Regexp::LikePerl, NULL);
+  CHECK(re);
+
+  // The De Bruijn string for n ends with a 1 followed by n 0s in a row,
+  // which is not a match for 0[01]{n}$.  Adding one more 0 is a match.
+  string no_match = DeBruijnString(n);
+  string match = no_match + "0";
+
+  // The De Bruijn string is the worst case input for this regexp.
+  // By default, the DFA will notice that it is flushing its cache
+  // too frequently and will bail out early, so that RE2 can use the
+  // NFA implementation instead.  (The DFA loses its speed advantage
+  // if it can't get a good cache hit rate.)
+  // Tell the DFA to trudge along instead.
+  // The flag is global, so remember its value and put it back once the
+  // searches are done; otherwise the setting leaks into later tests.
+  const bool saved_bail_when_slow = FLAGS_re2_dfa_bail_when_slow;
+  FLAGS_re2_dfa_bail_when_slow = false;
+
+  int64 usage;
+  int64 peak_usage;
+  {
+    testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
+    Prog* prog = re->CompileToProg(1<<n);
+    CHECK(prog);
+    for (int i = 0; i < 10; i++) {
+      bool matched, failed = false;
+      matched = prog->SearchDFA(match, NULL,
+                                Prog::kUnanchored, Prog::kFirstMatch,
+                                NULL, &failed, NULL);
+      CHECK(!failed);
+      CHECK(matched);
+      matched = prog->SearchDFA(no_match, NULL,
+                                Prog::kUnanchored, Prog::kFirstMatch,
+                                NULL, &failed, NULL);
+      CHECK(!failed);
+      CHECK(!matched);
+    }
+    usage = m.HeapGrowth();
+    peak_usage = m.PeakHeapGrowth();
+    delete prog;
+  }
+  re->Decref();
+
+  // Reset to the behaviour that was in effect before this test.
+  FLAGS_re2_dfa_bail_when_slow = saved_bail_when_slow;
+
+  if (!UsingMallocCounter)
+    return;
+  //LOG(INFO) << "usage " << usage << " " << peak_usage;
+  // Staying under the 2^n budget while visiting at least 2^n states shows
+  // that the DFA coped with running out of memory.
+  CHECK_LT(usage, 1<<n);
+  CHECK_LT(peak_usage, 1<<n);
+}
+
+// Helper thread: searches for match, which should match,
+// and no_match, which should not.
+// The two texts are kept as StringPiece members, so the thread does not
+// copy the character data.
+// NOTE(review): presumably the caller must keep the underlying strings
+// and prog alive until the thread has been joined -- confirm.
+class SearchThread : public Thread {
+ public:
+  SearchThread(Prog* prog, const StringPiece& match,
+               const StringPiece& no_match)
+    : prog_(prog), match_(match), no_match_(no_match) {}
+
+  // Runs both unanchored first-match DFA searches twice and CHECK-fails
+  // if a search reports failure or returns the wrong answer.
+  virtual void Run() {
+    for (int i = 0; i < 2; i++) {
+      bool matched, failed = false;
+      matched = prog_->SearchDFA(match_, NULL,
+                                 Prog::kUnanchored, Prog::kFirstMatch,
+                                 NULL, &failed, NULL);
+      CHECK(!failed);
+      CHECK(matched);
+      matched = prog_->SearchDFA(no_match_, NULL,
+                                 Prog::kUnanchored, Prog::kFirstMatch,
+                                 NULL, &failed, NULL);
+      CHECK(!failed);
+      CHECK(!matched);
+    }
+  }
+
+ private:
+  Prog* prog_;
+  StringPiece match_;
+  StringPiece no_match_;
+};
+
+// Runs the De Bruijn DFA search from one helper thread and then from
+// FLAGS_threads helper threads sharing a single Prog, FLAGS_repeat times.
+TEST(Multithreaded, SearchDFA) {
+  // Same as single-threaded test above.
+  const int n = 18;
+  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
+                             Regexp::LikePerl, NULL);
+  CHECK(re);
+  string no_match = DeBruijnString(n);
+  string match = no_match + "0";
+  // The flag is global, so remember its value and put it back at the end
+  // of the test; otherwise the setting leaks into later tests.
+  const bool saved_bail_when_slow = FLAGS_re2_dfa_bail_when_slow;
+  FLAGS_re2_dfa_bail_when_slow = false;
+
+  // Check that single-threaded code works.
+  {
+    Prog* prog = re->CompileToProg(1<<n);
+    CHECK(prog);
+    SearchThread* t = new SearchThread(prog, match, no_match);
+    t->SetJoinable(true);
+    t->Start();
+    t->Join();
+    delete t;
+    delete prog;
+  }
+
+  // Run the search simultaneously in a bunch of threads.
+  // Reuse same flags for Multithreaded.BuildDFA above.
+  for (int i = 0; i < FLAGS_repeat; i++) {
+    //LOG(INFO) << "Search " << i;
+    Prog* prog = re->CompileToProg(1<<n);
+    CHECK(prog);
+
+    vector<SearchThread*> threads;
+    for (int j = 0; j < FLAGS_threads; j++) {
+      SearchThread *t = new SearchThread(prog, match, no_match);
+      t->SetJoinable(true);
+      threads.push_back(t);
+    }
+    for (int j = 0; j < FLAGS_threads; j++)
+      threads[j]->Start();
+    for (int j = 0; j < FLAGS_threads; j++) {
+      threads[j]->Join();
+      delete threads[j];
+    }
+    delete prog;
+  }
+  re->Decref();
+
+  // Reset to the behaviour that was in effect before this test.
+  FLAGS_re2_dfa_bail_when_slow = saved_bail_when_slow;
+}
+
+// One reverse-DFA test case.
+struct ReverseTest {
+  const char *regexp;  // expression compiled with CompileToReverseProg
+  const char *text;    // text handed to SearchDFA
+  bool match;          // expected return value of SearchDFA
+};
+
+// Test that reverse DFA handles anchored/unanchored correctly.
+// It's in the DFA interface but not used by RE2.
+// Each pattern is anchored at one end of the text (\A or \z) and is
+// paired with one text expected to match and one expected not to.
+ReverseTest reverse_tests[] = {
+  { "\\A(a|b)", "abc", true },
+  { "(a|b)\\z", "cba", true },
+  { "\\A(a|b)", "cba", false },
+  { "(a|b)\\z", "abc", false },
+};
+
+// Compiles each pattern in reverse_tests to a reverse program and
+// compares the result of an unanchored first-match DFA search with the
+// expected outcome.  Mismatches are logged and counted; the test fails
+// if there were any.
+TEST(DFA, ReverseMatch) {
+  int nfail = 0;
+  for (int idx = 0; idx < arraysize(reverse_tests); ++idx) {
+    const ReverseTest& tc = reverse_tests[idx];
+
+    Regexp* re = Regexp::Parse(tc.regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog *prog = re->CompileToReverseProg(0);
+    CHECK(prog);
+
+    // failed is written by SearchDFA but not inspected here.
+    bool failed = false;
+    const bool got = prog->SearchDFA(tc.text, NULL,
+                                     Prog::kUnanchored, Prog::kFirstMatch,
+                                     NULL, &failed, NULL);
+    if (got != tc.match) {
+      LOG(ERROR) << tc.regexp << " on " << tc.text << ": want " << tc.match;
+      nfail++;
+    }
+
+    delete prog;
+    re->Decref();
+  }
+  EXPECT_EQ(nfail, 0);
+}
+
+}  // namespace re2

diff --git a/re2/testing/dump.cc b/re2/testing/dump.cc
new file mode 100644
index 0000000..4bdf714
--- /dev/null
+++ b/re2/testing/dump.cc

@@ -0,0 +1,164 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Dump the regexp into a string showing structure.
+// Tested by parse_unittest.cc
+
+// This function traverses the regexp recursively,
+// meaning that on inputs like Regexp::Simplify of
+// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
+// it takes time and space exponential in the size of the
+// original regular expression.  It can also use stack space
+// linear in the size of the regular expression for inputs
+// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
+// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
+// As a result, Dump is provided only in the testing
+// library (see BUILD).
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/stringpiece.h"
+#include "re2/regexp.h"
+
+// Cause a link error if this file is used outside of testing.
+DECLARE_string(test_tmpdir);
+
+namespace re2 {
+
+static const char* kOpcodeNames[] = {  // indexed by re->op() in DumpRegexpAppending
+  "bad",
+  "no",
+  "emp",
+  "lit",
+  "str",
+  "cat",
+  "alt",
+  "star",
+  "plus",
+  "que",
+  "rep",
+  "cap",
+  "dot",
+  "byte",
+  "bol",
+  "eol",
+  "wb",   // kRegexpWordBoundary
+  "nwb",  // kRegexpNoWordBoundary
+  "bot",
+  "eot",
+  "cc",
+  "match",
+};
+
+// Appends to *s a dump of re in the form name{contents}, recursing into subexpressions.
+// Nothing pretty, just for testing.
+static void DumpRegexpAppending(Regexp* re, string* s) {
+  if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {  // no name known: print the number
+    StringAppendF(s, "op%d", re->op());
+  } else {
+    switch (re->op()) {
+      default:
+        break;
+      case kRegexpStar:
+      case kRegexpPlus:
+      case kRegexpQuest:
+      case kRegexpRepeat:
+        if (re->parse_flags() & Regexp::NonGreedy)  // non-greedy repetitions get an "n" prefix
+          s->append("n");
+        break;
+    }
+    s->append(kOpcodeNames[re->op()]);
+    if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
+      Rune r = re->rune();
+      if ('a' <= r && r <= 'z')  // "fold" suffix only for a lowercase ASCII letter
+        s->append("fold");
+    }
+    if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
+      for (int i = 0; i < re->nrunes(); i++) {
+        Rune r = re->runes()[i];
+        if ('a' <= r && r <= 'z') {  // one lowercase ASCII letter is enough
+          s->append("fold");
+          break;
+        }
+      }
+    }
+  }
+  s->append("{");
+  switch (re->op()) {
+    default:
+      break;
+    case kRegexpEndText:
+      if (!(re->parse_flags() & Regexp::WasDollar)) {  // printed only when WasDollar is clear
+        s->append("\\z");
+      }
+      break;
+    case kRegexpLiteral: {
+      Rune r = re->rune();
+      char buf[UTFmax+1];
+      buf[runetochar(buf, &r)] = 0;  // NUL-terminate right after the encoded rune
+      s->append(buf);
+      break;
+    }
+    case kRegexpLiteralString:
+      for (int i = 0; i < re->nrunes(); i++) {
+        Rune r = re->runes()[i];
+        char buf[UTFmax+1];
+        buf[runetochar(buf, &r)] = 0;
+        s->append(buf);
+      }
+      break;
+    case kRegexpConcat:
+    case kRegexpAlternate:
+      for (int i = 0; i < re->nsub(); i++)  // children are dumped back to back, no separator
+        DumpRegexpAppending(re->sub()[i], s);
+      break;
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+      DumpRegexpAppending(re->sub()[0], s);
+      break;
+    case kRegexpCapture:
+      if (re->name()) {  // named group: "name:" precedes the operand
+        s->append(*re->name());
+        s->append(":");
+      }
+      DumpRegexpAppending(re->sub()[0], s);
+      break;
+    case kRegexpRepeat:
+      s->append(StringPrintf("%d,%d ", re->min(), re->max()));  // "min,max " precedes the operand
+      DumpRegexpAppending(re->sub()[0], s);
+      break;
+    case kRegexpCharClass: {
+      string sep;  // empty before the first range, " " afterwards
+      for (CharClass::iterator it = re->cc()->begin();
+           it != re->cc()->end(); ++it) {
+        RuneRange rr = *it;
+        s->append(sep);
+        if (rr.lo == rr.hi)
+          s->append(StringPrintf("%#x", rr.lo));
+        else
+          s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
+        sep = " ";
+      }
+      break;
+    }
+  }
+  s->append("}");
+}
+
+string Regexp::Dump() {
+  string dumped;
+
+  // Only dump when the test_tmpdir flag is set, i.e. when run from a unit test.
+  if (!FLAGS_test_tmpdir.empty()) {
+    DumpRegexpAppending(this, &dumped);
+    return dumped;
+  }
+
+  LOG(ERROR) << "Cannot use except for testing.";
+  return dumped;
+}
+
+}  // namespace re2

diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc
new file mode 100644
index 0000000..9e057cc
--- /dev/null
+++ b/re2/testing/exhaustive1_test.cc

@@ -0,0 +1,42 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+#include "util/test.h"
+#include "re2/testing/exhaustive_tester.h"
+
+DECLARE_string(regexp_engines);
+
+namespace re2 {
+
+// Test simple repetition operators
+TEST(Repetition, Simple) {
+  vector<string> ops = Split(" ",
+    "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
+    "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
+    "%s* %s+ %s? %s*? %s+? %s??");
+  ExhaustiveTest(3, 2, Explode("abc."), ops,  // maxatoms=3, maxops=2
+                 6, Explode("ab"), "(?:%s)", "");  // maxstrlen=6, empty top-level wrapper
+  ExhaustiveTest(3, 2, Explode("abc."), ops,
+                 40, Explode("a"), "(?:%s)", "");  // same regexps, maxstrlen=40
+}
+
+// Test capturing parens -- (a) -- inside repetition operators
+TEST(Repetition, Capturing) {
+  vector<string> ops = Split(" ",
+    "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
+    "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
+    "%s* %s+ %s? %s*? %s+? %s??");
+  ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
+                 7, Explode("ab"), "(?:%s)", "");
+
+  // This would be a great test, but it runs forever when PCRE is enabled.
+  if (strstr("PCRE", FLAGS_regexp_engines.c_str()) == NULL)  // flag is the needle: an empty flag or any substring of "PCRE" skips the test
+    ExhaustiveTest(4, 3, Split(" ", "a (a)"), ops,
+                   100, Explode("a"), "(?:%s)", "");
+}
+
+}  // namespace re2
+

diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc
new file mode 100644
index 0000000..c5fec5b
--- /dev/null
+++ b/re2/testing/exhaustive2_test.cc

@@ -0,0 +1,70 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+#include "util/test.h"
+#include "re2/re2.h"
+#include "re2/testing/exhaustive_tester.h"
+
+DECLARE_string(regexp_engines);
+
+namespace re2 {
+
+// Test empty string matches (aka "(?:)")
+TEST(EmptyString, Exhaustive) {
+  ExhaustiveTest(2, 2, Split(" ", "(?:) a"),  // maxatoms=2, maxops=2
+                 RegexpGenerator::EgrepOps(),
+                 5, Split("", "ab"), "", "");  // maxstrlen=5; wrapper and top-level wrapper both empty
+}
+
+// Test backslash-escaped regexp punctuation against the raw characters.
+TEST(Punctuation, Literals) {
+  const vector<string> punct = Explode("()*+?{}[]\\^$.");
+  vector<string> atoms(punct);
+  for (int k = 0; k < atoms.size(); k++)
+    atoms[k].insert(0, "\\");
+  ExhaustiveTest(1, 1, atoms, RegexpGenerator::EgrepOps(),
+                 2, punct, "", "");
+}
+
+// Test ^ $ . \A \z in presence of line endings.
+// Have to wrap the empty-width ones in (?:) so that
+// they can be repeated -- PCRE rejects ^* but allows (?:^)*
+TEST(LineEnds, Exhaustive) {
+  ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),  // maxatoms=2, maxops=2
+                 RegexpGenerator::EgrepOps(),
+                 4, Explode("ab\n"), "", "");  // maxstrlen=4; the string alphabet includes a newline
+}
+
+// Test what does and does not match \n.
+// This would be a good test, except that PCRE seems to have a bug:
+// in single-byte character set mode (the default),
+// [^a] matches \n, but in UTF-8 mode it does not.
+// So when we run the test, the tester complains that
+// we don't agree with PCRE, but it's PCRE that is at fault.
+// For what it's worth, Perl gets this right (matches
+// regardless of whether UTF-8 input is selected):
+//
+//     #!/usr/bin/perl
+//     use POSIX qw(locale_h);
+//     print "matches in latin1\n" if "\n" =~ /[^a]/;
+//     setlocale("en_US.utf8");
+//     print "matches in utf8\n" if "\n" =~ /[^a]/;
+//
+// The rule chosen for RE2 is that by default, like Perl,
+// dot does not match \n but negated character classes [^a] do.
+// (?s) will allow dot to match \n; there is no way in RE2
+// to stop [^a] from matching \n, though the underlying library
+// provides a mechanism, and RE2 could add new syntax if needed.
+//
+// TEST(Newlines, Exhaustive) {
+//   vector<string> empty_vector;
+//   ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
+//                  RegexpGenerator::EgrepOps(),
+//                  4, Explode("a\n"), "", "");
+// }
+
+}  // namespace re2
+

diff --git a/re2/testing/exhaustive3_test.cc b/re2/testing/exhaustive3_test.cc
new file mode 100644
index 0000000..5613fcb
--- /dev/null
+++ b/re2/testing/exhaustive3_test.cc

@@ -0,0 +1,94 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+#include "util/test.h"
+#include "re2/testing/exhaustive_tester.h"
+
+namespace re2 {
+
+// Test simple character classes by themselves.
+TEST(CharacterClasses, Exhaustive) {
+  vector<string> atoms = Split(" ",
+    "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
+  ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),  // maxatoms=2, maxops=1
+                 5, Explode("ab"), "", "");  // maxstrlen=5; no wrapper around each atom
+}
+
+// Test simple character classes inside a___b (for example, a[a]b).
+TEST(CharacterClasses, ExhaustiveAB) {
+  vector<string> atoms = Split(" ",
+    "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
+  ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),  // maxatoms=2, maxops=1
+                 5, Explode("ab"), "a%sb", "");  // maxstrlen=5; wrapper is "a%sb"
+}
+
+// Returns the UTF-8 encoding of Rune r as a string.
+static string UTF8(Rune r) {
+  char encoded[UTFmax+1];
+  encoded[runetochar(encoded, &r)] = '\0';
+  return encoded;
+}
+
+// Returns a vector of "interesting" UTF8 characters.
+// Unicode is now too big to just return all of them,
+// so InterestingUTF8 returns a set likely to be good test cases.
+static const vector<string>& InterestingUTF8() {
+  static bool init;  // set once v has been filled; later calls return v as is
+  static vector<string> v;
+
+  if (init)
+    return v;
+
+  init = true;
+  // All the Latin1 equivalents are interesting.
+  for (int i = 1; i < 256; i++)  // starts at 1, so code point 0 is not included
+    v.push_back(UTF8(i));
+
+  // After that, the codes near bit boundaries are
+  // interesting, because they span byte sequence lengths.
+  for (int j = 0; j < 8; j++)
+    v.push_back(UTF8(256 + j));
+  for (int i = 512; i < Runemax; i <<= 1)  // each power of two from 512 up to Runemax
+    for (int j = -8; j < 8; j++)
+      v.push_back(UTF8(i + j));
+
+  // The codes near Runemax, including Runemax itself, are interesting.
+  for (int j = -8; j <= 0; j++)
+    v.push_back(UTF8(Runemax + j));
+
+  return v;
+}
+
+// Test interesting UTF-8 characters against character classes.
+TEST(InterestingUTF8, SingleOps) {
+  vector<string> atoms = Split(" ",
+    ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
+    "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
+    "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
+    "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
+  vector<string> ops;  // no ops
+  ExhaustiveTest(1, 0, atoms, ops,  // maxatoms=1, maxops=0
+                 1, InterestingUTF8(), "", "");  // maxstrlen=1 over the InterestingUTF8() alphabet
+}
+
+// Test interesting UTF-8 characters against character classes,
+// but wrap everything inside AB.
+TEST(InterestingUTF8, AB) {
+  vector<string> atoms = Split(" ",
+    ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
+    "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
+    "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
+    "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
+  vector<string> ops;  // no ops
+  vector<string> alpha = InterestingUTF8();  // private copy; the shared vector is left untouched
+  for (int i = 0; i < alpha.size(); i++)
+    alpha[i] = "a" + alpha[i] + "b";  // each alphabet entry becomes a<char>b
+  ExhaustiveTest(1, 0, atoms, ops,
+                 1, alpha, "a%sb", "");  // the regexp side is wrapped the same way via "a%sb"
+}
+
+}  // namespace re2
+

diff --git a/re2/testing/exhaustive_test.cc b/re2/testing/exhaustive_test.cc
new file mode 100644
index 0000000..fc40dee
--- /dev/null
+++ b/re2/testing/exhaustive_test.cc

@@ -0,0 +1,38 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+#include "util/test.h"
+#include "re2/testing/exhaustive_tester.h"
+
+namespace re2 {
+
+DECLARE_string(regexp_engines);
+
+// Test very simple expressions.
+TEST(EgrepLiterals, Lowercase) {
+  EgrepTest(3, 2, "abc.", 3, "abc", "");  // maxatoms=3, maxops=2, maxstrlen=3, no wrapper
+}
+
+// Test mixed-case expressions.
+TEST(EgrepLiterals, MixedCase) {
+  EgrepTest(3, 2, "AaBb.", 2, "AaBb", "");  // maxatoms=3, maxops=2, maxstrlen=2, no wrapper
+}
+
+// Test mixed-case in case-insensitive mode.
+TEST(EgrepLiterals, FoldCase) {
+  // The punctuation characters surround A-Z and a-z
+  // in the ASCII table.  This looks for bugs in the
+  // bytemap range code in the DFA.
+  EgrepTest(3, 2, "abAB.", 2, "aBc@_~", "(?i:%s)");  // wrapper "(?i:%s)" supplies the case folding
+}
+
+// Test simple expressions on text containing the bytes \xE2\x98\xBA (a three-byte UTF-8 sequence).
+TEST(EgrepLiterals, UTF8) {
+  EgrepTest(3, 2, "ab.", 4, "a\xE2\x98\xBA", "");  // maxatoms=3, maxops=2, maxstrlen=4, no wrapper
+}
+
+}  // namespace re2
+

diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc
new file mode 100644
index 0000000..54de857
--- /dev/null
+++ b/re2/testing/exhaustive_tester.cc

@@ -0,0 +1,188 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+// Each test picks an alphabet (e.g., "abc"), a maximum string length,
+// a maximum regular expression length, and a maximum number of letters
+// that can appear in the regular expression.  Given these parameters,
+// it tries every possible regular expression and string, verifying that
+// the NFA, DFA, and a trivial backtracking implementation agree about
+// the location of the match.
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef LOGGING
+#define LOGGING 0
+#endif
+
+#include "util/test.h"
+#include "re2/testing/exhaustive_tester.h"
+#include "re2/testing/tester.h"
+
+DEFINE_bool(show_regexps, false, "show regexps during testing");
+
+DEFINE_int32(max_bad_regexp_inputs, 1,
+             "Stop testing a regular expression after finding this many "
+             "strings that break it.");
+
+// Compiled in debug mode, the usual tests run for over an hour.
+// Have to cut it down to make the unit test machines happy.
+DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
+
+namespace re2 {
+
+static char* escape(const StringPiece& sp) {  // returns sp in double quotes with \ " and newline escaped
+  static char buf[512];  // reused by every call; the result is only valid until the next call
+  char* out = buf;
+  *out++ = '\"';
+  for (int i = 0; i < sp.size(); i++) {
+    if (out+5 >= buf+sizeof buf)
+      LOG(FATAL) << "ExhaustiveTester escape: too long";
+    switch (sp[i]) {
+      case '\\':
+      case '\"':
+        *out++ = '\\'; *out++ = sp[i]; break;
+      case '\n':
+        *out++ = '\\'; *out++ = 'n'; break;
+      default:
+        *out++ = sp[i];
+    }
+  }
+  *out++ = '\"';
+  *out = '\0';
+  return buf;
+}
+
+static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
+  if (!re.Match(input, 0, input.size(), anchor, m, n)) {
+    printf("-");
+    return;
+  }
+  for (int g = 0; g < n; g++) {
+    const StringPiece& sub = m[g];  // "-" for a group with no data, else begin-end offsets into input
+    if (g != 0) printf(" ");
+    if (sub.begin() != NULL)
+      printf("%d-%d", static_cast<int>(sub.begin() - input.begin()), static_cast<int>(sub.end() - input.begin()));
+    else
+      printf("-");
+  }
+}
+	
+// Processes a single generated regexp.
+// Compiles it using Regexp interface and PCRE, and then
+// checks that NFA, DFA, and PCRE all return the same results.
+void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
+  regexps_++;
+  string regexp = const_regexp;
+  if (!topwrapper_.empty())
+    regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());  // topwrapper_ is a format taking the regexp
+
+  if (FLAGS_show_regexps) {
+    printf("\r%s", regexp.c_str());
+    fflush(stdout);
+  }
+
+  if (LOGGING) {
+    // Write out test cases and answers for use in testing
+    // other implementations, such as Go's regexp package.
+    if (randomstrings_)
+      LOG(ERROR) << "Cannot log with random strings.";
+    if (regexps_ == 1) {  // first
+      printf("strings\n");
+      strgen_.Reset();
+      while (strgen_.HasNext())
+        printf("%s\n", escape(strgen_.Next()));
+      printf("regexps\n");
+    }
+    printf("%s\n", escape(regexp));
+
+    RE2 re(regexp);
+    RE2::Options longest;
+    longest.set_longest_match(true);
+    RE2 relongest(regexp, longest);
+    int ngroup = re.NumberOfCapturingGroups()+1;  // +1: presumably slot 0 is the overall match -- confirm
+    StringPiece* group = new StringPiece[ngroup];
+
+    strgen_.Reset();
+    while (strgen_.HasNext()) {
+      StringPiece input = strgen_.Next();
+      PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
+      printf(";");
+      PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
+      printf(";");
+      PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
+      printf(";");
+      PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
+      printf("\n");
+    }
+    delete[] group;
+    return;  // logging mode never reaches the Tester run below
+  }
+
+  Tester tester(regexp);
+  if (tester.error())
+    return;
+
+  strgen_.Reset();
+  strgen_.GenerateNULL();
+  if (randomstrings_)
+    strgen_.Random(stringseed_, stringcount_);
+  int bad_inputs = 0;
+  while (strgen_.HasNext()) {
+    tests_++;
+    if (!tester.TestInput(strgen_.Next())) {  // a false TestInput result counts as one failure
+      failures_++;
+      if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)  // stop early on this regexp
+        break;
+    }
+  }
+}
+
+// Runs an exhaustive test on the given parameters.  When DEBUG_MODE and
+// --quick_debug_mode are both set, every limit above 1 is reduced by one.
+void ExhaustiveTest(int maxatoms, int maxops,
+                    const vector<string>& alphabet,
+                    const vector<string>& ops,
+                    int maxstrlen, const vector<string>& stralphabet,
+                    const string& wrapper,
+                    const string& topwrapper) {
+  const bool shrink = DEBUG_MODE && FLAGS_quick_debug_mode;
+  if (shrink && maxatoms > 1)
+    --maxatoms;
+  if (shrink && maxops > 1)
+    --maxops;
+  if (shrink && maxstrlen > 1)
+    --maxstrlen;
+  ExhaustiveTester tester(maxatoms, maxops, alphabet, ops,
+                          maxstrlen, stralphabet, wrapper,
+                          topwrapper);
+  tester.Generate();
+  if (!LOGGING) {
+    printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
+           tester.regexps(), tester.tests(), tester.failures(), maxstrlen, (int)stralphabet.size());
+  }
+  EXPECT_EQ(0, tester.failures());
+}
+
+// Runs ExhaustiveTest with the basic egrep operators, once for each
+// top-level wrapper: none, ^-anchored, $-anchored, and anchored at both ends.
+void EgrepTest(int maxatoms, int maxops, const string& alphabet,
+               int maxstrlen, const string& stralphabet,
+               const string& wrapper) {
+  const char* top_wrappers[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
+  const int ntop = arraysize(top_wrappers);
+  for (int k = 0; k < ntop; k++) {
+    ExhaustiveTest(maxatoms, maxops,
+                   Split("", alphabet),
+                   RegexpGenerator::EgrepOps(),
+                   maxstrlen,
+                   Split("", stralphabet),
+                   wrapper,
+                   top_wrappers[k]);
+  }
+}
+
+}  // namespace re2

diff --git a/re2/testing/exhaustive_tester.h b/re2/testing/exhaustive_tester.h
new file mode 100644
index 0000000..38a139f
--- /dev/null
+++ b/re2/testing/exhaustive_tester.h

@@ -0,0 +1,85 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_TESTING_EXHAUSTIVE_TESTER_H__
+#define RE2_TESTING_EXHAUSTIVE_TESTER_H__
+
+#include <string>
+#include <vector>
+#include "util/util.h"
+#include "re2/testing/regexp_generator.h"
+#include "re2/testing/string_generator.h"
+
+namespace re2 {
+
+// Exhaustive regular expression test: generate all regexps within parameters,
+// then generate all strings of a given length over a given alphabet,
+// then check that NFA, DFA, and PCRE agree about whether each regexp matches
+// each possible string, and if so, where the match is.
+//
+// Can also be used in a "random" mode that generates a given number
+// of random regexp and strings, allowing testing of larger expressions
+// and inputs.
+class ExhaustiveTester : public RegexpGenerator {
+ public:
+  ExhaustiveTester(int maxatoms,
+                   int maxops,
+                   const vector<string>& alphabet,
+                   const vector<string>& ops,
+                   int maxstrlen,
+                   const vector<string>& stralphabet,
+                   const string& wrapper,
+                   const string& topwrapper)
+    : RegexpGenerator(maxatoms, maxops, alphabet, ops),
+      strgen_(maxstrlen, stralphabet),
+      wrapper_(wrapper),
+      topwrapper_(topwrapper),
+      regexps_(0), tests_(0), failures_(0),
+      randomstrings_(0), stringseed_(0), stringcount_(0)  { }  // random-string mode is off until RandomStrings()
+
+  int regexps()  { return regexps_; }
+  int tests()    { return tests_; }
+  int failures() { return failures_; }
+
+  // Needed for RegexpGenerator interface.
+  void HandleRegexp(const string& regexp);
+
+  // Causes testing to generate random input strings.
+  void RandomStrings(int32 seed, int32 count) {
+    randomstrings_ = true;
+    stringseed_ = seed;
+    stringcount_ = count;
+  }
+
+ private:
+  StringGenerator strgen_;
+  string wrapper_;      // Regexp wrapper - either empty or has one %s.
+  string topwrapper_;   // Regexp top-level wrapper.
+  int regexps_;   // Number of HandleRegexp calls
+  int tests_;     // Number of regexp tests.
+  int failures_;  // Number of tests failed.
+
+  bool randomstrings_;  // Whether to use random strings
+  int32 stringseed_;    // If so, the seed.
+  int stringcount_;     // If so, how many to generate.
+  DISALLOW_EVIL_CONSTRUCTORS(ExhaustiveTester);
+};
+
+// Runs an exhaustive test on the given parameters.
+void ExhaustiveTest(int maxatoms, int maxops,
+                    const vector<string>& alphabet,
+                    const vector<string>& ops,
+                    int maxstrlen, const vector<string>& stralphabet,
+                    const string& wrapper,
+                    const string& topwrapper);
+
+// Runs an exhaustive test using the given parameters and
+// the basic egrep operators.
+void EgrepTest(int maxatoms, int maxops, const string& alphabet,
+               int maxstrlen, const string& stralphabet,
+               const string& wrapper);
+
+}  // namespace re2
+
+#endif  // RE2_TESTING_EXHAUSTIVE_TESTER_H__

diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
new file mode 100644
index 0000000..e3a0dd1
--- /dev/null
+++ b/re2/testing/filtered_re2_test.cc

@@ -0,0 +1,275 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "util/test.h"
+#include "re2/filtered_re2.h"
+#include "re2/re2.h"
+
+DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc
+
+namespace re2 {
+
+struct FilterTestVars {
+  vector<string> atoms;      // filled in by f.Compile()
+  vector<int> atom_indices;  // atom indices handed to f.AllMatches()
+  vector<int> matches;       // output of f.AllMatches()
+  RE2::Options opts;         // options passed with every f.Add()
+  FilteredRE2 f;
+};
+
+TEST(FilteredRE2Test, EmptyTest) {
+  FilterTestVars v;  // no regexps are added and Compile is never called
+  v.f.AllMatches("foo", v.atom_indices, &v.matches);
+  EXPECT_EQ(0, v.matches.size());
+}
+
+TEST(FilteredRE2Test, SmallOrTest) {
+  FLAGS_filtered_re2_min_atom_len = 4;  // both alternatives below are only 3 characters long
+
+  FilterTestVars v;
+  int id;
+  v.f.Add("(foo|bar)", v.opts, &id);
+
+  v.f.Compile(&v.atoms);
+  EXPECT_EQ(0, v.atoms.size());  // no atoms are expected for this pattern
+
+  v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);  // atom_indices is still empty here
+  EXPECT_EQ(1, v.matches.size());
+  EXPECT_EQ(id, v.matches[0]);
+}
+
+TEST(FilteredRE2Test, SmallLatinTest) {
+  FLAGS_filtered_re2_min_atom_len = 3;
+  FilterTestVars v;
+  int id;
+
+  v.opts.set_utf8(false);  // the pattern below contains bytes >= 0x80
+  v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
+  v.f.Compile(&v.atoms);
+  EXPECT_EQ(1, v.atoms.size());
+  EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");  // expected atom has the Q lowercased
+
+  v.atom_indices.push_back(0);  // report atom 0 as matched
+  v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
+  EXPECT_EQ(1, v.matches.size());
+  EXPECT_EQ(id, v.matches[0]);
+}
+
+struct AtomTest {
+  const char* testname;
+  // If any test needs more than this many regexps or atoms, increase
+  // the size of the corresponding array.
+  const char* regexps[20];  // users stop at the first NULL slot
+  const char* atoms[20];    // expected atoms; users stop at the first NULL slot
+};
+
+AtomTest atom_tests[] = {  // order matters: MatchEmptyPattern uses [0], MatchTests uses [2]
+  {
+    // This test checks to make sure empty patterns are allowed.
+    "CheckEmptyPattern",
+    {""},
+    {}
+  }, {
+    // This test checks that all atoms of length greater than min length
+    // are found, and no atoms that are of smaller length are found.
+    "AllAtomsGtMinLengthFound", {
+      "(abc123|def456|ghi789).*mnop[x-z]+",
+      "abc..yyy..zz",
+      "mnmnpp[a-z]+PPP"
+    }, {
+      "abc123",
+      "def456",
+      "ghi789",
+      "mnop",
+      "abc",
+      "yyy",
+      "mnmnpp",
+      "ppp"
+    }
+  }, {
+    // Test to make sure that any atoms that have another atom as a
+    // substring in an OR are removed; that is, only the shortest
+    // substring is kept.
+    "SubstrAtomRemovesSuperStrInOr", {
+      "(abc123|abc|ghi789|abc1234).*[x-z]+",
+      "abcd..yyy..yyyzzz",
+      "mnmnpp[a-z]+PPP"
+    }, {
+      "abc",
+      "ghi789",
+      "abcd",
+      "yyy",
+      "yyyzzz",
+      "mnmnpp",
+      "ppp"
+    }
+  }, {
+    // Test character class expansion.
+    "CharClassExpansion", {
+      "m[a-c][d-f]n.*[x-z]+",
+      "[x-y]bcde[ab]"
+    }, {
+      "madn", "maen", "mafn",
+      "mbdn", "mben", "mbfn",
+      "mcdn", "mcen", "mcfn",
+      "xbcdea", "xbcdeb",
+      "ybcdea", "ybcdeb"
+    }
+  }, {
+    // Test upper/lower of non-ASCII.
+    "UnicodeLower", {
+      "(?i)ΔδΠϖπΣςσ",
+      "ΛΜΝΟΠ",
+      "ψρστυ",
+    }, {
+      "δδπππσσσ",
+      "λμνοπ",
+      "ψρστυ",
+    },
+  },
+};
+
+void AddRegexpsAndCompile(const char* regexps[],
+                          int n,
+                          struct FilterTestVars* v) {
+  int ignored_id;  // Add() writes an id here; this helper has no use for it
+  for (int k = 0; k < n; ++k)
+    v->f.Add(regexps[k], v->opts, &ignored_id);
+
+  v->f.Compile(&v->atoms);  // stores the resulting atoms in v->atoms
+}
+
+bool CheckExpectedAtoms(const char* atoms[],
+                        int n,
+                        const char* testname,
+                        struct FilterTestVars* v) {
+  vector<string> want;
+  for (int k = 0; k < n; k++)
+    want.push_back(atoms[k]);
+
+  // Order is irrelevant, so both lists are sorted and then compared as a
+  // whole (same length and same elements).  Note that this leaves
+  // v->atoms sorted for the caller.
+  sort(v->atoms.begin(), v->atoms.end());
+  sort(want.begin(), want.end());
+  const bool pass = (want == v->atoms);
+
+  if (!pass) {
+    LOG(WARNING) << "Failed " << testname;
+    LOG(WARNING) << "Expected #atoms = " << want.size();
+    for (int k = 0; k < want.size(); k++)
+      LOG(WARNING) << want[k];
+    LOG(WARNING) << "Found #atoms = " << v->atoms.size();
+    for (int k = 0; k < v->atoms.size(); k++)
+      LOG(WARNING) << v->atoms[k];
+  }
+
+  return pass;
+}
+
+TEST(FilteredRE2Test, AtomTests) {
+  FLAGS_filtered_re2_min_atom_len = 3;
+
+  int failures = 0;
+  for (int i = 0; i < arraysize(atom_tests); i++) {
+    FilterTestVars v;
+    AtomTest* t = &atom_tests[i];
+    // Each array ends at its first NULL slot (or is completely full).
+    int nregexp = 0;
+    while (nregexp < arraysize(t->regexps) && t->regexps[nregexp] != NULL)
+      nregexp++;
+    int natom = 0;
+    while (natom < arraysize(t->atoms) && t->atoms[natom] != NULL)
+      natom++;
+    AddRegexpsAndCompile(t->regexps, nregexp, &v);
+    if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v))
+      failures++;
+  }
+  EXPECT_EQ(0, failures);
+}
+
+// Clears *atom_indices, then appends the index in atoms of each matched_atoms entry.
+void FindAtomIndices(const vector<string>& atoms, const vector<string>& matched_atoms,
+                     vector<int>* atom_indices) {
+  atom_indices->clear();
+  for (int i = 0; i < matched_atoms.size(); i++) {
+    int j = 0;
+    for (; j < atoms.size(); j++) {
+      if (matched_atoms[i] == atoms[j]) {
+        atom_indices->push_back(j);
+        break;
+      }
+    }
+    EXPECT_LT(j, atoms.size());  // j == atoms.size() means matched_atoms[i] was not found
+  }
+}
+
+TEST(FilteredRE2Test, MatchEmptyPattern) {
+  FLAGS_filtered_re2_min_atom_len = 3;
+
+  FilterTestVars v;
+  AtomTest* t = &atom_tests[0];
+  // We are using the regexps used in one of the atom tests
+  // for this test. Adding the EXPECT here to make sure
+  // the index we use for the test is for the correct test.
+  EXPECT_EQ("CheckEmptyPattern", string(t->testname));
+  int nregexp;
+  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+    if (t->regexps[nregexp] == NULL)
+      break;
+  AddRegexpsAndCompile(t->regexps, nregexp, &v);
+  string text = "0123";
+  // atom_ids stays empty: FirstMatch is called with no matched atoms.
+  vector<int> atom_ids;
+  EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
+}
+
+TEST(FilteredRE2Test, MatchTests) {
+  FLAGS_filtered_re2_min_atom_len = 3;
+
+  FilterTestVars v;
+  AtomTest* t = &atom_tests[2];
+  // We are using the regexps used in one of the atom tests
+  // for this test.
+  EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname));
+  int nregexp;
+  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)  // count entries before the first NULL
+    if (t->regexps[nregexp] == NULL)
+      break;
+  AddRegexpsAndCompile(t->regexps, nregexp, &v);
+
+  string text = "abc121212xyz";
+  // atoms = abc
+  vector<int> atom_ids;
+  vector<string> atoms;
+  atoms.push_back("abc");
+  FindAtomIndices(v.atoms, atoms, &atom_ids);  // translate atom strings into indices into v.atoms
+  vector<int> matching_regexps;
+  v.f.AllMatches(text, atom_ids, &matching_regexps);
+  EXPECT_EQ(1, matching_regexps.size());
+
+  text = "abc12312yyyzzz";
+  atoms.clear();
+  atoms.push_back("abc");
+  atoms.push_back("yyy");
+  atoms.push_back("yyyzzz");
+  FindAtomIndices(v.atoms, atoms, &atom_ids);  // atom_ids is cleared and refilled
+  v.f.AllMatches(text, atom_ids, &matching_regexps);
+  EXPECT_EQ(1, matching_regexps.size());
+
+  text = "abcd12yyy32yyyzzz";
+  atoms.clear();
+  atoms.push_back("abc");
+  atoms.push_back("abcd");
+  atoms.push_back("yyy");
+  atoms.push_back("yyyzzz");
+  FindAtomIndices(v.atoms, atoms, &atom_ids);
+  LOG(INFO) << "S: " << atom_ids.size();
+  for (int i = 0; i < atom_ids.size(); i++)
+    LOG(INFO) << "i: " << i << " : " << atom_ids[i];
+  v.f.AllMatches(text, atom_ids, &matching_regexps);
+  EXPECT_EQ(2, matching_regexps.size());
+}
+
+}  //  namespace re2

diff --git a/re2/testing/mimics_pcre_test.cc b/re2/testing/mimics_pcre_test.cc
new file mode 100644
index 0000000..f965092
--- /dev/null
+++ b/re2/testing/mimics_pcre_test.cc

@@ -0,0 +1,76 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "util/test.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+struct PCRETest {
+  const char* regexp;  // pattern to parse
+  bool should_match;   // expected value of re->MimicsPCRE()
+};
+
+static PCRETest tests[] = {  // each entry is checked twice by SimpleTests: Latin-1 and default flags
+  // Most things should behave exactly.
+  { "abc",       true  },
+  { "(a|b)c",    true  },
+  { "(a*|b)c",   true  },
+  { "(a|b*)c",   true  },
+  { "a(b|c)d",   true  },
+  { "a(()|())c", true  },
+  { "ab*c",      true  },
+  { "ab+c",      true  },
+  { "a(b*|c*)d", true  },
+  { "\\W",       true  },
+  { "\\W{1,2}",  true  },
+  { "\\d",       true  },
+
+  // Check that repeated empty strings do not.
+  { "(a*)*",     false },
+  { "x(a*)*y",   false },
+  { "(a*)+",     false },
+  { "(a+)*",     true  },
+  { "(a+)+",     true  },
+  { "(a+)+",     true  },  // NOTE(review): exact duplicate of the previous entry
+
+  // \v is the only character class that shouldn't.
+  { "\\b",       true  },
+  { "\\v",       false },
+  { "\\d",       true  },
+
+  // The handling of ^ in multi-line mode is different, as is
+  // the handling of $ in single-line mode.  (Both involve
+  // boundary cases if the string ends with \n.)
+  { "\\A",       true  },
+  { "\\z",       true  },
+  { "(?m)^",     false },
+  { "(?m)$",     true  },
+  { "(?-m)^",    true  },
+  { "(?-m)$",    false },  // In PCRE, == \Z
+  { "(?m)\\A",   true  },
+  { "(?m)\\z",   true  },
+  { "(?-m)\\A",  true  },
+  { "(?-m)\\z",  true  },
+};
+
+TEST(MimicsPCRE, SimpleTests) {
+  for (int i = 0; i < arraysize(tests); i++) {
+    const PCRETest& tc = tests[i];
+    for (int pass = 0; pass < 2; pass++) {
+      const bool latin1 = (pass == 0);  // first pass adds Latin1, second uses LikePerl alone
+      Regexp::ParseFlags flags = Regexp::LikePerl;
+      if (latin1) flags = flags | Regexp::Latin1;
+      Regexp* parsed = Regexp::Parse(tc.regexp, flags, NULL);
+      CHECK(parsed) << " " << tc.regexp;
+      CHECK_EQ(tc.should_match, parsed->MimicsPCRE())
+        << " " << tc.regexp << " "
+        << (latin1 ? "latin1" : "utf");
+      parsed->Decref();
+    }
+  }
+}
+
+}  // namespace re2

diff --git a/re2/testing/null_walker.cc b/re2/testing/null_walker.cc
new file mode 100644
index 0000000..09b53cb
--- /dev/null
+++ b/re2/testing/null_walker.cc

@@ -0,0 +1,44 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "util/test.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+namespace re2 {
+
+// Null walker.  For benchmarking the walker itself.
+
+class NullWalker : public Regexp::Walker<bool> {
+ public:
+  NullWalker() { }
+  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                 bool* child_args, int nchild_args);  // Always returns false.
+
+  bool ShortVisit(Regexp* re, bool a) {
+    // Should never be called: we use Walk not WalkExponential.
+    LOG(DFATAL) << "NullWalker::ShortVisit called";
+    return a;  // Hands the incoming argument straight back.
+  }
+
+ private:
+  DISALLOW_EVIL_CONSTRUCTORS(NullWalker);
+};
+
+// Called after visiting re's children.  child_args contains the return
+// value from each of the children's PostVisits.  This walker does no
+// work with them: every argument is ignored and the result is always
+// false.
+bool NullWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                                  bool* child_args, int nchild_args) {
+  return false;
+}
+
+// Runs a NullWalker over this regexp and discards the walk's result.
+void Regexp::NullWalk() {
+  NullWalker w;
+  w.Walk(this, false);
+}
+
+}  // namespace re2

diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
new file mode 100644
index 0000000..f67b477
--- /dev/null
+++ b/re2/testing/parse_test.cc

@@ -0,0 +1,433 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test parse.cc, dump.cc, and tostring.cc.
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+static const Regexp::ParseFlags TestZeroFlags = Regexp::ParseFlags(1<<30);  // Nonzero marker for an explicit empty flag set; masked off before Parse.
+
+struct Test {
+  const char* regexp;        // Pattern to parse.
+  const char* parse;         // Expected Regexp::Dump() output.
+  Regexp::ParseFlags flags;  // 0 (omitted) means use the caller's flags.
+};
+
+static Regexp::ParseFlags kTestFlags = Regexp::MatchNL |
+                                       Regexp::PerlX |
+                                       Regexp::PerlClasses |
+                                       Regexp::UnicodeGroups;  // Used for tests[] entries that set no flags.
+
+static Test tests[] = {
+  // Base cases
+  { "a", "lit{a}" },
+  { "a.", "cat{lit{a}dot{}}" },
+  { "a.b", "cat{lit{a}dot{}lit{b}}" },
+  { "ab", "str{ab}" },
+  { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" },
+  { "abc", "str{abc}" },
+  { "a|^", "alt{lit{a}bol{}}" },
+  { "a|b", "cc{0x61-0x62}" },
+  { "(a)", "cap{lit{a}}" },
+  { "(a)|b", "alt{cap{lit{a}}lit{b}}" },
+  { "a*", "star{lit{a}}" },
+  { "a+", "plus{lit{a}}" },
+  { "a?", "que{lit{a}}" },
+  { "a{2}", "rep{2,2 lit{a}}" },
+  { "a{2,3}", "rep{2,3 lit{a}}" },
+  { "a{2,}", "rep{2,-1 lit{a}}" },
+  { "a*?", "nstar{lit{a}}" },
+  { "a+?", "nplus{lit{a}}" },
+  { "a??", "nque{lit{a}}" },
+  { "a{2}?", "nrep{2,2 lit{a}}" },
+  { "a{2,3}?", "nrep{2,3 lit{a}}" },
+  { "a{2,}?", "nrep{2,-1 lit{a}}" },
+  { "", "emp{}" },
+  { "|", "emp{}" },  // alt{emp{}emp{}} but got factored
+  { "|x|", "alt{emp{}lit{x}emp{}}" },
+  { ".", "dot{}" },
+  { "^", "bol{}" },
+  { "$", "eol{}" },
+  { "\\|", "lit{|}" },
+  { "\\(", "lit{(}" },
+  { "\\)", "lit{)}" },
+  { "\\*", "lit{*}" },
+  { "\\+", "lit{+}" },
+  { "\\?", "lit{?}" },
+  { "{", "lit{{}" },
+  { "}", "lit{}}" },
+  { "\\.", "lit{.}" },
+  { "\\^", "lit{^}" },
+  { "\\$", "lit{$}" },
+  { "\\\\", "lit{\\}" },
+  { "[ace]", "cc{0x61 0x63 0x65}" },
+  { "[abc]", "cc{0x61-0x63}" },
+  { "[a-z]", "cc{0x61-0x7a}" },
+  { "[a]", "lit{a}" },
+  { "\\-", "lit{-}" },
+  { "-", "lit{-}" },
+  { "\\_", "lit{_}" },
+
+  // Posix and Perl extensions
+  { "[[:lower:]]", "cc{0x61-0x7a}" },
+  { "[a-z]", "cc{0x61-0x7a}" },  // Same entry as in the base cases above.
+  { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
+  { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
+  { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+  { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+  { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
+  { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
+  { "\\d", "cc{0x30-0x39}" },
+  { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" },
+  { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" },
+  { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" },
+  { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" },
+  { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" },
+  { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
+  { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
+  { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" },
+  { "\\C", "byte{}" },
+
+  // Unicode, negatives, and a double negative.
+  { "\\p{Braille}", "cc{0x2800-0x28ff}" },
+  { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
+  { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
+  { "\\P{^Braille}", "cc{0x2800-0x28ff}" },
+
+  // More interesting regular expressions.
+  { "a{,2}", "str{a{,2}}" },
+  { "\\.\\^\\$\\\\", "str{.^$\\}" },
+  { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" },
+  { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
+  { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" },  // utf-8
+  { "a*{", "cat{star{lit{a}}lit{{}}" },
+
+  // Test precedences
+  { "(?:ab)*", "star{str{ab}}" },
+  { "(ab)*", "star{cap{str{ab}}}" },
+  { "ab|cd", "alt{str{ab}str{cd}}" },
+  { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
+
+  // Test flattening.
+  { "(?:a)", "lit{a}" },
+  { "(?:ab)(?:cd)", "str{abcd}" },
+  { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
+  { "a|.", "dot{}" },
+  { ".|a", "dot{}" },
+
+  // Test Perl quoted literals
+  { "\\Q+|*?{[\\E", "str{+|*?{[}" },
+  { "\\Q+\\E+", "plus{lit{+}}" },
+  { "\\Q\\\\E", "lit{\\}" },
+  { "\\Q\\\\\\E", "str{\\\\}" },
+
+  // Test Perl \A and \z
+  { "(?m)^", "bol{}" },
+  { "(?m)$", "eol{}" },
+  { "(?-m)^", "bot{}" },
+  { "(?-m)$", "eot{}" },
+  { "(?m)\\A", "bot{}" },
+  { "(?m)\\z", "eot{\\z}" },
+  { "(?-m)\\A", "bot{}" },
+  { "(?-m)\\z", "eot{\\z}" },
+
+  // Test named captures
+  { "(?P<name>a)", "cap{name:lit{a}}" },
+
+  // Case-folded literals
+  { "[Aa]", "litfold{a}" },
+
+  // Strings
+  { "abcde", "str{abcde}" },
+  { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
+
+  // Reported bug involving \n leaking in despite use of NeverNL.
+  { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },  // TestZeroFlags: parsed with an empty flag set.
+  { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+  { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags },
+  { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+  { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags },
+  { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+  { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags },
+  { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+  { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
+  { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+  { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+  { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+  { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses },
+  { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses | Regexp::FoldCase },
+  { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses | Regexp::NeverNL },
+  { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
+  { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses },
+  { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses | Regexp::FoldCase },
+  { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses | Regexp::NeverNL },
+  { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+    Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
+};
+
+bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
+  return Regexp::Equal(a, b);  // Plain forwarder; adds no logic of its own.
+}
+
+void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
+               const string& title) {  // NOTE(review): title is not used in the body.
+  Regexp** re = new Regexp*[ntests];  // Parsed results, kept for the pairwise pass below.
+  for (int i = 0; i < ntests; i++) {
+    RegexpStatus status;
+    Regexp::ParseFlags f = flags;
+    if (tests[i].flags != 0) {  // Entry supplies its own flags.
+      f = tests[i].flags & ~TestZeroFlags;  // Strip the marker bit.
+    }
+    re[i] = Regexp::Parse(tests[i].regexp, f, &status);
+    CHECK(re[i] != NULL) << " " << tests[i].regexp << " "
+                         << status.Text();
+    string s = re[i]->Dump();
+    EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp
+      << "\nparse: " << tests[i].parse << " s: " << s << " flag=" << f;
+  }
+
+  for (int i = 0; i < ntests; i++) {  // Equal must agree with dump equality for every pair.
+    for (int j = 0; j < ntests; j++) {
+      EXPECT_EQ(string(tests[i].parse) == tests[j].parse,
+                RegexpEqualTestingOnly(re[i], re[j]))
+        << "Regexp: " << tests[i].regexp << " " << tests[j].regexp;
+    }
+  }
+
+  for (int i = 0; i < ntests; i++)
+    re[i]->Decref();
+  delete[] re;
+}
+
+// Test that the regexps in tests[] parse to the expected structures.
+TEST(TestParse, SimpleRegexps) {
+  TestParse(tests, arraysize(tests), kTestFlags, "simple");
+}
+
+Test foldcase_tests[] = {
+  { "AbCdE", "strfold{abcde}" },
+  { "[Aa]", "litfold{a}" },
+  { "a", "litfold{a}" },
+
+  // 0x17F is an old English long s (looks like an f) and folds to s.
+  // 0x212A is the Kelvin symbol and folds to k.
+  { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" },  // [Aa][A-z...]
+  { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },  // Same expected class as [[:lower:]] below.
+  { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+};
+
+// Test that parsing with FoldCase (and no other flag) works.
+TEST(TestParse, FoldCase) {
+  TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldcase");
+}
+
+Test literal_tests[] = {
+  { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" },  // Expected dump is one plain string.
+};
+
+// Test that parsing with Literal (and no other flag) works.
+TEST(TestParse, Literal) {
+  TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal");
+}
+
+Test matchnl_tests[] = {
+  { ".", "dot{}" },
+  { "\n", "lit{\n}" },
+  { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },  // Range 0-0x60 includes newline (0xa).
+  { "[a\\n]", "cc{0xa 0x61}" },
+};
+
+// Test that parsing with MatchNL (and no other flag) works.
+// (MatchNL is also part of kTestFlags, used for the simple cases above.)
+TEST(TestParse, MatchNL) {
+  TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with MatchNL");
+}
+
+Test nomatchnl_tests[] = {
+  { ".", "cc{0-0x9 0xb-0x10ffff}" },  // Newline (0xa) is left out of the class.
+  { "\n", "lit{\n}" },
+  { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" },  // Newline is left out here too.
+  { "[a\\n]", "cc{0xa 0x61}" },
+};
+
+// Test that parsing without MatchNL (Regexp::NoParseFlags) works.
+TEST(TestParse, NoMatchNL) {
+  TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
+}
+
+Test prefix_tests[] = {
+  { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" },
+  { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" },
+  { "abc|abd|aef|bcx|bcy",
+    "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}"
+      "cat{str{bc}cc{0x78-0x79}}}" },
+  { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" },  // Expected unfactored: the ab... branches are not adjacent.
+  { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" },
+  { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" },
+  { "(?:xx|yy)c|(?:xx|yy)d",
+    "cat{alt{str{xx}str{yy}}cc{0x63-0x64}}" },
+  { "x{2}|x{2}[0-9]",
+    "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" },
+  { "x{2}y|x{2}[0-9]y",
+    "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" },
+};
+
+// Test that prefix factoring of alternations works (parsed with PerlX).
+TEST(TestParse, Prefix) {
+  TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix");
+}
+
+// Invalid regular expressions: must be rejected with and without PerlX.
+const char* badtests[] = {
+  "(",
+  ")",
+  "(a",
+  "(a|b|",
+  "(a|b",
+  "[a-z",
+  "([a-z)",
+  "x{1001}",   // presumably over the repeat-count limit -- confirm
+  "\xff",      // Invalid UTF-8
+  "[\xff]",
+  "[\\\xff]",
+  "\\\xff",
+  "(?P<name>a",
+  "(?P<name>",
+  "(?P<name",
+  "(?P<x y>a)",
+  "(?P<>a)",
+  "[a-Z]",     // Range runs backwards: 'a' (0x61) > 'Z' (0x5a).
+  "(?i)[a-Z]",
+  "a{100000}",
+  "a{100000,}",
+};
+
+// Valid in Perl (PerlX), bad in POSIX (NoParseFlags)
+const char* only_perl[] = {
+ "[a-b-c]",
+ "\\Qabc\\E",
+ "\\Q*+?{[\\E",
+ "\\Q\\\\E",
+ "\\Q\\\\\\E",
+ "\\Q\\\\\\\\E",
+ "\\Q\\\\\\\\\\E",
+ "(?:a)",        // Non-capturing group.
+ "(?P<name>a)",  // Named capture.
+};
+
+// Valid in POSIX, bad in Perl: stacked repetition operators.
+const char* only_posix[] = {
+  "a++",
+  "a**",
+  "a?*",
+  "a+*",
+  "a{1}*",
+};
+
+// Test that parser rejects bad regexps.
+TEST(TestParse, InvalidRegexps) {
+  for (int i = 0; i < arraysize(badtests); i++) {  // Bad under both flag sets.
+    CHECK(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL)
+      << " " << badtests[i];
+    CHECK(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL)
+      << " " << badtests[i];
+  }
+  for (int i = 0; i < arraysize(only_posix); i++) {  // Rejected with PerlX, accepted without.
+    CHECK(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL)
+      << " " << only_posix[i];
+    Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL);
+    CHECK(re) << " " << only_posix[i];
+    re->Decref();
+  }
+  for (int i = 0; i < arraysize(only_perl); i++) {  // Rejected without PerlX, accepted with it.
+    CHECK(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL)
+      << " " << only_perl[i];
+    Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL);
+    CHECK(re) << " " << only_perl[i];
+    re->Decref();
+  }
+}
+
+// Test that ToString produces original regexp or equivalent one.
+TEST(TestToString, EquivalentParse) {
+  for (int i = 0; i < arraysize(tests); i++) {
+    RegexpStatus status;
+    Regexp::ParseFlags f = kTestFlags;
+    if (tests[i].flags != 0) {  // Same flag selection as in TestParse.
+      f = tests[i].flags & ~TestZeroFlags;
+    }
+    Regexp* re = Regexp::Parse(tests[i].regexp, f, &status);
+    CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text();
+    string s = re->Dump();
+    EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << string(tests[i].parse) << " " << s;
+    string t = re->ToString();
+    if (t != tests[i].regexp) {
+      // If ToString didn't return the original regexp,
+      // it must have found one with fewer parens.
+      // Unfortunately we can't check the length here, because
+      // ToString produces "\\{" for a literal brace,
+      // but "{" is a shorter equivalent.
+      // CHECK_LT(t.size(), strlen(tests[i].regexp))
+      //     << " t=" << t << " regexp=" << tests[i].regexp;
+
+      // Test that if we parse the new regexp we get the same structure.
+      Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);  // NOTE(review): fixed flags, not f -- confirm intended.
+      CHECK(nre != NULL) << " reparse " << t << " " << status.Text();
+      string ss = nre->Dump();
+      string tt = nre->ToString();
+      if (s != ss || t != tt)  // Log the offending ToString before the EXPECTs fire.
+        LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
+      EXPECT_EQ(s, ss);
+      EXPECT_EQ(t, tt);
+      nre->Decref();
+    }
+    re->Decref();
+  }
+}
+
+// Test that capture error args are correct.
+TEST(NamedCaptures, ErrorArgs) {
+  RegexpStatus status;
+  Regexp* re;
+
+  re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status);  // Unterminated name.
+  EXPECT_TRUE(re == NULL);
+  EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
+  EXPECT_EQ(status.error_arg(), "(?P<name");  // Leading "test" is not part of the arg.
+
+  re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status);  // Name contains a space.
+  EXPECT_TRUE(re == NULL);
+  EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
+  EXPECT_EQ(status.error_arg(), "(?P<space bar>");  // Arg stops after the closing '>'.
+}
+
+}  // namespace re2

diff --git a/re2/testing/possible_match_test.cc b/re2/testing/possible_match_test.cc
new file mode 100644
index 0000000..7c2400e
--- /dev/null
+++ b/re2/testing/possible_match_test.cc

@@ -0,0 +1,240 @@
+// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <vector>
+#include "util/test.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "re2/testing/regexp_generator.h"
+#include "re2/testing/string_generator.h"
+
+namespace re2 {
+
+// Test that C++ strings are compared as uint8s, not int8s.
+// PossibleMatchRange doesn't depend on this, but callers probably will.
+TEST(CplusplusStrings, EightBit) {
+  string s = "\x70";
+  string t = "\xA0";  // High bit set: negative if bytes were treated as int8.
+  EXPECT_LT(s, t);
+}
+
+struct PrefixTest {
+  const char* regexp;  // Pattern under test.
+  int maxlen;          // Passed as PossibleMatchRange's third argument.
+  const char* min;     // Expected value written to *min.
+  const char* max;     // Expected value written to *max.
+};
+
+static PrefixTest tests[] = {
+  { "",                  10,  "",           "",        },
+  { "Abcdef",            10,  "Abcdef",     "Abcdef"   },
+  { "abc(def|ghi)",      10,  "abcdef",     "abcghi"   },
+  { "a+hello",           10,  "aa",         "ahello"   },
+  { "a*hello",           10,  "a",          "hello"    },
+  { "def|abc",           10,  "abc",        "def"      },
+  { "a(b)(c)[d]",        10,  "abcd",       "abcd"     },
+  { "ab(cab|cat)",       10,  "abcab",      "abcat"    },
+  { "ab(cab|ca)x",       10,  "abcabx",     "abcax"    },
+  { "(ab|x)(c|de)",      10,  "abc",        "xde"      },
+  { "(ab|x)?(c|z)?",     10,  "",           "z"        },
+  { "[^\\s\\S]",         10,  "",           ""         },
+  { "(abc)+",             5,  "abc",        "abcac"    },
+  { "(abc)+",             2,  "ab",         "ac"       },
+  { "(abc)+",             1,  "a",          "b"        },
+  { "[a\xC3\xA1]",        4,  "a",          "\xC3\xA1" },
+  { "a*",                10,  "",           "ab"       },
+
+  { "(?i)Abcdef",        10,  "ABCDEF",     "abcdef"   },  // From here: case-insensitive variants.
+  { "(?i)abc(def|ghi)",  10,  "ABCDEF",     "abcghi"   },
+  { "(?i)a+hello",       10,  "AA",         "ahello"   },
+  { "(?i)a*hello",       10,  "A",          "hello"    },
+  { "(?i)def|abc",       10,  "ABC",        "def"      },
+  { "(?i)a(b)(c)[d]",    10,  "ABCD",       "abcd"     },
+  { "(?i)ab(cab|cat)",   10,  "ABCAB",      "abcat"    },
+  { "(?i)ab(cab|ca)x",   10,  "ABCABX",     "abcax"    },
+  { "(?i)(ab|x)(c|de)",  10,  "ABC",        "xde"      },
+  { "(?i)(ab|x)?(c|z)?", 10,  "",           "z"        },
+  { "(?i)[^\\s\\S]",     10,  "",           ""         },
+  { "(?i)(abc)+",         5,  "ABC",        "abcac"    },
+  { "(?i)(abc)+",         2,  "AB",         "ac"       },
+  { "(?i)(abc)+",         1,  "A",          "b"        },
+  { "(?i)[a\xC3\xA1]",    4,  "A",          "\xC3\xA1" },
+  { "(?i)a*",            10,  "",           "ab"       },
+  { "(?i)A*",            10,  "",           "ab"       },
+
+  { "\\AAbcdef",         10,  "Abcdef",     "Abcdef"   },  // From here: the first group anchored with \A.
+  { "\\Aabc(def|ghi)",   10,  "abcdef",     "abcghi"   },
+  { "\\Aa+hello",        10,  "aa",         "ahello"   },
+  { "\\Aa*hello",        10,  "a",          "hello"    },
+  { "\\Adef|abc",        10,  "abc",        "def"      },
+  { "\\Aa(b)(c)[d]",     10,  "abcd",       "abcd"     },
+  { "\\Aab(cab|cat)",    10,  "abcab",      "abcat"    },
+  { "\\Aab(cab|ca)x",    10,  "abcabx",     "abcax"    },
+  { "\\A(ab|x)(c|de)",   10,  "abc",        "xde"      },
+  { "\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
+  { "\\A[^\\s\\S]",      10,  "",           ""         },
+  { "\\A(abc)+",          5,  "abc",        "abcac"    },
+  { "\\A(abc)+",          2,  "ab",         "ac"       },
+  { "\\A(abc)+",          1,  "a",          "b"        },
+  { "\\A[a\xC3\xA1]",     4,  "a",          "\xC3\xA1" },
+  { "\\Aa*",             10,  "",           "ab"       },
+
+  { "(?i)\\AAbcdef",         10,  "ABCDEF",     "abcdef"   },  // From here: anchored and case-insensitive.
+  { "(?i)\\Aabc(def|ghi)",   10,  "ABCDEF",     "abcghi"   },
+  { "(?i)\\Aa+hello",        10,  "AA",         "ahello"   },
+  { "(?i)\\Aa*hello",        10,  "A",          "hello"    },
+  { "(?i)\\Adef|abc",        10,  "ABC",        "def"      },
+  { "(?i)\\Aa(b)(c)[d]",     10,  "ABCD",       "abcd"     },
+  { "(?i)\\Aab(cab|cat)",    10,  "ABCAB",      "abcat"    },
+  { "(?i)\\Aab(cab|ca)x",    10,  "ABCABX",     "abcax"    },
+  { "(?i)\\A(ab|x)(c|de)",   10,  "ABC",        "xde"      },
+  { "(?i)\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
+  { "(?i)\\A[^\\s\\S]",      10,  "",           ""         },
+  { "(?i)\\A(abc)+",          5,  "ABC",        "abcac"    },
+  { "(?i)\\A(abc)+",          2,  "AB",         "ac"       },
+  { "(?i)\\A(abc)+",          1,  "A",          "b"        },
+  { "(?i)\\A[a\xC3\xA1]",     4,  "A",          "\xC3\xA1" },
+  { "(?i)\\Aa*",             10,  "",           "ab"       },
+  { "(?i)\\AA*",             10,  "",           "ab"       },
+};
+
+TEST(PossibleMatchRange, HandWritten) {
+  for (int i = 0; i < arraysize(tests); i++) {
+    for (int j = 0; j < 2; j++) {  // j == 0: via Regexp and Prog; j == 1: via RE2.
+      const PrefixTest& t = tests[i];
+      string min, max;
+      if (j == 0) {
+        LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
+        Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
+        CHECK(re);
+        Prog* prog = re->CompileToProg(0);
+        CHECK(prog);
+        CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen))
+          << " " << t.regexp;
+        delete prog;
+        re->Decref();
+      } else {
+        CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
+      }
+      EXPECT_EQ(t.min, min) << t.regexp;  // Both routes must produce the table's range.
+      EXPECT_EQ(t.max, max) << t.regexp;
+    }
+  }
+}
+
+// Test cases where PossibleMatchRange should return false.
+TEST(PossibleMatchRange, Failures) {
+  string min, max;  // Shared by every call below.
+
+  // Fails because no room to write max.
+  EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));  // maxlen == 0.
+
+  // Fails because there is no max -- any non-empty string matches
+  // or begins a match.  Have to use Latin-1 input, because there
+  // are no valid UTF-8 strings beginning with byte 0xFF.
+  EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
+               PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+  EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
+               PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+  EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
+               PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+  EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
+               PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+  EXPECT_FALSE(RE2(".*", RE2::Latin1).
+               PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+  EXPECT_FALSE(RE2("\\C*").  // The one case here built without RE2::Latin1.
+               PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+
+  // Fails because it's a malformed regexp.
+  EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
+    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+}
+
+// Exhaustive test: generate all regexps within parameters,
+// then generate all strings of a given length over a given alphabet,
+// then check that the prefix information agrees with whether
+// the regexp matches each of the strings.
+class PossibleMatchTester : public RegexpGenerator {
+ public:
+  PossibleMatchTester(int maxatoms,
+                      int maxops,
+                      const vector<string>& alphabet,
+                      const vector<string>& ops,
+                      int maxstrlen,
+                      const vector<string>& stralphabet)
+    : RegexpGenerator(maxatoms, maxops, alphabet, ops),
+      strgen_(maxstrlen, stralphabet),
+      regexps_(0), tests_(0) { }
+
+  int regexps()  { return regexps_; }
+  int tests()    { return tests_; }
+
+  // Needed for RegexpGenerator interface.
+  void HandleRegexp(const string& regexp);
+
+ private:
+  StringGenerator strgen_;  // Candidate strings; reset for every regexp.
+
+  int regexps_;   // Number of HandleRegexp calls
+  int tests_;     // Number of regexp tests.
+
+  DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester);
+};
+
+// Processes a single generated regexp.
+// Checks that all accepted strings agree with the prefix range.
+void PossibleMatchTester::HandleRegexp(const string& regexp) {
+  regexps_++;
+
+  VLOG(3) << CEscape(regexp);
+
+  RE2 re(regexp, RE2::Latin1);
+  CHECK_EQ(re.error(), "");  // Generated regexps must compile without error.
+
+  string min, max;
+  if(!re.PossibleMatchRange(&min, &max, 10)) {
+    // There's no good max for "\\C*".  Can't use strcmp
+    // because sometimes it gets embedded in more
+    // complicated expressions.
+    if(strstr(regexp.c_str(), "\\C*"))
+      return;
+    LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
+  }
+
+  strgen_.Reset();
+  while (strgen_.HasNext()) {
+    const StringPiece& s = strgen_.Next();
+    tests_++;  // Counted whether or not the string matches.
+    if (!RE2::FullMatch(s, re))
+      continue;
+    CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max;  // Full matches must lie in [min, max].
+    CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min;
+  }
+}
+
+TEST(PossibleMatchRange, Exhaustive) {
+  int natom = 3;
+  int noperator = 3;
+  int stringlen = 5;
+  if (DEBUG_MODE) {  // Fewer atoms and shorter strings when DEBUG_MODE is set.
+    natom = 2;
+    noperator = 3;
+    stringlen = 3;
+  }
+  PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
+                 RegexpGenerator::EgrepOps(),
+                 stringlen, Explode("ab4"));
+  t.Generate();
+  LOG(INFO) << t.regexps() << " regexps, "
+            << t.tests() << " tests";  // Counters only; failures abort through CHECK in HandleRegexp.
+}
+
+}  // namespace re2

diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc
new file mode 100644
index 0000000..91d2b32
--- /dev/null
+++ b/re2/testing/random_test.cc

@@ -0,0 +1,95 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Random testing of regular expression matching.
+
+#include <stdio.h>
+#include "util/test.h"
+#include "re2/testing/exhaustive_tester.h"
+
+DEFINE_int32(regexpseed, 404, "Random regexp seed.");  // Passed to GenerateRandom.
+DEFINE_int32(regexpcount, 100, "How many random regexps to generate.");  // Passed to GenerateRandom.
+DEFINE_int32(stringseed, 200, "Random string seed.");  // Passed to RandomStrings.
+DEFINE_int32(stringcount, 100, "How many random strings to generate.");  // Passed to RandomStrings.
+
+namespace re2 {
+
+// Runs a random test on the given parameters.
+// (Uses the same default random seeds every run, for reproducibility.
+// Different seeds can be given on the command line.)
+static void RandomTest(int maxatoms, int maxops,
+                       const vector<string>& alphabet,
+                       const vector<string>& ops,
+                       int maxstrlen, const vector<string>& stralphabet,
+                       const string& wrapper) {
+  // Limit to smaller test cases in debug mode,
+  // because everything is so much slower.
+  if (DEBUG_MODE) {
+    maxatoms--;
+    maxops--;
+    maxstrlen /= 2;  // The printf below reports this reduced value.
+  }
+
+  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
+                     maxstrlen, stralphabet, wrapper, "");
+  t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
+  t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
+  printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
+         t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
+  EXPECT_EQ(0, t.failures());  // The test passes only with zero failures.
+}
+
+// Tests random small regexps involving literals and egrep operators.
+TEST(Random, SmallEgrepLiterals) {
+  RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(),  // maxatoms = maxops = 5.
+             15, Explode("abc"),                                  // maxstrlen = 15.
+             "");                                                 // Empty wrapper.
+}
+
+// Tests random bigger regexps involving literals and egrep operators.
+TEST(Random, BigEgrepLiterals) {
+  RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(),  // maxatoms = maxops = 10.
+             15, Explode("abc"),                                    // maxstrlen = 15.
+             "");                                                   // Empty wrapper.
+}
+
+// Tests random small regexps involving literals, capturing parens,
+// and egrep operators.
+TEST(Random, SmallEgrepCaptures) {
+  RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),  // Atom list includes the capture (b).
+             15, Explode("abc"),
+             "");
+}
+
+// Tests random bigger regexps involving literals, capturing parens,
+// and egrep operators.
+TEST(Random, BigEgrepCaptures) {
+  RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),  // Same atom list as the small test; limits 10 instead of 5.
+             15, Explode("abc"),
+             "");
+}
+
+// Tests random large complicated expressions, using all the possible
+// operators, some literals, some parenthesized literals, and predefined
+// character classes like \d.  (Adding larger character classes would
+// make for too many possibilities.)
+TEST(Random, Complicated) {
+  vector<string> ops = Split(" ",  // Custom operator list instead of EgrepOps().
+    "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? "
+    "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} "
+    "%s{2} %s{2,} %s{3,4} %s{4,5}");
+
+  // Use (?:\b) and (?:\B) instead of \b and \B,
+  // because PCRE rejects \b* but accepts (?:\b)*.
+  // Ditto ^ and $.
+  vector<string> atoms = Split(" ",
+    ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v "
+    "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) "
+    "a (a) b c - \\\\");
+  vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
+  RandomTest(10, 10, atoms, ops, 20, alphabet, "");  // maxatoms = maxops = 10; maxstrlen = 20.
+}
+
+}  // namespace re2
+

diff --git a/re2/testing/re2_arg_test.cc b/re2/testing/re2_arg_test.cc
new file mode 100644
index 0000000..ae7a7b0
--- /dev/null
+++ b/re2/testing/re2_arg_test.cc

@@ -0,0 +1,133 @@
+// Copyright 2005 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This tests to make sure numbers are parsed from strings
+// correctly.
+// Todo: Expand the test to validate strings parsed to the other types
+// supported by RE2::Arg class
+
+#include "util/test.h"
+#include "re2/re2.h"
+
+namespace re2 {
+
+struct SuccessTable {
+  const char * value_string;
+  int64 value;
+  bool success[6];
+};
+
+// Test boundary cases for different integral sizes.
+// Specifically I want to make sure that values outside the boundaries
+// of an integral type will fail and that negative numbers will fail
+// for unsigned types. The following table contains the boundaries for
+// the various integral types and has entries for whether or not each
+// type can contain the given value.
+const SuccessTable kSuccessTable[] = {
+// string       integer value     short  ushort int    uint   int64  uint64
+// 0 to 2^7-1
+{ "0",          0,              { true,  true,  true,  true,  true,  true  }},
+{ "127",        127,            { true,  true,  true,  true,  true,  true  }},
+
+// -1 to -2^7
+{ "-1",         -1,             { true,  false, true,  false, true,  false }},
+{ "-128",       -128,           { true,  false, true,  false, true,  false }},
+
+// 2^7 to 2^8-1
+{ "128",        128,            { true,  true,  true,  true,  true,  true  }},
+{ "255",        255,            { true,  true,  true,  true,  true,  true  }},
+
+// 2^8 to 2^15-1
+{ "256",        256,            { true,  true,  true,  true,  true,  true  }},
+{ "32767",      32767,          { true,  true,  true,  true,  true,  true  }},
+
+// -2^7-1 to -2^15
+{ "-129",       -129,           { true,  false, true,  false, true,  false }},
+{ "-32768",     -32768,         { true,  false, true,  false, true,  false }},
+
+// 2^15 to 2^16-1
+{ "32768",      32768,          { false, true,  true,  true,  true,  true  }},
+{ "65535",      65535,          { false, true,  true,  true,  true,  true  }},
+
+// 2^16 to 2^31-1
+{ "65536",      65536,          { false, false, true,  true,  true,  true  }},
+{ "2147483647", 2147483647,     { false, false, true,  true,  true,  true  }},
+
+// -2^15-1 to -2^31
+{ "-32769",     -32769,         { false, false, true,  false, true,  false }},
+{ "-2147483648",
+  static_cast<int64>(0xFFFFFFFF80000000LL),
+{ false, false, true,  false, true,  false }},
+
+// 2^31 to 2^32-1
+{ "2147483648", 2147483648U,    { false, false, false, true,  true,  true  }},
+{ "4294967295", 4294967295U,    { false, false, false, true,  true,  true  }},
+
+// 2^32 to 2^63-1
+{ "4294967296", 4294967296LL,   { false, false, false, false, true,  true  }},
+{ "9223372036854775807",
+  9223372036854775807LL,        { false, false, false, false, true,  true  }},
+
+// -2^31-1 to -2^63
+{ "-2147483649", -2147483649LL, { false, false, false, false, true,  false }},
+{ "-9223372036854775808", static_cast<int64>(0x8000000000000000LL),
+  { false, false, false, false, true,  false }},
+
+// 2^63 to 2^64-1
+{ "9223372036854775808", static_cast<int64>(9223372036854775808ULL),
+  { false, false, false, false, false, true  }},
+{ "18446744073709551615", static_cast<int64>(18446744073709551615ULL),
+  { false, false, false, false, false, true  }},
+
+// >= 2^64
+{ "18446744073709551616", 0,    { false, false, false, false, false, false }},
+};
+
+const int kNumStrings = ARRAYSIZE(kSuccessTable);
+
+// It's ugly to use a macro, but we apparently can't use the ASSERT_TRUE_M
+// macro outside of a TEST block and this seems to be the only way to
+// avoid code duplication.  I can also pull off a couple nice tricks
+// using concatenation for the type I'm checking against.
+#define PARSE_FOR_TYPE(type, column) {                                   \
+  type r;                                                                \
+  for ( int i = 0; i < kNumStrings; ++i ) {                              \
+    RE2::Arg arg(&r);                                                    \
+    const char* const p = kSuccessTable[i].value_string;                 \
+    bool retval = arg.Parse(p, strlen(p));                               \
+    bool success = kSuccessTable[i].success[column];                     \
+    ASSERT_TRUE_M(retval == success,                                     \
+      StringPrintf("Parsing '%s' for type " #type " should return %d",   \
+                   p, success).c_str());                                 \
+    if ( success ) {                                                     \
+      ASSERT_EQUALS(r, kSuccessTable[i].value);                          \
+    }                                                                    \
+  }                                                                      \
+}
+
+TEST(REArgTest, Int16Test) {
+  PARSE_FOR_TYPE(int16, 0);
+}
+
+TEST(REArgTest, Uint16Test) {
+  PARSE_FOR_TYPE(uint16, 1);
+}
+
+TEST(REArgTest, IntTest) {
+  PARSE_FOR_TYPE(int, 2);
+}
+
+TEST(REArgTest, UInt32Test) {
+  PARSE_FOR_TYPE(uint32, 3);
+}
+
+TEST(REArgTest, Iint64Test) {
+  PARSE_FOR_TYPE(int64, 4);
+}
+
+TEST(REArgTest, Uint64Test) {
+  PARSE_FOR_TYPE(uint64, 5);
+}
+
+}  // namespace re2

diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
new file mode 100644
index 0000000..b99cacf
--- /dev/null
+++ b/re2/testing/re2_test.cc

@@ -0,0 +1,1371 @@
+// -*- coding: utf-8 -*-
+// Copyright 2002-2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO: Test extractions for PartialMatch/Consume
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <vector>
+#include "util/test.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+
+DECLARE_bool(logtostderr);
+
+namespace re2 {
+
+TEST(RE2, HexTests) {
+
+  VLOG(1) << "hex tests";
+
+#define CHECK_HEX(type, value) \
+  do { \
+    type v; \
+    CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
+    CHECK_EQ(v, 0x ## value); \
+    CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
+    CHECK_EQ(v, 0x ## value); \
+  } while(0)
+
+  CHECK_HEX(short,              2bad);
+  CHECK_HEX(unsigned short,     2badU);
+  CHECK_HEX(int,                dead);
+  CHECK_HEX(unsigned int,       deadU);
+  CHECK_HEX(long,               7eadbeefL);
+  CHECK_HEX(unsigned long,      deadbeefUL);
+  CHECK_HEX(long long,          12345678deadbeefLL);
+  CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
+
+#undef CHECK_HEX
+}
+
+TEST(RE2, OctalTests) {
+  VLOG(1) << "octal tests";
+
+#define CHECK_OCTAL(type, value) \
+  do { \
+    type v; \
+    CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
+    CHECK_EQ(v, 0 ## value); \
+    CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
+    CHECK_EQ(v, 0 ## value); \
+  } while(0)
+
+  CHECK_OCTAL(short,              77777);
+  CHECK_OCTAL(unsigned short,     177777U);
+  CHECK_OCTAL(int,                17777777777);
+  CHECK_OCTAL(unsigned int,       37777777777U);
+  CHECK_OCTAL(long,               17777777777L);
+  CHECK_OCTAL(unsigned long,      37777777777UL);
+  CHECK_OCTAL(long long,          777777777777777777777LL);
+  CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
+
+#undef CHECK_OCTAL
+}
+
+TEST(RE2, DecimalTests) {
+  VLOG(1) << "decimal tests";
+
+#define CHECK_DECIMAL(type, value) \
+  do { \
+    type v; \
+    CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
+    CHECK_EQ(v, value); \
+    CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
+    CHECK_EQ(v, value); \
+  } while(0)
+
+  CHECK_DECIMAL(short,              -1);
+  CHECK_DECIMAL(unsigned short,     9999);
+  CHECK_DECIMAL(int,                -1000);
+  CHECK_DECIMAL(unsigned int,       12345U);
+  CHECK_DECIMAL(long,               -10000000L);
+  CHECK_DECIMAL(unsigned long,      3083324652U);
+  CHECK_DECIMAL(long long,          -100000000000000LL);
+  CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
+
+#undef CHECK_DECIMAL
+}
+
+TEST(RE2, Replace) {
+  VLOG(1) << "TestReplace";
+
+  struct ReplaceTest {
+    const char *regexp;
+    const char *rewrite;
+    const char *original;
+    const char *single;
+    const char *global;
+    int        greplace_count;
+  };
+  static const ReplaceTest tests[] = {
+    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
+      "\\2\\1ay",
+      "the quick brown fox jumps over the lazy dogs.",
+      "ethay quick brown fox jumps over the lazy dogs.",
+      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
+      9 },
+    { "\\w+",
+      "\\0-NOSPAM",
+      "abcd.efghi@google.com",
+      "abcd-NOSPAM.efghi@google.com",
+      "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
+      4 },
+    { "^",
+      "(START)",
+      "foo",
+      "(START)foo",
+      "(START)foo",
+      1 },
+    { "^",
+      "(START)",
+      "",
+      "(START)",
+      "(START)",
+      1 },
+    { "$",
+      "(END)",
+      "",
+      "(END)",
+      "(END)",
+      1 },
+    { "b",
+      "bb",
+      "ababababab",
+      "abbabababab",
+      "abbabbabbabbabb",
+      5 },
+    { "b",
+      "bb",
+      "bbbbbb",
+      "bbbbbbb",
+      "bbbbbbbbbbbb",
+      6 },
+    { "b+",
+      "bb",
+      "bbbbbb",
+      "bb",
+      "bb",
+      1 },
+    { "b*",
+      "bb",
+      "bbbbbb",
+      "bb",
+      "bb",
+      1 },
+    { "b*",
+      "bb",
+      "aaaaa",
+      "bbaaaaa",
+      "bbabbabbabbabbabb",
+      6 },
+    // Check newline handling
+    { "a.*a",
+      "(\\0)",
+      "aba\naba",
+      "(aba)\naba",
+      "(aba)\n(aba)",
+      2 },
+    { "", NULL, NULL, NULL, NULL, 0 }
+  };
+
+  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
+    VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
+    string one(t->original);
+    CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
+    CHECK_EQ(one, t->single);
+    string all(t->original);
+    CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
+      << "Got: " << all;
+    CHECK_EQ(all, t->global);
+  }
+}
+
+static void TestCheckRewriteString(const char* regexp, const char* rewrite,
+                              bool expect_ok) {
+  string error;
+  RE2 exp(regexp);
+  bool actual_ok = exp.CheckRewriteString(rewrite, &error);
+  EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
+}
+
+TEST(CheckRewriteString, all) {
+  TestCheckRewriteString("abc", "foo", true);
+  TestCheckRewriteString("abc", "foo\\", false);
+  TestCheckRewriteString("abc", "foo\\0bar", true);
+
+  TestCheckRewriteString("a(b)c", "foo", true);
+  TestCheckRewriteString("a(b)c", "foo\\0bar", true);
+  TestCheckRewriteString("a(b)c", "foo\\1bar", true);
+  TestCheckRewriteString("a(b)c", "foo\\2bar", false);
+  TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
+
+  TestCheckRewriteString("a(b)(c)", "foo\\12", true);
+  TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
+  TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
+}
+
+TEST(RE2, Extract) {
+  VLOG(1) << "TestExtract";
+
+  string s;
+
+  CHECK(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
+  CHECK_EQ(s, "kremvax!boris");
+
+  CHECK(RE2::Extract("foo", ".*", "'\\0'", &s));
+  CHECK_EQ(s, "'foo'");
+  // check that false match doesn't overwrite
+  CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s));
+  CHECK_EQ(s, "'foo'");
+}
+
+TEST(RE2, Consume) {
+  VLOG(1) << "TestConsume";
+
+  RE2 r("\\s*(\\w+)");    // matches a word, possibly preceded by whitespace
+  string word;
+
+  string s("   aaa b!@#$@#$cccc");
+  StringPiece input(s);
+
+  CHECK(RE2::Consume(&input, r, &word));
+  CHECK_EQ(word, "aaa") << " input: " << input;
+  CHECK(RE2::Consume(&input, r, &word));
+  CHECK_EQ(word, "b") << " input: " << input;
+  CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input;
+}
+
+TEST(RE2, ConsumeN) {
+  const string s(" one two three 4");
+  StringPiece input(s);
+
+  RE2::Arg argv[2];
+  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+  // 0 arg
+  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0));  // Skips "one".
+
+  // 1 arg
+  string word;
+  argv[0] = &word;
+  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
+  EXPECT_EQ("two", word);
+
+  // Multi-args
+  int n;
+  argv[1] = &n;
+  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
+  EXPECT_EQ("three", word);
+  EXPECT_EQ(4, n);
+}
+
+TEST(RE2, FindAndConsume) {
+  VLOG(1) << "TestFindAndConsume";
+
+  RE2 r("(\\w+)");      // matches a word
+  string word;
+
+  string s("   aaa b!@#$@#$cccc");
+  StringPiece input(s);
+
+  CHECK(RE2::FindAndConsume(&input, r, &word));
+  CHECK_EQ(word, "aaa");
+  CHECK(RE2::FindAndConsume(&input, r, &word));
+  CHECK_EQ(word, "b");
+  CHECK(RE2::FindAndConsume(&input, r, &word));
+  CHECK_EQ(word, "cccc");
+  CHECK(! RE2::FindAndConsume(&input, r, &word));
+
+  // Check that FindAndConsume works without any submatches.
+  // Earlier version used uninitialized data for
+  // length to consume.
+  input = "aaa";
+  CHECK(RE2::FindAndConsume(&input, "aaa"));
+  CHECK_EQ(input, "");
+}
+
+TEST(RE2, FindAndConsumeN) {
+  const string s(" one two three 4");
+  StringPiece input(s);
+
+  RE2::Arg argv[2];
+  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+  // 0 arg
+  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0));  // Skips "one".
+
+  // 1 arg
+  string word;
+  argv[0] = &word;
+  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
+  EXPECT_EQ("two", word);
+
+  // Multi-args
+  int n;
+  argv[1] = &n;
+  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
+  EXPECT_EQ("three", word);
+  EXPECT_EQ(4, n);
+}
+
+TEST(RE2, MatchNumberPeculiarity) {
+  VLOG(1) << "TestMatchNumberPeculiarity";
+
+  RE2 r("(foo)|(bar)|(baz)");
+  string word1;
+  string word2;
+  string word3;
+
+  CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
+  CHECK_EQ(word1, "foo");
+  CHECK_EQ(word2, "");
+  CHECK_EQ(word3, "");
+  CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
+  CHECK_EQ(word1, "");
+  CHECK_EQ(word2, "bar");
+  CHECK_EQ(word3, "");
+  CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
+  CHECK_EQ(word1, "");
+  CHECK_EQ(word2, "");
+  CHECK_EQ(word3, "baz");
+  CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3));
+
+  string a;
+  CHECK(RE2::FullMatch("hello", "(foo)|hello", &a));
+  CHECK_EQ(a, "");
+}
+
+TEST(RE2, Match) {
+  RE2 re("((\\w+):([0-9]+))");   // extracts host and port
+  StringPiece group[4];
+
+  // No match.
+  StringPiece s = "zyzzyva";
+  CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED,
+                  group, arraysize(group)));
+
+  // Matches and extracts.
+  s = "a chrisr:9000 here";
+  CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED,
+                 group, arraysize(group)));
+  CHECK_EQ(group[0], "chrisr:9000");
+  CHECK_EQ(group[1], "chrisr:9000");
+  CHECK_EQ(group[2], "chrisr");
+  CHECK_EQ(group[3], "9000");
+
+  string all, host;
+  int port;
+  CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
+  CHECK_EQ(all, "chrisr:9000");
+  CHECK_EQ(host, "chrisr");
+  CHECK_EQ(port, 9000);
+}
+
+static void TestRecursion(int size, const char *pattern) {
+  // Fill up a string repeating the pattern given
+  string domain;
+  domain.resize(size);
+  int patlen = strlen(pattern);
+  for (int i = 0; i < size; ++i) {
+    domain[i] = pattern[i % patlen];
+  }
+  // Just make sure it doesn't crash due to too much recursion.
+  RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
+  RE2::FullMatch(domain, re);
+}
+
+// A meta-quoted string, interpreted as a pattern, should always match
+// the original unquoted string.
+static void TestQuoteMeta(string unquoted,
+                          const RE2::Options& options = RE2::DefaultOptions) {
+  string quoted = RE2::QuoteMeta(unquoted);
+  RE2 re(quoted, options);
+  EXPECT_TRUE_M(RE2::FullMatch(unquoted, re),
+                "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
+}
+
+// A meta-quoted string, interpreted as a pattern, should never match
+// a string other than the original unquoted string.
+static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
+                                  const RE2::Options& options = RE2::DefaultOptions) {
+  string quoted = RE2::QuoteMeta(unquoted);
+  RE2 re(quoted, options);
+  EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re),
+                 "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
+}
+
+// Tests that quoted meta characters match their original strings,
+// and that a few things that shouldn't match indeed do not.
+TEST(QuoteMeta, Simple) {
+  TestQuoteMeta("foo");
+  TestQuoteMeta("foo.bar");
+  TestQuoteMeta("foo\\.bar");
+  TestQuoteMeta("[1-9]");
+  TestQuoteMeta("1.5-2.0?");
+  TestQuoteMeta("\\d");
+  TestQuoteMeta("Who doesn't like ice cream?");
+  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
+  TestQuoteMeta("((?!)xxx).*yyy");
+  TestQuoteMeta("([");
+}
+TEST(QuoteMeta, SimpleNegative) {
+  NegativeTestQuoteMeta("foo", "bar");
+  NegativeTestQuoteMeta("...", "bar");
+  NegativeTestQuoteMeta("\\.", ".");
+  NegativeTestQuoteMeta("\\.", "..");
+  NegativeTestQuoteMeta("(a)", "a");
+  NegativeTestQuoteMeta("(a|b)", "a");
+  NegativeTestQuoteMeta("(a|b)", "(a)");
+  NegativeTestQuoteMeta("(a|b)", "a|b");
+  NegativeTestQuoteMeta("[0-9]", "0");
+  NegativeTestQuoteMeta("[0-9]", "0-9");
+  NegativeTestQuoteMeta("[0-9]", "[9]");
+  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
+}
+
+TEST(QuoteMeta, Latin1) {
+  TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
+}
+
+TEST(QuoteMeta, UTF8) {
+  TestQuoteMeta("Plácido Domingo");
+  TestQuoteMeta("xyz");  // No fancy utf8.
+  TestQuoteMeta("\xc2\xb0");  // 2-byte utf8 -- a degree symbol.
+  TestQuoteMeta("27\xc2\xb0 degrees");  // As a middle character.
+  TestQuoteMeta("\xe2\x80\xb3");  // 3-byte utf8 -- a double prime.
+  TestQuoteMeta("\xf0\x9d\x85\x9f");  // 4-byte utf8 -- a music note.
+  TestQuoteMeta("27\xc2\xb0", RE2::Latin1);  // Same bytes read as Latin-1
+                                             // text must still round-trip.
+  NegativeTestQuoteMeta("27\xc2\xb0",
+                        "27\\\xc2\\\xb0");  // 2-byte utf8 -- a degree symbol.
+}
+
+TEST(QuoteMeta, HasNull) {
+  string has_null;
+
+  // string with one null character
+  has_null += '\0';
+  TestQuoteMeta(has_null);
+  NegativeTestQuoteMeta(has_null, "");
+
+  // Don't want null-followed-by-'1' to be interpreted as '\01'.
+  has_null += '1';
+  TestQuoteMeta(has_null);
+  NegativeTestQuoteMeta(has_null, "\1");
+}
+
+TEST(ProgramSize, BigProgram) {
+  RE2 re_simple("simple regexp");
+  RE2 re_medium("medium.*regexp");
+  RE2 re_complex("hard.{1,128}regexp");
+
+  CHECK_GT(re_simple.ProgramSize(), 0);
+  CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
+  CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
+}
+
+// Issue 956519: handling empty character sets was
+// causing NULL dereference.  This tests a few empty character sets.
+// (The way to get an empty character set is to negate a full one.)
+TEST(EmptyCharset, Fuzz) {
+  static const char *empties[] = {
+    "[^\\S\\s]",
+    "[^\\S[:space:]]",
+    "[^\\D\\d]",
+    "[^\\D[:digit:]]"
+  };
+  for (int i = 0; i < arraysize(empties); i++)
+    CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
+}
+
+// Test that named groups work correctly.
+TEST(Capture, NamedGroups) {
+  {
+    RE2 re("(hello world)");
+    CHECK_EQ(re.NumberOfCapturingGroups(), 1);
+    const map<string, int>& m = re.NamedCapturingGroups();
+    CHECK_EQ(m.size(), 0);
+  }
+
+  {
+    RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
+    CHECK_EQ(re.NumberOfCapturingGroups(), 6);
+    const map<string, int>& m = re.NamedCapturingGroups();
+    CHECK_EQ(m.size(), 4);
+    CHECK_EQ(m.find("A")->second, 1);
+    CHECK_EQ(m.find("B")->second, 2);
+    CHECK_EQ(m.find("C")->second, 3);
+    CHECK_EQ(m.find("D")->second, 6);  // $4 and $5 are anonymous
+  }
+}
+
+TEST(RE2, FullMatchWithNoArgs) {
+  CHECK(RE2::FullMatch("h", "h"));
+  CHECK(RE2::FullMatch("hello", "hello"));
+  CHECK(RE2::FullMatch("hello", "h.*o"));
+  CHECK(!RE2::FullMatch("othello", "h.*o"));       // Must be anchored at front
+  CHECK(!RE2::FullMatch("hello!", "h.*o"));        // Must be anchored at end
+}
+
+TEST(RE2, PartialMatch) {
+  CHECK(RE2::PartialMatch("x", "x"));
+  CHECK(RE2::PartialMatch("hello", "h.*o"));
+  CHECK(RE2::PartialMatch("othello", "h.*o"));
+  CHECK(RE2::PartialMatch("hello!", "h.*o"));
+  CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
+}
+
+TEST(RE2, PartialMatchN) {
+  RE2::Arg argv[2];
+  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+  // 0 arg
+  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
+  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
+
+  // 1 arg
+  int i;
+  argv[0] = &i;
+  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
+  EXPECT_EQ(1001, i);
+  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
+
+  // Multi-arg
+  string s;
+  argv[1] = &s;
+  EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
+  EXPECT_EQ(42, i);
+  EXPECT_EQ("life", s);
+  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
+}
+
+TEST(RE2, FullMatchZeroArg) {
+  // Zero-arg
+  CHECK(RE2::FullMatch("1001", "\\d+"));
+}
+
+TEST(RE2, FullMatchOneArg) {
+  int i;
+
+  // Single-arg
+  CHECK(RE2::FullMatch("1001", "(\\d+)",   &i));
+  CHECK_EQ(i, 1001);
+  CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i));
+  CHECK_EQ(i, -123);
+  CHECK(!RE2::FullMatch("10", "()\\d+", &i));
+  CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890",
+                       "(\\d+)", &i));
+}
+
+TEST(RE2, FullMatchIntegerArg) {
+  int i;
+
+  // Digits surrounding integer-arg
+  CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i));
+  CHECK_EQ(i, 23);
+  CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i));
+  CHECK_EQ(i, 1);
+  CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
+  CHECK_EQ(i, -1);
+  CHECK(RE2::PartialMatch("1234", "(\\d)", &i));
+  CHECK_EQ(i, 1);
+  CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i));
+  CHECK_EQ(i, -1);
+}
+
+TEST(RE2, FullMatchStringArg) {
+  string s;
+  // String-arg
+  CHECK(RE2::FullMatch("hello", "h(.*)o", &s));
+  CHECK_EQ(s, string("ell"));
+}
+
+TEST(RE2, FullMatchStringPieceArg) {
+  int i;
+  // StringPiece-arg
+  StringPiece sp;
+  CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
+  CHECK_EQ(sp.size(), 4);
+  CHECK(memcmp(sp.data(), "ruby", 4) == 0);
+  CHECK_EQ(i, 1234);
+}
+
+TEST(RE2, FullMatchMultiArg) {
+  int i;
+  string s;
+  // Multi-arg
+  CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+  CHECK_EQ(s, string("ruby"));
+  CHECK_EQ(i, 1234);
+}
+
+TEST(RE2, FullMatchN) {
+  RE2::Arg argv[2];
+  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+  // 0 arg
+  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
+  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
+
+  // 1 arg
+  int i;
+  argv[0] = &i;
+  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
+  EXPECT_EQ(1001, i);
+  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
+
+  // Multi-arg
+  string s;
+  argv[1] = &s;
+  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
+  EXPECT_EQ(42, i);
+  EXPECT_EQ("life", s);
+  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
+}
+
+TEST(RE2, FullMatchIgnoredArg) {
+  int i;
+  string s;
+  // Ignored arg
+  CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
+  CHECK_EQ(s, string("ruby"));
+  CHECK_EQ(i, 1234);
+}
+
+TEST(RE2, FullMatchTypedNullArg) {
+  string s;
+
+  // Ignore non-void* NULL arg
+  CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
+  CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
+  CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
+  CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL));
+  CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
+  CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
+  CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
+
+  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
+  CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
+  CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
+  CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
+  CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
+  CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
+}
+
+// Check that numeric parsing code does not read past the end of
+// the number being parsed.
+TEST(RE2, NULTerminated) {
+  char *v;
+  int x;
+  long pagesize = sysconf(_SC_PAGE_SIZE);
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+  v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
+                              MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
+  CHECK(v != reinterpret_cast<char*>(-1));
+  LOG(INFO) << "Memory at " << (void*)v;
+  CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
+  v[pagesize - 1] = '1';
+
+  x = 0;
+  CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
+  CHECK_EQ(x, 1);
+}
+
+TEST(RE2, FullMatchTypeTests) {
+  // Type tests
+  string zeros(100, '0');
+  {
+    char c;
+    CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
+    CHECK_EQ(c, 'H');
+  }
+  {
+    unsigned char c;
+    CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
+    CHECK_EQ(c, static_cast<unsigned char>('H'));
+  }
+  {
+    int16 v;
+    CHECK(RE2::FullMatch("100",     "(-?\\d+)", &v));    CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("-100",    "(-?\\d+)", &v));    CHECK_EQ(v, -100);
+    CHECK(RE2::FullMatch("32767",   "(-?\\d+)", &v));    CHECK_EQ(v, 32767);
+    CHECK(RE2::FullMatch("-32768",  "(-?\\d+)", &v));    CHECK_EQ(v, -32768);
+    CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v));
+    CHECK(!RE2::FullMatch("32768",  "(-?\\d+)", &v));
+  }
+  {
+    uint16 v;
+    CHECK(RE2::FullMatch("100",     "(\\d+)", &v));    CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("32767",   "(\\d+)", &v));    CHECK_EQ(v, 32767);
+    CHECK(RE2::FullMatch("65535",   "(\\d+)", &v));    CHECK_EQ(v, 65535);
+    CHECK(!RE2::FullMatch("65536",  "(\\d+)", &v));
+  }
+  {
+    int32 v;
+    static const int32 max = 0x7fffffff;
+    static const int32 min = -max - 1;
+    CHECK(RE2::FullMatch("100",          "(-?\\d+)", &v)); CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("-100",         "(-?\\d+)", &v)); CHECK_EQ(v, -100);
+    CHECK(RE2::FullMatch("2147483647",   "(-?\\d+)", &v)); CHECK_EQ(v, max);
+    CHECK(RE2::FullMatch("-2147483648",  "(-?\\d+)", &v)); CHECK_EQ(v, min);
+    CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
+    CHECK(!RE2::FullMatch("2147483648",  "(-?\\d+)", &v));
+
+    CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
+    CHECK_EQ(v, max);
+    CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
+    CHECK_EQ(v, min);
+
+    CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
+    CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
+    CHECK_EQ(v, max);
+    CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
+  }
+  {
+    uint32 v;
+    static const uint32 max = 0xfffffffful;
+    CHECK(RE2::FullMatch("100",         "(\\d+)", &v)); CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("4294967295",  "(\\d+)", &v)); CHECK_EQ(v, max);
+    CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v));
+    CHECK(!RE2::FullMatch("-1",         "(\\d+)", &v));
+
+    CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
+  }
+  {
+    int64 v;
+    static const int64 max = 0x7fffffffffffffffull;
+    static const int64 min = -max - 1;
+    char buf[32];
+
+    CHECK(RE2::FullMatch("100",  "(-?\\d+)", &v)); CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
+
+    snprintf(buf, sizeof(buf), "%lld", (long long int)max);
+    CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, max);
+
+    snprintf(buf, sizeof(buf), "%lld", (long long int)min);
+    CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, min);
+
+    snprintf(buf, sizeof(buf), "%lld", (long long int)max);
+    assert(buf[strlen(buf)-1] != '9');
+    buf[strlen(buf)-1]++;
+    CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
+
+    snprintf(buf, sizeof(buf), "%lld", (long long int)min);
+    assert(buf[strlen(buf)-1] != '9');
+    buf[strlen(buf)-1]++;
+    CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
+  }
+  {
+    uint64 v;
+    int64 v2;
+    static const uint64 max = 0xffffffffffffffffull;
+    char buf[32];
+
+    CHECK(RE2::FullMatch("100",  "(-?\\d+)", &v));  CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100);
+
+    snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max);
+    CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, max);
+
+    assert(buf[strlen(buf)-1] != '9');
+    buf[strlen(buf)-1]++;
+    CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
+  }
+}
+
+TEST(RE2, FloatingPointFullMatchTypes) {
+  string zeros(100, '0');
+  {
+    float v;
+    CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
+    CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, float(1e23));
+
+    CHECK(RE2::FullMatch(zeros + "1e23",  "(.*)", &v));
+    CHECK_EQ(v, float(1e23));
+
+    // 6700000000081920.1 is an edge case.
+    // 6700000000081920 is exactly halfway between
+    // two float32s, so the .1 should make it round up.
+    // However, the .1 is outside the precision possible with
+    // a float64: the nearest float64 is 6700000000081920.
+    // So if the code uses strtod and then converts to float32,
+    // round-to-even will make it round down instead of up.
+    // To pass the test, the parser must call strtof directly.
+    // This test case is carefully chosen to use only a 17-digit
+    // number, since C does not guarantee to get the correctly
+    // rounded answer for strtod and strtof unless the input is
+    // short.
+    CHECK(RE2::FullMatch("0.1", "(.*)", &v));
+    CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
+    CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
+    CHECK_EQ(v, 6700000000081920.1f)
+      << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
+  }
+  {
+    double v;
+    CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
+    CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
+    CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, 1e23);
+    CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
+    CHECK_EQ(v, double(1e23));
+
+    CHECK(RE2::FullMatch("0.1", "(.*)", &v));
+    CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
+    CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
+    CHECK_EQ(v, 1.0000000596046448)
+      << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
+  }
+}
+
+TEST(RE2, FullMatchAnchored) {
+  int i;
+  // Check that matching is fully anchored
+  CHECK(!RE2::FullMatch("x1001", "(\\d+)",  &i));
+  CHECK(!RE2::FullMatch("1001x", "(\\d+)",  &i));
+  CHECK(RE2::FullMatch("x1001",  "x(\\d+)", &i)); CHECK_EQ(i, 1001);
+  CHECK(RE2::FullMatch("1001x",  "(\\d+)x", &i)); CHECK_EQ(i, 1001);
+}
+
+TEST(RE2, FullMatchBraces) {
+  // Braces
+  CHECK(RE2::FullMatch("0abcd",  "[0-9a-f+.-]{5,}"));
+  CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
+  CHECK(!RE2::FullMatch("0abc",  "[0-9a-f+.-]{5,}"));
+}
+
+TEST(RE2, Complicated) {
+  // Alternation of two literals and a one-character class.
+  const char* const alternation = "foo|bar|[A-Z]";
+  CHECK(RE2::FullMatch("foo", alternation));
+  CHECK(RE2::FullMatch("bar", alternation));
+  CHECK(RE2::FullMatch("X", alternation));
+  // Two upper-case letters are not covered by any single alternative.
+  CHECK(!RE2::FullMatch("XY", alternation));
+}
+
+TEST(RE2, FullMatchEnd) {
+  // FullMatch has to behave as if '$' were tacked onto the pattern, so
+  // either alternative must be able to cover the entire text.
+  const char* const plain = "fo|foo";
+  CHECK(RE2::FullMatch("fo", plain));
+  CHECK(RE2::FullMatch("foo", plain));
+
+  // An explicit '$' in the pattern must not change the outcome.
+  const char* const with_dollar = "fo|foo$";
+  CHECK(RE2::FullMatch("fo", with_dollar));
+  CHECK(RE2::FullMatch("foo", with_dollar));
+  CHECK(RE2::FullMatch("foo", "foo$"));
+
+  // Neither of these patterns covers its whole text.
+  CHECK(!RE2::FullMatch("foo$bar", "foo\\$"));
+  CHECK(!RE2::FullMatch("fox", "fo|bar"));
+
+  // Disabled: enable this if the handling of '$' is ever changed so
+  // that it no longer matches before a trailing newline (pcre's
+  // special case).
+  if (false) {
+    CHECK(!RE2::PartialMatch("foo\n", "foo$"));
+  }
+}
+
+TEST(RE2, FullMatchArgCount) {
+  // Number of args: FullMatch must fill 1 through 7, and then 16, int
+  // captures.  The whole array is zeroed before every case (sizeof(a),
+  // not sizeof(0), which is only the size of one int) so that a capture
+  // FullMatch failed to write cannot pass on a value left over from the
+  // previous case.
+  int a[16];
+  CHECK(RE2::FullMatch("", ""));
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("1",
+                      "(\\d){1}",
+                      &a[0]));
+  CHECK_EQ(a[0], 1);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("12",
+                      "(\\d)(\\d)",
+                      &a[0],  &a[1]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("123",
+                      "(\\d)(\\d)(\\d)",
+                      &a[0],  &a[1],  &a[2]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+  CHECK_EQ(a[2], 3);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("1234",
+                      "(\\d)(\\d)(\\d)(\\d)",
+                      &a[0],  &a[1],  &a[2],  &a[3]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+  CHECK_EQ(a[2], 3);
+  CHECK_EQ(a[3], 4);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("12345",
+                      "(\\d)(\\d)(\\d)(\\d)(\\d)",
+                      &a[0],  &a[1],  &a[2],  &a[3],
+                      &a[4]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+  CHECK_EQ(a[2], 3);
+  CHECK_EQ(a[3], 4);
+  CHECK_EQ(a[4], 5);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("123456",
+                      "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                      &a[0],  &a[1],  &a[2],  &a[3],
+                      &a[4],  &a[5]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+  CHECK_EQ(a[2], 3);
+  CHECK_EQ(a[3], 4);
+  CHECK_EQ(a[4], 5);
+  CHECK_EQ(a[5], 6);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("1234567",
+                      "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                      &a[0],  &a[1],  &a[2],  &a[3],
+                      &a[4],  &a[5],  &a[6]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+  CHECK_EQ(a[2], 3);
+  CHECK_EQ(a[3], 4);
+  CHECK_EQ(a[4], 5);
+  CHECK_EQ(a[5], 6);
+  CHECK_EQ(a[6], 7);
+
+  memset(a, 0, sizeof(a));
+  CHECK(RE2::FullMatch("1234567890123456",
+                      "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
+                      "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                      &a[0],  &a[1],  &a[2],  &a[3],
+                      &a[4],  &a[5],  &a[6],  &a[7],
+                      &a[8],  &a[9],  &a[10], &a[11],
+                      &a[12], &a[13], &a[14], &a[15]));
+  CHECK_EQ(a[0], 1);
+  CHECK_EQ(a[1], 2);
+  CHECK_EQ(a[2], 3);
+  CHECK_EQ(a[3], 4);
+  CHECK_EQ(a[4], 5);
+  CHECK_EQ(a[5], 6);
+  CHECK_EQ(a[6], 7);
+  CHECK_EQ(a[7], 8);
+  CHECK_EQ(a[8], 9);
+  CHECK_EQ(a[9], 0);
+  CHECK_EQ(a[10], 1);
+  CHECK_EQ(a[11], 2);
+  CHECK_EQ(a[12], 3);
+  CHECK_EQ(a[13], 4);
+  CHECK_EQ(a[14], 5);
+  CHECK_EQ(a[15], 6);
+}
+
+TEST(RE2, Accessors) {
+  // pattern() must hand back exactly the string the RE2 was built from.
+  {
+    const string source = "http://([^/]+)/.*";
+    const RE2 compiled(source);
+    CHECK_EQ(source, compiled.pattern());
+  }
+
+  // A valid pattern must report success through every error accessor.
+  {
+    RE2 valid("foo");
+    CHECK(valid.error().empty());
+    CHECK(valid.ok());
+    CHECK(valid.error_code() == RE2::NoError);
+  }
+}
+
+TEST(RE2, UTF8) {
+  // Check UTF-8 handling.
+  // Three Japanese characters (nihongo), spelled out as raw UTF-8 bytes.
+  // Every byte is >= 0x80, so each is cast explicitly: a braced
+  // initializer may not narrow an int constant that does not fit in a
+  // (possibly signed) char.
+  const char utf8_string[] = {
+       static_cast<char>(0xe6), static_cast<char>(0x97),
+       static_cast<char>(0xa5),  // U+65E5
+       static_cast<char>(0xe6), static_cast<char>(0x9c),
+       static_cast<char>(0xac),  // U+672C
+       static_cast<char>(0xe8), static_cast<char>(0xaa),
+       static_cast<char>(0x9e),  // U+8A9E
+       0
+  };
+  // '.', the middle character of utf8_string, '.'.
+  const char utf8_pattern[] = {
+       '.',
+       static_cast<char>(0xe6), static_cast<char>(0x9c),
+       static_cast<char>(0xac),  // U+672C
+       '.',
+       0
+  };
+
+  // Both should match in either mode, bytes or UTF-8:
+  // nine bytes in Latin-1, three characters in UTF-8.
+  RE2 re_test1(".........", RE2::Latin1);
+  CHECK(RE2::FullMatch(utf8_string, re_test1));
+  RE2 re_test2("...");
+  CHECK(RE2::FullMatch(utf8_string, re_test2));
+
+  // Check that '.' matches one byte or UTF-8 character
+  // according to the mode.
+  string s;
+  RE2 re_test3("(.)", RE2::Latin1);
+  CHECK(RE2::PartialMatch(utf8_string, re_test3, &s));
+  CHECK_EQ(s, string("\xe6"));
+  RE2 re_test4("(.)");
+  CHECK(RE2::PartialMatch(utf8_string, re_test4, &s));
+  CHECK_EQ(s, string("\xe6\x97\xa5"));
+
+  // Check that string matches itself in either mode
+  RE2 re_test5(utf8_string, RE2::Latin1);
+  CHECK(RE2::FullMatch(utf8_string, re_test5));
+  RE2 re_test6(utf8_string);
+  CHECK(RE2::FullMatch(utf8_string, re_test6));
+
+  // Check that pattern matches string only in UTF8 mode
+  RE2 re_test7(utf8_pattern, RE2::Latin1);
+  CHECK(!RE2::FullMatch(utf8_string, re_test7));
+  RE2 re_test8(utf8_pattern);
+  CHECK(RE2::FullMatch(utf8_string, re_test8));
+}
+
+TEST(RE2, UngreedyUTF8) {
+  // Ungreedy UTF-8 regular expressions must not match text they
+  // oughtn't -- see bug 82246.  Each pattern is tried both in Latin-1
+  // mode and with default options.
+  const string text = "a aX";
+  {
+    // Greedy form: this always worked.
+    const char* greedy = "\\w+X";
+    RE2 latin1_re(greedy, RE2::Latin1);
+    RE2 default_re(greedy);
+
+    CHECK(!RE2::FullMatch(text, latin1_re));
+    CHECK(!RE2::FullMatch(text, default_re));
+  }
+  {
+    // Ungreedy form, via the (?U) flag.
+    const char* ungreedy = "(?U)\\w+X";
+    RE2 latin1_re(ungreedy, RE2::Latin1);
+    CHECK_EQ(latin1_re.error(), "");
+    RE2 default_re(ungreedy);
+
+    CHECK(!RE2::FullMatch(text, latin1_re));
+    CHECK(!RE2::FullMatch(text, default_re));
+  }
+}
+
+TEST(RE2, Rejects) {
+  // Every pattern in this table must be rejected at construction time.
+  static const char* const kBadPatterns[] = {
+    "a\\1",
+    "a[x",
+    "a[z-a]",
+    "a[[:foobar:]]",
+    "a(b",
+    "a\\",
+  };
+  const size_t count = sizeof(kBadPatterns) / sizeof(kBadPatterns[0]);
+  for (size_t n = 0; n < count; n++) {
+    RE2 re(kBadPatterns[n], RE2::Quiet);
+    CHECK(!re.ok());
+  }
+}
+
+TEST(RE2, NoCrash) {
+  // Matching with a regexp that failed to compile must fail, not crash.
+  {
+    RE2 broken("a\\", RE2::Quiet);
+    CHECK(!broken.ok());
+    CHECK(!RE2::PartialMatch("a\\b", broken));
+  }
+
+  // The same goes for an enormous regexp.
+  {
+    RE2 enormous("(((.{100}){100}){100}){100}", RE2::Quiet);
+    CHECK(!enormous.ok());
+    CHECK(!RE2::PartialMatch("aaa", enormous));
+  }
+
+  // A crazy regexp that is still acceptable must compile and run.
+  {
+    RE2 crazy(".{512}x", RE2::Quiet);
+    CHECK(crazy.ok());
+    string text(515, 'c');
+    text += "x";
+    CHECK(RE2::PartialMatch(text, crazy));
+  }
+}
+
+TEST(RE2, Recursion) {
+  // PCRE-legacy test that recursion is stopped -- there's no recursion
+  // in RE2.  The same input size is tried against several patterns.
+  const int kBytes = 15 * 1024;  // enough to crash PCRE
+  static const char* const kPatterns[] = { ".", "a", "a.", "ab.", "abc." };
+  const size_t count = sizeof(kPatterns) / sizeof(kPatterns[0]);
+  for (size_t n = 0; n < count; n++)
+    TestRecursion(kBytes, kPatterns[n]);
+}
+
+TEST(RE2, BigCountedRepetition) {
+  // Given tons of memory (max_mem of 256<<20), counted repetition
+  // must compile and match.
+  RE2::Options options;
+  options.set_max_mem(256<<20);
+
+  RE2 re(".{512}x", options);
+  CHECK(re.ok());
+  string text(515, 'c');
+  text += "x";
+  CHECK(RE2::PartialMatch(text, re));
+}
+
+TEST(RE2, DeepRecursion) {
+  // Another PCRE-legacy test: before pcre was patched, this input
+  // caused a segmentation violation from stack overflow due to deep
+  // recursion.  RE2 doesn't recurse.
+  // The text is "x*", then 131072 'a's, then "*x".
+  string comment = "x*";
+  comment.append(131072, 'a');
+  comment.append("*x");
+  RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
+  CHECK(RE2::FullMatch(comment, re));
+}
+
+// Suggested by Josh Hyman.  This failed while SearchOnePass
+// did not implement case-folding.
+TEST(CaseInsensitive, MatchAndConsume) {
+  const char* const kPattern = "(?i)([wand]{5})";
+  string text = "A fish named *Wanda*";
+  StringPiece input(text);
+  string captured;
+
+  EXPECT_TRUE(RE2::PartialMatch(input, kPattern, &captured));
+  EXPECT_TRUE(RE2::FindAndConsume(&input, kPattern, &captured));
+}
+
+// RE2 should permit implicit conversions from string, StringPiece, const char*,
+// and C string literals.  Each call below passes the pattern "." as one of those types.
+TEST(RE2, ImplicitConversions) {
+  string re_string(".");
+  StringPiece re_stringpiece(".");
+  const char* re_cstring = ".";
+  EXPECT_TRUE(RE2::PartialMatch("e", re_string));       // from string
+  EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));  // from StringPiece
+  EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));      // from const char*
+  EXPECT_TRUE(RE2::PartialMatch("e", "."));             // from a string literal
+}
+
+// Regression tests for bugs introduced by 8622304.
+TEST(RE2, CL8622304) {
+  // Reported by ingow: the match was ok without a capture argument
+  // but failed once one was supplied.
+  const char* const kNotBackslash = "([^\\\\])";
+  string dir;
+  EXPECT_TRUE(RE2::FullMatch("D", kNotBackslash));
+  EXPECT_TRUE(RE2::FullMatch("D", kNotBackslash, &dir));
+
+  // Reported by jacobsa: extract the first key and its value list.
+  const char* const kInput = "bar:1,0x2F,030,4,5;baz:true;fooby:false,true";
+  const char* const kKeyValue = "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?";
+  string key, val;
+  EXPECT_TRUE(RE2::PartialMatch(kInput, kKeyValue, &key, &val));
+  EXPECT_EQ(key, "bar");
+  EXPECT_EQ(val, "1,0x2F,030,4,5");
+}
+
+
+// Check that RE2 returns correct regexp pieces on error.
+// In particular, make sure it returns whole runes
+// and that it always reports invalid UTF-8.
+// Also check that Perl error flag piece is big enough.
+static struct ErrorTest {
+  const char *regexp;  // pattern that is expected to fail to compile
+  const char *error;   // expected value of RE2::error_arg() for that pattern
+} error_tests[] = {
+  { "ab\\αcd", "\\α" },
+  { "ef\\x☺01", "\\x☺0" },
+  { "gh\\x1☺01", "\\x1☺" },
+  { "ij\\x1", "\\x1" },
+  { "kl\\x", "\\x" },
+  { "uv\\x{0000☺}", "\\x{0000☺" },
+  { "wx\\p{ABC", "\\p{ABC" },
+  { "yz(?smiUX:abc)", "(?smiUX" },   // used to return (?s but the error is X
+  { "aa(?sm☺i", "(?sm☺" },
+  { "bb[abc", "[abc" },
+
+  { "mn\\x1\377", "" },  // no argument string returned for invalid UTF-8 (the \377 byte)
+  { "op\377qr", "" },
+  { "st\\x{00000\377", "" },
+  { "zz\\p{\377}", "" },
+  { "zz\\x{00\377}", "" },
+  { "zz(?P<name\377>abc)", "" },
+};
+TEST(RE2, ErrorArgs) {
+  // Every table entry must fail to compile and report the expected
+  // fragment of the pattern through error_arg().
+  for (int n = 0; n < arraysize(error_tests); n++) {
+    const ErrorTest& entry = error_tests[n];
+    RE2 re(entry.regexp, RE2::Quiet);
+    EXPECT_FALSE(re.ok());
+    EXPECT_EQ(re.error_arg(), entry.error) << re.error();
+  }
+}
+
+// Check that "never match \n" mode never matches \n.  Table consumed by TEST(RE2, NeverNewline).
+static struct NeverTest {
+  const char* regexp;  // pattern compiled with never_nl set
+  const char* text;    // text to search
+  const char* match;   // expected first capture, or NULL when no match is expected
+} never_tests[] = {
+  { "(.*)", "abc\ndef\nghi\n", "abc" },
+  { "(?s)(abc.*def)", "abc\ndef\n", NULL },
+  { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
+  { "(abc[^x]*def)", "abc\ndef\n", NULL },
+  { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
+};
+TEST(RE2, NeverNewline) {
+  // Run every never_tests entry with never_nl enabled.  A NULL expected
+  // match means the search must fail; otherwise the first capture must
+  // equal the expected text.
+  RE2::Options options;
+  options.set_never_nl(true);
+  for (int n = 0; n < arraysize(never_tests); n++) {
+    const NeverTest& entry = never_tests[n];
+    RE2 re(entry.regexp, options);
+    if (entry.match != NULL) {
+      StringPiece captured;
+      EXPECT_TRUE(RE2::PartialMatch(entry.text, re, &captured));
+      EXPECT_EQ(captured, entry.match);
+    } else {
+      EXPECT_FALSE(RE2::PartialMatch(entry.text, re));
+    }
+  }
+}
+
+// In "never capture" mode, parentheses in the pattern must not
+// produce any capturing groups.
+TEST(RE2, NeverCapture) {
+  RE2::Options options;
+  options.set_never_capture(true);
+  RE2 two_groups("(r)(e)", options);
+  EXPECT_EQ(0, two_groups.NumberOfCapturingGroups());
+}
+
+// Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
+// Triggered by a failed DFA search falling back to Bitstate when
+// using Match with a NULL submatch set.  Bitstate tried to read
+// the submatch[0] entry even if nsubmatch was 0.
+TEST(RE2, BitstateCaptureBug) {
+  RE2::Options opt;
+  opt.set_max_mem(20000);  // presumably small enough to make the DFA search fail -- confirm
+  RE2 re("(_________$)", opt);
+  StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";  // ends in 'x', so the '$' cannot be satisfied
+  EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));  // NULL submatch array, nsubmatch == 0
+}
+
+// C++ version of bug 609710.  Exercises \p{...} / \P{...} classes on ASCII and CJK text.
+TEST(RE2, UnicodeClasses) {
+  const string str = "ABCDEFGHI譚永鋒";
+  string a, b, c;
+
+  // 'A' is expected to be a letter (L) and upper case (Lu), not lower case (Ll).
+  EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
+  EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
+  EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
+
+  // Each CJK character is expected to be a letter that is neither Lu nor Ll.
+  EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
+  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
+  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
+
+  EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
+  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
+  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
+
+  EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
+  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
+  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
+
+  // With non-greedy gaps, the three captures are the first three characters of str.
+  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
+  EXPECT_EQ("A", a);
+  EXPECT_EQ("B", b);
+  EXPECT_EQ("C", c);
+
+  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
+  EXPECT_EQ("A", a);
+  EXPECT_EQ("B", b);
+  EXPECT_EQ("C", c);
+
+  EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));  // str contains no non-letter
+
+  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
+  EXPECT_EQ("A", a);
+  EXPECT_EQ("B", b);
+  EXPECT_EQ("C", c);
+
+  EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));  // every character of str is Lu or Lo
+
+  // The leading greedy ".*" pushes the captures to the last three characters of str.
+  EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
+  EXPECT_EQ("譚", a);
+  EXPECT_EQ("永", b);
+  EXPECT_EQ("鋒", c);
+}
+
+// Bug reported by saito. 2009/02/17
+TEST(RE2, NullVsEmptyString) {
+  RE2 anything(".*");
+
+  // A StringPiece built from an empty string literal.
+  StringPiece empty_piece("");
+  EXPECT_TRUE(RE2::FullMatch(empty_piece, anything));
+
+  // A default-constructed StringPiece.
+  StringPiece default_piece;
+  EXPECT_TRUE(RE2::FullMatch(default_piece, anything));
+}
+
+// Issue 1816809: Consume with a deeply nested capturing pattern.
+TEST(RE2, Bug1816809) {
+  RE2 nested("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
+  StringPiece input("llx-3;llx4");
+  string first_capture;
+  EXPECT_TRUE(RE2::Consume(&input, nested, &first_capture));
+}
+
+// Issue 3061120: case-insensitive \W must not match ASCII letters.
+TEST(RE2, Bug3061120) {
+  RE2 non_word("(?i)\\W");
+  EXPECT_FALSE(RE2::PartialMatch("x", non_word));  // always worked
+  EXPECT_FALSE(RE2::PartialMatch("k", non_word));  // broke because of kelvin
+  EXPECT_FALSE(RE2::PartialMatch("s", non_word));  // broke because of latin long s
+}
+
+TEST(RE2, CapturingGroupNames) {  // CapturingGroupNames() maps group index -> name
+  // Opening parentheses annotated with group IDs:
+  //      12    3        45   6         7
+  RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
+  EXPECT_TRUE(re.ok());
+  const map<int, string>& have = re.CapturingGroupNames();
+  map<int, string> want;  // only the named groups are expected to appear
+  want[3] = "G2";
+  want[6] = "G2";  // the name G2 is used by two different groups
+  want[7] = "G1";
+  EXPECT_EQ(want, have);
+}
+
+TEST(RE2, RegexpToStringLossOfAnchor) {
+  // Anchors must survive a round trip through Regexp()->ToString():
+  // unchanged with RE2::POSIX, as (?-m:^) / (?-m:$) with default options.
+  RE2 posix_begin("^[a-c]at", RE2::POSIX);
+  EXPECT_EQ(posix_begin.Regexp()->ToString(), "^[a-c]at");
+  RE2 default_begin("^[a-c]at");
+  EXPECT_EQ(default_begin.Regexp()->ToString(), "(?-m:^)[a-c]at");
+
+  RE2 posix_end("ca[t-z]$", RE2::POSIX);
+  EXPECT_EQ(posix_end.Regexp()->ToString(), "ca[t-z]$");
+  RE2 default_end("ca[t-z]$");
+  EXPECT_EQ(default_end.Regexp()->ToString(), "ca[t-z](?-m:$)");
+}
+
+}  // namespace re2

diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
new file mode 100644
index 0000000..ca7627f
--- /dev/null
+++ b/re2/testing/regexp_benchmark.cc

@@ -0,0 +1,1461 @@
+// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Benchmarks for regular expression implementations.
+
+#include "util/test.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "util/pcre.h"
+#include "util/benchmark.h"
+
+namespace re2 {
+void Test();
+void MemoryUsage();
+}  // namespace re2
+
+typedef testing::MallocCounter MallocCounter;
+
+namespace re2 {
+
+void Test() {
+  // Sanity check: parse and compile the phone-number regexp, require
+  // that it is one-pass, and verify the full match and all three
+  // submatches on a sample number.
+  const char* const kText = "650-253-0001";
+  Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  CHECK(prog->IsOnePass());
+
+  StringPiece submatch[4];
+  CHECK(prog->SearchOnePass(kText, kText, Prog::kAnchored, Prog::kFullMatch,
+                            submatch, 4));
+  CHECK_EQ(submatch[0], "650-253-0001");
+  CHECK_EQ(submatch[1], "650");
+  CHECK_EQ(submatch[2], "253");
+  CHECK_EQ(submatch[3], "0001");
+
+  // Release the compiled program and drop the parse-tree reference.
+  delete prog;
+  re->Decref();
+  LOG(INFO) << "test passed\n";
+}
+
+void MemoryUsage() {  // prints heap growth for each stage of each implementation to stderr
+  const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
+  const char* text = "650-253-0001";
+  {  // Regexp parse, Prog compile and one-pass search, measured separately
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
+    // because LOG(INFO) might do a big allocation before they get evaluated.
+    fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+    mc.Reset();  // presumably restarts the counts for the next stage -- confirm
+
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());
+    fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+    mc.Reset();
+
+    StringPiece sp[4];
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+    delete prog;
+    re->Decref();
+  }
+
+  {  // PCRE object with automatic storage: after construction, then after one FullMatch
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+
+    PCRE re(regexp, PCRE::UTF8);
+    fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());  // NOTE(review): label says "RE" but the object is a PCRE
+    PCRE::FullMatch(text, re);
+    fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+  }
+
+  {  // PCRE object allocated with new: same two measurements
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+
+    PCRE* re = new PCRE(regexp, PCRE::UTF8);
+    fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+    PCRE::FullMatch(text, *re);
+    fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+    delete re;
+  }
+
+  {  // RE2 object: after construction, then after one FullMatch
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+
+    RE2 re(regexp);
+    fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+    RE2::FullMatch(text, re);
+    fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
+  }
+
+  // Object sizes of the main types, cast to int to match the %d conversions.
+  fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
+          static_cast<int>(sizeof(PCRE)),
+          static_cast<int>(sizeof(RE2)),
+          static_cast<int>(sizeof(Prog)),
+          static_cast<int>(sizeof(Prog::Inst)));
+}
+
+// Regular expression implementation wrappers, one per engine and
+// per cached/uncached variant.  Only declared here; the comment in the original
+// says they are defined at the bottom of the file because they are repetitive.
+
+typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
+             Prog::Anchor anchor, bool expect_match);  // a function type, not a pointer type
+
+SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,  // declares functions of type SearchImpl
+           SearchPCRE, SearchRE2,
+           SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
+           SearchCachedPCRE, SearchCachedRE2;
+
+typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);  // also a function type
+
+ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
+          Parse1PCRE, Parse1RE2,
+          Parse1Backtrack,
+          Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
+          Parse1CachedPCRE, Parse1CachedRE2,
+          Parse1CachedBacktrack;
+
+ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
+          Parse3PCRE, Parse3RE2,
+          Parse3Backtrack,
+          Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
+          Parse3CachedPCRE, Parse3CachedRE2,
+          Parse3CachedBacktrack;
+
+ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
+
+ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
+
+// Benchmark: failed search for regexp in random text.
+
+// Generate random text that won't contain the search string,
+// to test worst-case search behavior.  Resizes *text to nbytes and overwrites every byte.
+void MakeText(string* text, int nbytes) {
+  text->resize(nbytes);
+  srand(0);  // fixed seed rather than a time-based one
+  for (int i = 0; i < nbytes; i++) {
+    if (!rand()%30)  // NOTE(review): parses as (!rand())%30, i.e. true only when rand() returns 0 -- confirm intent before changing; SearchSuccess and SearchBigFixed expect ".*$" to match this text
+      (*text)[i] = '\n';
+    else
+      (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20;  // a byte in the range 0x20..0x7E
+  }
+}
+
+// Builds nbytes of text with MakeText, then has the given search
+// implementation look for regexp in it iters times, unanchored and
+// expecting no match.  Timing is stopped while the text is built.
+void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
+  StopBenchmarkTiming();
+  string text;
+  MakeText(&text, nbytes);
+  BenchmarkMemoryUsage();
+  StartBenchmarkTiming();
+  (*search)(iters, regexp, text, Prog::kUnanchored, false);
+  const int64 total_bytes = static_cast<int64>(iters) * nbytes;
+  SetBenchmarkBytesProcessed(total_bytes);
+}
+
+// These two are easy because they start with a literal 'A',
+// giving the search loop something to memchr for.
+#define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+#define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
+
+// This is a little harder, since it starts with a character class
+// and thus can't be memchr'ed.  An engine could look for ABC and work
+// backward, but no one does that.
+#define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+
+// This is a fair amount harder, because of the leading [ -~]*.
+// A bad backtracking implementation will take O(text^2) time to
+// figure out there's no match.
+#define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+
+// This stresses engines that are trying to track parentheses:
+// the HARD pattern with every element wrapped in a capturing group.
+#define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
+                   "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
+
+void Search_Easy0_CachedDFA(int i, int n)     { Search(i, n, EASY0, SearchCachedDFA); }  // i = iters, n = nbytes of Search()
+void Search_Easy0_CachedNFA(int i, int n)     { Search(i, n, EASY0, SearchCachedNFA); }
+void Search_Easy0_CachedPCRE(int i, int n)    { Search(i, n, EASY0, SearchCachedPCRE); }
+void Search_Easy0_CachedRE2(int i, int n)     { Search(i, n, EASY0, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());  // NFA upper bound is 256<<10, not 16<<20
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Easy1_CachedDFA(int i, int n)     { Search(i, n, EASY1, SearchCachedDFA); }  // i = iters, n = nbytes of Search()
+void Search_Easy1_CachedNFA(int i, int n)     { Search(i, n, EASY1, SearchCachedNFA); }
+void Search_Easy1_CachedPCRE(int i, int n)    { Search(i, n, EASY1, SearchCachedPCRE); }
+void Search_Easy1_CachedRE2(int i, int n)     { Search(i, n, EASY1, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());  // NFA upper bound is 256<<10, not 16<<20
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Medium_CachedDFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedDFA); }  // i = iters, n = nbytes of Search()
+void Search_Medium_CachedNFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedNFA); }
+void Search_Medium_CachedPCRE(int i, int n)    { Search(i, n, MEDIUM, SearchCachedPCRE); }
+void Search_Medium_CachedRE2(int i, int n)     { Search(i, n, MEDIUM, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());  // PCRE upper bound drops to 256<<10 for MEDIUM
+#endif
+BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Hard_CachedDFA(int i, int n)     { Search(i, n, HARD, SearchCachedDFA); }  // i = iters, n = nbytes of Search()
+void Search_Hard_CachedNFA(int i, int n)     { Search(i, n, HARD, SearchCachedNFA); }
+void Search_Hard_CachedPCRE(int i, int n)    { Search(i, n, HARD, SearchCachedPCRE); }
+void Search_Hard_CachedRE2(int i, int n)     { Search(i, n, HARD, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());  // PCRE upper bound drops to 4<<10 for HARD
+#endif
+BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Parens_CachedDFA(int i, int n)     { Search(i, n, PARENS, SearchCachedDFA); }  // i = iters, n = nbytes of Search()
+void Search_Parens_CachedNFA(int i, int n)     { Search(i, n, PARENS, SearchCachedNFA); }
+void Search_Parens_CachedPCRE(int i, int n)    { Search(i, n, PARENS, SearchCachedPCRE); }
+void Search_Parens_CachedRE2(int i, int n)     { Search(i, n, PARENS, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());  // PCRE range is the single size 8 for PARENS
+#endif
+BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
+  // The regexp is '^', then nbytes/2 copies of 'x', then ".*$".  The
+  // text is the same run of 'x' followed by nbytes/2 bytes of MakeText
+  // output, and the search is expected to match.  Timing is stopped
+  // while both are built.
+  StopBenchmarkTiming();
+  const string prefix(nbytes/2, 'x');
+  const string regexp = "^" + prefix + ".*$";
+  string tail;
+  MakeText(&tail, nbytes/2);
+  const string text = prefix + tail;
+  BenchmarkMemoryUsage();
+  StartBenchmarkTiming();
+  (*search)(iters, regexp.c_str(), text, Prog::kUnanchored, true);
+  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
+}
+
+void Search_BigFixed_CachedDFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedDFA); }  // i = iters, n = nbytes of SearchBigFixed()
+void Search_BigFixed_CachedNFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedNFA); }
+void Search_BigFixed_CachedPCRE(int i, int n)    { SearchBigFixed(i, n, SearchCachedPCRE); }
+void Search_BigFixed_CachedRE2(int i, int n)     { SearchBigFixed(i, n, SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());  // NFA and PCRE stop at 32<<10; DFA and RE2 go to 1<<20
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: RE2::FindAndConsume locating "Hello World" appended to
+// nbytes of MakeText output.  The RE2 is constructed after timing is
+// restarted, so its construction is inside the timed region.
+void FindAndConsume(int iters, int nbytes) {
+  StopBenchmarkTiming();
+  string haystack;
+  MakeText(&haystack, nbytes);
+  haystack += "Hello World";
+  StartBenchmarkTiming();
+  RE2 re("((Hello World))");
+  for (int iter = 0; iter < iters; iter++) {
+    // Start from the whole text on every iteration.
+    StringPiece remaining = haystack;
+    StringPiece found;
+    CHECK(RE2::FindAndConsume(&remaining, re, &found));
+    CHECK_EQ(found, "Hello World");
+  }
+  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
+}
+
+BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: successful anchored search.
+
+void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
+  // Anchored search over nbytes of MakeText output, expected to match.
+  string text;
+  MakeText(&text, nbytes);
+  BenchmarkMemoryUsage();
+  (*search)(iters, regexp, text, Prog::kAnchored, true);
+  const int64 total_bytes = static_cast<int64>(iters) * nbytes;
+  SetBenchmarkBytesProcessed(total_bytes);
+}
+
+// Unambiguous search (RE2 can use OnePass).  Uncached variants, all using the regexp ".*$".
+
+void Search_Success_DFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchDFA); }  // i = iters, n = nbytes of SearchSuccess()
+void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
+void Search_Success_PCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchPCRE); }
+void Search_Success_RE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchRE2); }
+
+BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());  // OnePass upper bound is 2<<20, not 16<<20
+
+void Search_Success_CachedDFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedDFA); }  // cached counterparts of the Search_Success_* group
+void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
+void Search_Success_CachedPCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
+void Search_Success_CachedRE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());  // OnePass upper bound is 2<<20, not 16<<20
+
+// Ambiguous search (RE2 cannot use OnePass).  Uncached variants, all using the regexp ".*.$".
+
+void Search_Success1_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchDFA); }  // i = iters, n = nbytes of SearchSuccess()
+void Search_Success1_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchPCRE); }
+void Search_Success1_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchRE2); }
+void Search_Success1_BitState(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchBitState); }
+
+BENCHMARK_RANGE(Search_Success1_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Success1_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success1_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());  // BitState upper bound is 2<<20, not 16<<20
+
+void Search_Success1_Cached_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }  // cached counterparts; no BitState variant in this group
+void Search_Success1_Cached_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
+void Search_Success1_Cached_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
+
+BENCHMARK_RANGE(Search_Success1_Cached_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK_RANGE(Search_Success1_Cached_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success1_Cached_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: use regexp to find phone number.
+
+void SearchDigits(int iters, SearchImpl* search) {
+  // Anchored search for a three-field digit pattern in a fixed phone
+  // number; a match is expected.  Reports items, not bytes, processed.
+  const char* const kPhone = "650-253-0001";
+  const int length = strlen(kPhone);
+  BenchmarkMemoryUsage();
+  (*search)(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
+            StringPiece(kPhone, length), Prog::kAnchored, true);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+void Search_Digits_DFA(int i)         { SearchDigits(i, SearchDFA); }  // i = iters of SearchDigits()
+void Search_Digits_NFA(int i)         { SearchDigits(i, SearchNFA); }
+void Search_Digits_OnePass(int i)     { SearchDigits(i, SearchOnePass); }
+void Search_Digits_PCRE(int i)        { SearchDigits(i, SearchPCRE); }
+void Search_Digits_RE2(int i)         { SearchDigits(i, SearchRE2); }
+void Search_Digits_BitState(int i)         { SearchDigits(i, SearchBitState); }
+
+BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());  // BENCHMARK rather than BENCHMARK_RANGE: these take no size argument
+BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
+
+// Benchmark: use regexp to parse digit fields in phone number.
+
+void Parse3Digits(int iters,
+               void (*parse3)(int, const char*, const StringPiece&)) {
+  // Runs the given three-field parser iters times on a fixed phone
+  // number, using explicit [0-9] character classes.
+  const char* const kRegexp = "([0-9]+)-([0-9]+)-([0-9]+)";
+  const char* const kPhone = "650-253-0001";
+  BenchmarkMemoryUsage();
+  (*parse3)(iters, kRegexp, kPhone);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+void Parse_Digits_NFA(int i)         { Parse3Digits(i, Parse3NFA); }  // i = iters of Parse3Digits()
+void Parse_Digits_OnePass(int i)     { Parse3Digits(i, Parse3OnePass); }
+void Parse_Digits_PCRE(int i)        { Parse3Digits(i, Parse3PCRE); }
+void Parse_Digits_RE2(int i)         { Parse3Digits(i, Parse3RE2); }
+void Parse_Digits_Backtrack(int i)   { Parse3Digits(i, Parse3Backtrack); }
+void Parse_Digits_BitState(int i)   { Parse3Digits(i, Parse3BitState); }
+
+BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when USEPCRE is defined
+BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
+
+// Same benchmark as Parse_Digits_*, but routed through the Parse3Cached*
+// routines, which build the regexp once instead of once per iteration.
+void Parse_CachedDigits_NFA(int i)         { Parse3Digits(i, Parse3CachedNFA); }
+void Parse_CachedDigits_OnePass(int i)     { Parse3Digits(i, Parse3CachedOnePass); }
+void Parse_CachedDigits_PCRE(int i)        { Parse3Digits(i, Parse3CachedPCRE); }
+void Parse_CachedDigits_RE2(int i)         { Parse3Digits(i, Parse3CachedRE2); }
+void Parse_CachedDigits_Backtrack(int i)   { Parse3Digits(i, Parse3CachedBacktrack); }
+void Parse_CachedDigits_BitState(int i)   { Parse3Digits(i, Parse3CachedBitState); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+// Note that Backtrack is registered before RE2 here, unlike Parse_Digits_*.
+BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
+
+// Same driver as Parse3Digits, except that the three capture groups are
+// written with \d instead of [0-9].  One processed item is reported per
+// iteration.
+void Parse3DigitDs(int iters,
+               void (*parse3)(int, const char*, const StringPiece&)) {
+  static const char kThreeGroups[] = "(\\d+)-(\\d+)-(\\d+)";
+  static const char kPhone[] = "650-253-0001";
+
+  BenchmarkMemoryUsage();
+  (*parse3)(iters, kThreeGroups, kPhone);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+// Uncached \d-pattern parse benchmarks: each forwards the iteration count to
+// Parse3DigitDs with a routine that rebuilds the regexp on every iteration.
+// All six entries use the uncached Parse3* routines, mirroring
+// Parse_Digits_*; the cached routines belong to Parse_CachedDigitDs_*.
+void Parse_DigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3NFA); }
+void Parse_DigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3OnePass); }
+void Parse_DigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3PCRE); }
+void Parse_DigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3RE2); }
+void Parse_DigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3Backtrack); }
+void Parse_DigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3BitState); }
+
+// Registration of the Parse_DigitDs_* entry points; the PCRE entry is
+// registered only when USEPCRE is defined.
+BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
+
+// \d-pattern parse benchmarks routed through the Parse3Cached* routines,
+// which build the regexp once instead of once per iteration.
+void Parse_CachedDigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3CachedNFA); }
+void Parse_CachedDigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3CachedOnePass); }
+void Parse_CachedDigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3CachedPCRE); }
+void Parse_CachedDigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3CachedRE2); }
+void Parse_CachedDigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
+void Parse_CachedDigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
+
+// Benchmark: splitting off leading number field.
+
+// Drives one single-submatch parsing routine: the pattern consumes the
+// leading digits and the dash, and captures whatever follows.  One processed
+// item is reported per iteration.
+void Parse1Split(int iters,
+              void (*parse1)(int, const char*, const StringPiece&)) {
+  static const char kSplitPattern[] = "[0-9]+-(.*)";
+  static const char kPhone[] = "650-253-0001";
+
+  BenchmarkMemoryUsage();
+  (*parse1)(iters, kSplitPattern, kPhone);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+// Benchmark entry points: each forwards the iteration count to Parse1Split
+// with one uncached single-submatch parsing routine.
+void Parse_Split_NFA(int i)         { Parse1Split(i, Parse1NFA); }
+void Parse_Split_OnePass(int i)     { Parse1Split(i, Parse1OnePass); }
+void Parse_Split_PCRE(int i)        { Parse1Split(i, Parse1PCRE); }
+void Parse_Split_RE2(int i)         { Parse1Split(i, Parse1RE2); }
+void Parse_Split_BitState(int i)         { Parse1Split(i, Parse1BitState); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
+
+// Same benchmark as Parse_Split_*, but routed through the Parse1Cached*
+// routines, which build the regexp once instead of once per iteration.
+void Parse_CachedSplit_NFA(int i)         { Parse1Split(i, Parse1CachedNFA); }
+void Parse_CachedSplit_OnePass(int i)     { Parse1Split(i, Parse1CachedOnePass); }
+void Parse_CachedSplit_PCRE(int i)        { Parse1Split(i, Parse1CachedPCRE); }
+void Parse_CachedSplit_RE2(int i)         { Parse1Split(i, Parse1CachedRE2); }
+void Parse_CachedSplit_BitState(int i)         { Parse1Split(i, Parse1CachedBitState); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
+
+// Benchmark: splitting off leading number field but harder (ambiguous regexp).
+
+// Like Parse1Split, but the separator after the digits is '.' (any
+// character) rather than a literal dash, which makes the regexp ambiguous.
+// One processed item is reported per iteration.
+void Parse1SplitHard(int iters,
+                  void (*run)(int, const char*, const StringPiece&)) {
+  static const char kAmbiguousSplit[] = "[0-9]+.(.*)";
+  static const char kPhone[] = "650-253-0001";
+
+  BenchmarkMemoryUsage();
+  (*run)(iters, kAmbiguousSplit, kPhone);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+// Benchmark entry points: each forwards the iteration count to
+// Parse1SplitHard with one uncached single-submatch parsing routine.
+// There is no OnePass entry in this group.
+void Parse_SplitHard_NFA(int i)         { Parse1SplitHard(i, Parse1NFA); }
+void Parse_SplitHard_PCRE(int i)        { Parse1SplitHard(i, Parse1PCRE); }
+void Parse_SplitHard_RE2(int i)         { Parse1SplitHard(i, Parse1RE2); }
+void Parse_SplitHard_BitState(int i)         { Parse1SplitHard(i, Parse1BitState); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
+
+// Same benchmark as Parse_SplitHard_*, but routed through the Parse1Cached*
+// routines; this group additionally has a Backtrack entry.
+void Parse_CachedSplitHard_NFA(int i)       { Parse1SplitHard(i, Parse1CachedNFA); }
+void Parse_CachedSplitHard_PCRE(int i)      { Parse1SplitHard(i, Parse1CachedPCRE); }
+void Parse_CachedSplitHard_RE2(int i)       { Parse1SplitHard(i, Parse1CachedRE2); }
+void Parse_CachedSplitHard_BitState(int i)       { Parse1SplitHard(i, Parse1CachedBitState); }
+void Parse_CachedSplitHard_Backtrack(int i)       { Parse1SplitHard(i, Parse1CachedBacktrack); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
+
+// Benchmark: Parse1SplitHard, big text, small match.
+
+// Runs the Parse1SplitHard pattern over a large text in which the phone
+// number comes last, after 100000 'x' characters.  One processed item is
+// reported per iteration.
+void Parse1SplitBig1(int iters,
+                  void (*run)(int, const char*, const StringPiece&)) {
+  string haystack(100000, 'x');  // filler in front of the phone number
+  haystack += "650-253-0001";
+
+  BenchmarkMemoryUsage();
+  (*run)(iters, "[0-9]+.(.*)", haystack);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+// Entry points for the big-text/small-match benchmark; both use the
+// SearchParse1Cached* routines, which call PartialMatch on a prebuilt regexp.
+void Parse_CachedSplitBig1_PCRE(int i)      { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
+void Parse_CachedSplitBig1_RE2(int i)       { Parse1SplitBig1(i, SearchParse1CachedRE2); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: Parse1SplitHard, big text, big match.
+
+// Runs the Parse1SplitHard pattern over a large text that starts with
+// "650-253-" and continues with 100000 '0' characters, so the captured tail
+// is itself large.  One processed item is reported per iteration.
+void Parse1SplitBig2(int iters,
+                  void (*run)(int, const char*, const StringPiece&)) {
+  string haystack("650-253-");
+  haystack.append(100000, '0');  // long run of digits after the prefix
+
+  BenchmarkMemoryUsage();
+  (*run)(iters, "[0-9]+.(.*)", haystack);
+  SetBenchmarkItemsProcessed(iters);
+}
+
+// Entry points for the big-text/big-match benchmark; both use the
+// SearchParse1Cached* routines, which call PartialMatch on a prebuilt regexp.
+void Parse_CachedSplitBig2_PCRE(int i)      { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
+void Parse_CachedSplitBig2_RE2(int i)       { Parse1SplitBig2(i, SearchParse1CachedRE2); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: measure time required to parse (but not execute)
+// a simple regular expression.
+
+// Parses regexp (Regexp::LikePerl flags) iters times, releasing each parse
+// result right away.  Nothing is simplified, compiled or executed.
+void ParseRegexp(int iters, const string& regexp) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    re->Decref();
+  }
+}
+
+// Parses regexp and calls Simplify() on the result, iters times.  Both the
+// parsed and the simplified Regexp are released at the end of each iteration.
+void SimplifyRegexp(int iters, const string& regexp) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Regexp* sre = re->Simplify();
+    CHECK(sre);
+    sre->Decref();
+    re->Decref();
+  }
+}
+
+// Parses regexp once, then calls NullWalk() on the parsed Regexp iters
+// times; only the walk is repeated.
+void NullWalkRegexp(int iters, const string& regexp) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  for (int i = 0; i < iters; i++) {
+    re->NullWalk();
+  }
+  re->Decref();
+}
+
+// Full pipeline with an explicit simplification step: each iteration parses
+// regexp, simplifies it, compiles the simplified form to a Prog, and then
+// frees the Prog and both Regexp objects.
+void SimplifyCompileRegexp(int iters, const string& regexp) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Regexp* sre = re->Simplify();
+    CHECK(sre);
+    Prog* prog = sre->CompileToProg(0);
+    CHECK(prog);
+    delete prog;
+    sre->Decref();
+    re->Decref();
+  }
+}
+
+// Each iteration parses regexp and compiles the parse result directly to a
+// Prog (no explicit Simplify() call here), then frees both.
+void CompileRegexp(int iters, const string& regexp) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Parses regexp once, then compiles that same Regexp to a fresh Prog iters
+// times, deleting each Prog immediately; parsing is outside the loop.
+void CompileToProg(int iters, const string& regexp) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  for (int i = 0; i < iters; i++) {
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    delete prog;
+  }
+  re->Decref();
+}
+
+// Parses and compiles regexp once, then calls ComputeByteMap() on the
+// resulting Prog iters times; only that call is repeated.
+void CompileByteMap(int iters, const string& regexp) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  for (int i = 0; i < iters; i++) {
+    prog->ComputeByteMap();
+  }
+  delete prog;
+  re->Decref();
+}
+
+// Constructs a PCRE object (PCRE::UTF8 option) from regexp iters times and
+// checks after each construction that its error string is empty.
+void CompilePCRE(int iters, const string& regexp) {
+  for (int i = 0; i < iters; i++) {
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+  }
+}
+
+// Constructs an RE2 object from regexp iters times and checks after each
+// construction that its error string is empty.
+void CompileRE2(int iters, const string& regexp) {
+  for (int i = 0; i < iters; i++) {
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+  }
+}
+
+// Common driver for the build benchmarks: hands iters and regexp to run,
+// then reports one processed item per iteration.
+void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
+  (*run)(iters, regexp);
+
+  SetBenchmarkItemsProcessed(iters);
+}
+
+}  // namespace re2
+
+// Regexp fed to the build benchmarks below, which read it as
+// FLAGS_compile_regexp.  The flag is defined between the two re2 namespace
+// blocks, i.e. outside namespace re2.
+DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
+
+namespace re2 {
+
+// Build benchmark entry points: each forwards the iteration count and
+// FLAGS_compile_regexp to RunBuild together with the routine to time.
+void BM_PCRE_Compile(int i)      { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
+void BM_Regexp_Parse(int i)      { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
+void BM_Regexp_Simplify(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
+void BM_CompileToProg(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
+void BM_CompileByteMap(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
+void BM_Regexp_Compile(int i)    { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
+void BM_Regexp_SimplifyCompile(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
+void BM_Regexp_NullWalk(int i)   { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
+void BM_RE2_Compile(int i)       { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
+
+// Registration; the PCRE entry is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
+
+
+// Makes text of size nbytes, appends a phone number to it, then calls
+// search to look for the phone-number regexp in that text iters times.
+// The text is built while benchmark timing is stopped.
+void SearchPhone(int iters, int nbytes, ParseImpl* search) {
+  static const char kPhoneRegexp[] =
+      "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})";
+
+  StopBenchmarkTiming();
+  string haystack;
+  MakeText(&haystack, nbytes);
+  haystack += "(650) 253-0001";
+  BenchmarkMemoryUsage();
+  StartBenchmarkTiming();
+
+  (*search)(iters, kPhoneRegexp, haystack);
+
+  // Only the nbytes of generated text are counted, not the appended number.
+  const int64 total_bytes = static_cast<int64>(iters)*nbytes;
+  SetBenchmarkBytesProcessed(total_bytes);
+}
+
+// Cached PCRE and RE2 entry points for the phone-number search: i is the
+// iteration count and n the text size handed to SearchPhone.
+void SearchPhone_CachedPCRE(int i, int n) { SearchPhone(i, n, SearchParse2CachedPCRE); }
+void SearchPhone_CachedRE2(int i, int n)  { SearchPhone(i, n, SearchParse2CachedRE2); }
+
+#ifdef USEPCRE
+BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
+
+/*
+TODO(rsc): Make this work again.
+
+// Generates and returns a string over binary alphabet {0,1} that contains
+// all possible binary sequences of length n as substrings.  The obvious
+// brute force method would generate a string of length n * 2^n, but this
+// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
+// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
+static string DeBruijnString(int n) {
+  CHECK_LT(n, 8*sizeof(int));
+  CHECK_GT(n, 0);
+
+  vector<bool> did(1<<n);
+  for (int i = 0; i < 1<<n; i++)
+    did[i] = false;
+
+  string s;
+  for (int i = 0; i < n-1; i++)
+    s.append("0");
+  int bits = 0;
+  int mask = (1<<n) - 1;
+  for (int i = 0; i < (1<<n); i++) {
+    bits <<= 1;
+    bits &= mask;
+    if (!did[bits|1]) {
+      bits |= 1;
+      s.append("1");
+    } else {
+      s.append("0");
+    }
+    CHECK(!did[bits]);
+    did[bits] = true;
+  }
+  return s;
+}
+
+void CacheFill(int iters, int n, SearchImpl *srch) {
+  string s = DeBruijnString(n+1);
+  string t;
+  for (int i = n+1; i < 20; i++) {
+    t = s + s;
+    swap(s, t);
+  }
+  srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
+       Prog::kUnanchored, true);
+  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size());
+}
+
+void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
+void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
+void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
+void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
+
+// BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
+// for the static BenchmarkRegisterer, which makes it unusable inside
+// a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
+// to make the identifiers distinct (only possible when 'a' is a simple
+// expression like 2, not like 1+1).
+#define MY_BENCHMARK_WITH_ARG(n, a) \
+  bool __benchmark_ ## n ## a =     \
+    (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
+
+#define DO24(A, B) \
+  A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
+  A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
+  A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
+  A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
+
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
+
+#undef DO24
+#undef MY_BENCHMARK_WITH_ARG
+*/
+
+////////////////////////////////////////////////////////////////////////
+//
+// Implementation routines.  Sad that there are so many,
+// but all the interfaces are slightly different.
+
+// Runs implementation to search for regexp in text, iters times.
+// expect_match says whether the regexp should be found.
+// anchor says whether to run an anchored search.
+
+// Uncached DFA search: every iteration parses regexp, compiles it to a Prog,
+// runs one SearchDFA call and frees both objects, so parse and compile work
+// is repeated each time.  The search result must equal expect_match.
+void SearchDFA(int iters, const char* regexp, const StringPiece& text,
+            Prog::Anchor anchor, bool expect_match) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    bool failed = false;
+    CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch,
+                             NULL, &failed, NULL),
+             expect_match);
+    // SearchDFA reports trouble through its `failed` out-parameter; the
+    // benchmark treats any such report as fatal.
+    CHECK(!failed);
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached NFA search: every iteration parses regexp, compiles it to a Prog,
+// runs one SearchNFA call (no submatches requested) and frees both objects.
+// The search result must equal expect_match.
+void SearchNFA(int iters, const char* regexp, const StringPiece& text,
+            Prog::Anchor anchor, bool expect_match) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached one-pass search: every iteration parses and compiles regexp,
+// checks that prog->IsOnePass() holds, runs one SearchOnePass call (no
+// submatches requested) and frees both objects.  The search result must
+// equal expect_match.
+void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
+            Prog::Anchor anchor, bool expect_match) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());
+    // text is passed for both of the first two arguments.
+    CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached BitState search: every iteration parses regexp, compiles it to a
+// Prog, runs one SearchBitState call (no submatches requested) and frees
+// both objects.  The search result must equal expect_match.
+void SearchBitState(int iters, const char* regexp, const StringPiece& text,
+            Prog::Anchor anchor, bool expect_match) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    // text is passed for both of the first two arguments.
+    CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached PCRE search: a PCRE object is constructed from regexp inside the
+// loop, so construction is repeated every iteration.  anchor ==
+// Prog::kAnchored selects PCRE::FullMatch; any other value selects
+// PCRE::PartialMatch.  The match result must equal expect_match.
+void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
+                Prog::Anchor anchor, bool expect_match) {
+  for (int i = 0; i < iters; i++) {
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+    if (anchor == Prog::kAnchored)
+      CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
+    else
+      CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
+  }
+}
+
+// Uncached RE2 search: an RE2 object is constructed from regexp inside the
+// loop, so construction is repeated every iteration.  anchor ==
+// Prog::kAnchored selects RE2::FullMatch; any other value selects
+// RE2::PartialMatch.  The match result must equal expect_match.
+void SearchRE2(int iters, const char* regexp, const StringPiece& text,
+               Prog::Anchor anchor, bool expect_match) {
+  for (int i = 0; i < iters; i++) {
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+    if (anchor == Prog::kAnchored)
+      CHECK_EQ(RE2::FullMatch(text, re), expect_match);
+    else
+      CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
+  }
+}
+
+// SearchCachedXXX is like SearchXXX but only does the
+// regexp parsing and compiling once.  This lets us measure
+// search time without the per-regexp overhead.
+
+// Cached DFA search: regexp is parsed and compiled once up front, and only
+// the SearchDFA call is repeated iters times.  Each result must equal
+// expect_match and the `failed` out-parameter must stay false.
+void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
+                     Prog::Anchor anchor, bool expect_match) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  // NOTE(review): 1LL<<31 differs from the 0 passed by the other routines;
+  // presumably it is a (large) memory budget -- confirm in CompileToProg.
+  Prog* prog = re->CompileToProg(1LL<<31);
+  CHECK(prog);
+  for (int i = 0; i < iters; i++) {
+    bool failed = false;
+    CHECK_EQ(prog->SearchDFA(text, NULL, anchor,
+                             Prog::kFirstMatch, NULL, &failed, NULL),
+             expect_match);
+    CHECK(!failed);
+  }
+  delete prog;
+  re->Decref();
+}
+
+// Cached NFA search: regexp is parsed and compiled once up front, and only
+// the SearchNFA call (no submatches requested) is repeated iters times.
+// Each result must equal expect_match.
+void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
+                     Prog::Anchor anchor, bool expect_match) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  for (int i = 0; i < iters; i++) {
+    CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+  }
+  delete prog;
+  re->Decref();
+}
+
+// Cached one-pass search: regexp is parsed and compiled once, IsOnePass() is
+// checked once, and only the SearchOnePass call (no submatches requested) is
+// repeated iters times.  Each result must equal expect_match.
+void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
+                     Prog::Anchor anchor, bool expect_match) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  CHECK(prog->IsOnePass());
+  for (int i = 0; i < iters; i++)
+    CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+  delete prog;
+  re->Decref();
+}
+
+// Cached BitState search: regexp is parsed and compiled once up front, and
+// only the SearchBitState call (no submatches requested) is repeated iters
+// times.  Each result must equal expect_match.
+void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
+                     Prog::Anchor anchor, bool expect_match) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  for (int i = 0; i < iters; i++)
+    CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+  delete prog;
+  re->Decref();
+}
+
+// Cached PCRE search: the PCRE object is constructed once, before the loop.
+// anchor == Prog::kAnchored selects PCRE::FullMatch; any other value selects
+// PCRE::PartialMatch.  Each match result must equal expect_match.
+void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
+                     Prog::Anchor anchor, bool expect_match) {
+  PCRE re(regexp, PCRE::UTF8);
+  CHECK_EQ(re.error(), "");
+  for (int i = 0; i < iters; i++) {
+    if (anchor == Prog::kAnchored)
+      CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
+    else
+      CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
+  }
+}
+
+// Cached RE2 search: the RE2 object is constructed once, before the loop.
+// anchor == Prog::kAnchored selects RE2::FullMatch; any other value selects
+// RE2::PartialMatch.  Each match result must equal expect_match.
+void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
+                     Prog::Anchor anchor, bool expect_match) {
+  RE2 re(regexp);
+  CHECK_EQ(re.error(), "");
+  for (int i = 0; i < iters; i++) {
+    if (anchor == Prog::kAnchored)
+      CHECK_EQ(RE2::FullMatch(text, re), expect_match);
+    else
+      CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
+  }
+}
+
+
+// Runs implementation to full match regexp against text,
+// extracting three submatches.  Expects match always.
+
+// Uncached NFA parse: every iteration parses and compiles regexp, runs one
+// anchored kFullMatch SearchNFA call that fills a four-entry StringPiece
+// array, and frees both objects.  The search must succeed.
+void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached one-pass parse: every iteration parses and compiles regexp,
+// checks IsOnePass(), runs one anchored kFullMatch SearchOnePass call that
+// fills a four-entry StringPiece array, and frees both objects.  The search
+// must succeed.
+void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached BitState parse: every iteration parses and compiles regexp, runs
+// one anchored kFullMatch SearchBitState call that fills a four-entry
+// StringPiece array, and frees both objects.  The search must succeed.
+void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached backtracking parse: every iteration parses and compiles regexp,
+// runs one anchored kFullMatch UnsafeSearchBacktrack call that fills a
+// four-entry StringPiece array, and frees both objects.  The search must
+// succeed.
+void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached PCRE parse: a PCRE object is constructed inside the loop, then
+// PCRE::FullMatch must succeed while filling three StringPiece captures.
+void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1, sp2, sp3;
+    CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+// Uncached RE2 parse: an RE2 object is constructed inside the loop, then
+// RE2::FullMatch must succeed while filling three StringPiece captures.
+void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1, sp2, sp3;
+    CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+// Cached NFA parse: regexp is parsed and compiled once; only the anchored
+// kFullMatch SearchNFA call is repeated, reusing one four-entry StringPiece
+// array across iterations.  Every search must succeed.
+void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++) {
+    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  }
+  delete prog;
+  re->Decref();
+}
+
+// Cached one-pass parse: regexp is parsed and compiled once and IsOnePass()
+// is checked once; only the anchored kFullMatch SearchOnePass call is
+// repeated, reusing one four-entry StringPiece array.  Every search must
+// succeed.
+void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  CHECK(prog->IsOnePass());
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++)
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  delete prog;
+  re->Decref();
+}
+
+// Cached BitState parse: regexp is parsed and compiled once; only the
+// anchored kFullMatch SearchBitState call is repeated, reusing one
+// four-entry StringPiece array.  Every search must succeed.
+void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++)
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  delete prog;
+  re->Decref();
+}
+
+// Cached backtracking parse: regexp is parsed and compiled once; only the
+// anchored kFullMatch UnsafeSearchBacktrack call is repeated, reusing one
+// four-entry StringPiece array.  Every search must succeed.
+void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++)
+    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  delete prog;
+  re->Decref();
+}
+
+// Cached PCRE parse: the PCRE object and the three capture StringPieces are
+// created once; only PCRE::FullMatch is repeated and must succeed each time.
+void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
+  PCRE re(regexp, PCRE::UTF8);
+  CHECK_EQ(re.error(), "");
+  StringPiece sp1, sp2, sp3;
+  for (int i = 0; i < iters; i++) {
+    CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+// Cached RE2 parse: the RE2 object and the three capture StringPieces are
+// created once; only RE2::FullMatch is repeated and must succeed each time.
+void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
+  RE2 re(regexp);
+  CHECK_EQ(re.error(), "");
+  StringPiece sp1, sp2, sp3;
+  for (int i = 0; i < iters; i++) {
+    CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+
+// Runs implementation to full match regexp against text,
+// extracting one submatch.  Expects match always.
+
+// Uncached NFA parse: every iteration parses and compiles regexp, runs one
+// anchored kFullMatch SearchNFA call that fills a two-entry StringPiece
+// array, and frees both objects.  The search must succeed.
+void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[2];  // 2 because sp[0] is whole match.
+    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached one-pass parse: every iteration parses and compiles regexp,
+// checks IsOnePass(), runs one anchored kFullMatch SearchOnePass call that
+// fills a two-entry StringPiece array, and frees both objects.  The search
+// must succeed.
+void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());
+    StringPiece sp[2];  // 2 because sp[0] is whole match.
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached BitState parse: every iteration parses and compiles regexp, runs
+// one anchored kFullMatch SearchBitState call that fills a two-entry
+// StringPiece array, and frees both objects.  The search must succeed.
+void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[2];  // 2 because sp[0] is whole match.
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+    delete prog;
+    re->Decref();
+  }
+}
+
+// Uncached PCRE parse: a PCRE object is constructed inside the loop, then
+// PCRE::FullMatch must succeed while filling one StringPiece capture.
+void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1;
+    CHECK(PCRE::FullMatch(text, re, &sp1));
+  }
+}
+
+// Uncached RE2 parse: an RE2 object is constructed inside the loop, then
+// RE2::FullMatch must succeed while filling one StringPiece capture.
+void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
+  for (int i = 0; i < iters; i++) {
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1;
+    CHECK(RE2::FullMatch(text, re, &sp1));
+  }
+}
+
+// Cached NFA parse: regexp is parsed and compiled once; only the anchored
+// kFullMatch SearchNFA call is repeated, reusing one two-entry StringPiece
+// array across iterations.  Every search must succeed.
+void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++) {
+    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  }
+  delete prog;
+  re->Decref();
+}
+
+// Cached one-pass parse: regexp is parsed and compiled once and IsOnePass()
+// is checked once; only the anchored kFullMatch SearchOnePass call is
+// repeated, reusing one two-entry StringPiece array.  Every search must
+// succeed.
+void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  CHECK(prog->IsOnePass());
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++)
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  delete prog;
+  re->Decref();
+}
+
+// Cached BitState parse: regexp is parsed and compiled once; only the
+// anchored kFullMatch SearchBitState call is repeated, reusing one
+// two-entry StringPiece array.  Every search must succeed.
+void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++)
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  delete prog;
+  re->Decref();
+}
+
+// Cached backtracking parse: regexp is parsed and compiled once; only the
+// anchored kFullMatch UnsafeSearchBacktrack call is repeated, reusing one
+// two-entry StringPiece array.  Every search must succeed.
+void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);
+  CHECK(prog);
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (int i = 0; i < iters; i++)
+    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  delete prog;
+  re->Decref();
+}
+
+// Cached PCRE parse: the PCRE object and the capture StringPiece are created
+// once; only PCRE::FullMatch is repeated and must succeed each time.
+void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
+  PCRE re(regexp, PCRE::UTF8);
+  CHECK_EQ(re.error(), "");
+  StringPiece sp1;
+  for (int i = 0; i < iters; i++) {
+    CHECK(PCRE::FullMatch(text, re, &sp1));
+  }
+}
+
+// Cached RE2 parse: the RE2 object and the capture StringPiece are created
+// once; only RE2::FullMatch is repeated and must succeed each time.
+void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
+  RE2 re(regexp);
+  CHECK_EQ(re.error(), "");
+  StringPiece sp1;
+  for (int i = 0; i < iters; i++) {
+    CHECK(RE2::FullMatch(text, re, &sp1));
+  }
+}
+
+// Cached PCRE search-and-parse: the PCRE object is constructed once; each
+// iteration calls PCRE::PartialMatch (not FullMatch) with two fresh
+// StringPiece captures, and the match must succeed.
+void SearchParse2CachedPCRE(int iters, const char* regexp,
+                            const StringPiece& text) {
+  PCRE re(regexp, PCRE::UTF8);
+  CHECK_EQ(re.error(), "");
+  for (int i = 0; i < iters; i++) {
+    StringPiece sp1, sp2;
+    CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
+  }
+}
+
+// Cached RE2 search-and-parse: the RE2 object is constructed once; each
+// iteration calls RE2::PartialMatch (not FullMatch) with two fresh
+// StringPiece captures, and the match must succeed.
+void SearchParse2CachedRE2(int iters, const char* regexp,
+                           const StringPiece& text) {
+  RE2 re(regexp);
+  CHECK_EQ(re.error(), "");
+  for (int i = 0; i < iters; i++) {
+    StringPiece sp1, sp2;
+    CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
+  }
+}
+
+// Cached PCRE search-and-parse: the PCRE object is constructed once; each
+// iteration calls PCRE::PartialMatch (not FullMatch) with one fresh
+// StringPiece capture, and the match must succeed.
+void SearchParse1CachedPCRE(int iters, const char* regexp,
+                            const StringPiece& text) {
+  PCRE re(regexp, PCRE::UTF8);
+  CHECK_EQ(re.error(), "");
+  for (int i = 0; i < iters; i++) {
+    StringPiece sp1;
+    CHECK(PCRE::PartialMatch(text, re, &sp1));
+  }
+}
+
+// Cached RE2 search-and-parse: the RE2 object is constructed once; each
+// iteration calls RE2::PartialMatch (not FullMatch) with one fresh
+// StringPiece capture, and the match must succeed.
+void SearchParse1CachedRE2(int iters, const char* regexp,
+                           const StringPiece& text) {
+  RE2 re(regexp);
+  CHECK_EQ(re.error(), "");
+  for (int i = 0; i < iters; i++) {
+    StringPiece sp1;
+    CHECK(RE2::PartialMatch(text, re, &sp1));
+  }
+}
+
+// Benchmark: n PCRE partial matches of the empty pattern against the empty
+// string.  The pattern is compiled once, before the loop; match results
+// are ignored.
+void EmptyPartialMatchPCRE(int n) {
+  PCRE empty_re("");
+  for (int iter = 0; iter < n; ++iter)
+    PCRE::PartialMatch("", empty_re);
+}
+
+// Benchmark: n RE2 partial matches of the empty pattern against the empty
+// string.  The pattern is compiled once, before the loop; match results
+// are ignored.
+void EmptyPartialMatchRE2(int n) {
+  RE2 empty_re("");
+  for (int iter = 0; iter < n; ++iter)
+    RE2::PartialMatch("", empty_re);
+}
+// The PCRE variant is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: n PCRE partial matches of the literal pattern "abcdefg"
+// against the identical text.  Match results are ignored.
+void SimplePartialMatchPCRE(int n) {
+  PCRE literal_re("abcdefg");
+  for (int iter = 0; iter < n; ++iter)
+    PCRE::PartialMatch("abcdefg", literal_re);
+}
+
+// Benchmark: n RE2 partial matches of the literal pattern "abcdefg"
+// against the identical text.  Match results are ignored.
+void SimplePartialMatchRE2(int n) {
+  RE2 literal_re("abcdefg");
+  for (int iter = 0; iter < n; ++iter)
+    RE2::PartialMatch("abcdefg", literal_re);
+}
+// The PCRE variant is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Sample HTTP request line (method, long path, protocol version) used as
+// match input by benchmarks that follow.
+static string http_text =
+  "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
+  "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
+
+// Benchmark: n PCRE partial matches of an HTTP request-line pattern against
+// http_text, capturing the request path.
+void HTTPPartialMatchPCRE(int n) {
+  StringPiece path;
+  PCRE request_re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  for (int iter = 0; iter < n; ++iter)
+    PCRE::PartialMatch(http_text, request_re, &path);
+}
+
+// Benchmark: n RE2 partial matches of an HTTP request-line pattern against
+// http_text, capturing the request path.
+void HTTPPartialMatchRE2(int n) {
+  StringPiece path;
+  RE2 request_re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  for (int iter = 0; iter < n; ++iter)
+    RE2::PartialMatch(http_text, request_re, &path);
+}
+
+// The PCRE variant is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Minimal HTTP request line: a short counterpart to http_text.
+static string http_smalltext =
+  "GET /abc HTTP/1.1";
+
+// Benchmark: n PCRE partial matches of an HTTP request-line pattern against
+// the short request line http_smalltext, capturing the request path.
+// Matching against http_smalltext (not http_text) is what distinguishes
+// this benchmark from HTTPPartialMatchPCRE.
+void SmallHTTPPartialMatchPCRE(int n) {
+  StringPiece a;
+  PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  for (int i = 0; i < n; i++) {
+    PCRE::PartialMatch(http_smalltext, re, &a);
+  }
+}
+
+// Benchmark: n RE2 partial matches of an HTTP request-line pattern against
+// the short request line http_smalltext, capturing the request path.
+// Matching against http_smalltext (not http_text) is what distinguishes
+// this benchmark from HTTPPartialMatchRE2.
+void SmallHTTPPartialMatchRE2(int n) {
+  StringPiece a;
+  RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  for (int i = 0; i < n; i++) {
+    RE2::PartialMatch(http_smalltext, re, &a);
+  }
+}
+
+// The PCRE variant is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: n PCRE partial matches of "(?-s)^(.+)" against http_text,
+// capturing the matched run.
+void DotMatchPCRE(int n) {
+  StringPiece captured;
+  PCRE dot_re("(?-s)^(.+)");
+  for (int iter = 0; iter < n; ++iter)
+    PCRE::PartialMatch(http_text, dot_re, &captured);
+}
+
+// Benchmark: n RE2 partial matches of "(?-s)^(.+)" against http_text,
+// capturing the matched run.
+void DotMatchRE2(int n) {
+  StringPiece captured;
+  RE2 dot_re("(?-s)^(.+)");
+  for (int iter = 0; iter < n; ++iter)
+    RE2::PartialMatch(http_text, dot_re, &captured);
+}
+
+// The PCRE variant is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: n PCRE partial matches of "(?-s)^([ -~]+)" (a run of
+// characters in the range space through tilde) against http_text.
+void ASCIIMatchPCRE(int n) {
+  StringPiece captured;
+  PCRE ascii_re("(?-s)^([ -~]+)");
+  for (int iter = 0; iter < n; ++iter)
+    PCRE::PartialMatch(http_text, ascii_re, &captured);
+}
+
+// Benchmark: n RE2 partial matches of "(?-s)^([ -~]+)" (a run of
+// characters in the range space through tilde) against http_text.
+void ASCIIMatchRE2(int n) {
+  StringPiece captured;
+  RE2 ascii_re("(?-s)^([ -~]+)");
+  for (int iter = 0; iter < n; ++iter)
+    RE2::PartialMatch(http_text, ascii_re, &captured);
+}
+
+// The PCRE variant is registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark driver: times iter PCRE full matches of regexp against the
+// text produced by MakeText(&text, n) with "ABCDEFGHIJ" appended.
+// Text construction and regexp compilation run with the timer stopped.
+// Bytes processed is reported as iter*n, so the appended suffix is not
+// counted.
+void FullMatchPCRE(int iter, int n, const char *regexp) {
+  StopBenchmarkTiming();
+  string text;
+  MakeText(&text, n);
+  text += "ABCDEFGHIJ";
+  BenchmarkMemoryUsage();
+  PCRE pattern(regexp);
+  StartBenchmarkTiming();
+  for (int round = 0; round < iter; ++round) {
+    CHECK(PCRE::FullMatch(text, pattern));
+  }
+  SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
+}
+
+// Benchmark driver: times iter RE2 full matches of regexp (compiled with
+// the RE2::Latin1 option) against the text produced by MakeText(&text, n)
+// with "ABCDEFGHIJ" appended.
+// Text construction and regexp compilation run with the timer stopped.
+// Bytes processed is reported as iter*n, so the appended suffix is not
+// counted.
+void FullMatchRE2(int iter, int n, const char *regexp) {
+  StopBenchmarkTiming();
+  string text;
+  MakeText(&text, n);
+  text += "ABCDEFGHIJ";
+  BenchmarkMemoryUsage();
+  RE2 pattern(regexp, RE2::Latin1);
+  StartBenchmarkTiming();
+  for (int round = 0; round < iter; ++round) {
+    CHECK(RE2::FullMatch(text, pattern));
+  }
+  SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
+}
+
+// Full-match benchmarks: each wrapper binds one pattern to the
+// FullMatchPCRE / FullMatchRE2 driver.
+
+// "(?s).*"
+void FullMatch_DotStar_CachedPCRE(int i, int n) {
+  FullMatchPCRE(i, n, "(?s).*");
+}
+void FullMatch_DotStar_CachedRE2(int i, int n) {
+  FullMatchRE2(i, n, "(?s).*");
+}
+
+// "(?s).*$"
+void FullMatch_DotStarDollar_CachedPCRE(int i, int n) {
+  FullMatchPCRE(i, n, "(?s).*$");
+}
+void FullMatch_DotStarDollar_CachedRE2(int i, int n) {
+  FullMatchRE2(i, n, "(?s).*$");
+}
+
+// "(?s)((.*)()()($))"
+void FullMatch_DotStarCapture_CachedPCRE(int i, int n) {
+  FullMatchPCRE(i, n, "(?s)((.*)()()($))");
+}
+void FullMatch_DotStarCapture_CachedRE2(int i, int n) {
+  FullMatchRE2(i, n, "(?s)((.*)()()($))");
+}
+
+// PCRE variants are registered only when USEPCRE is defined.
+#ifdef USEPCRE
+BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
+#endif
+BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
+
+#ifdef USEPCRE
+BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
+#endif
+BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
+
+#ifdef USEPCRE
+BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
+#endif
+BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
+
+}  // namespace re2

diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
new file mode 100644
index 0000000..cf2db11
--- /dev/null
+++ b/re2/testing/regexp_generator.cc

@@ -0,0 +1,264 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression generator: generates all possible
+// regular expressions within parameters (see regexp_generator.h for details).
+
+// The regexp generator first generates a sequence of commands in a simple
+// postfix language.  Each command in the language is a string,
+// like "a" or "%s*" or "%s|%s".
+//
+// To evaluate a command, enough arguments are popped from the value stack to
+// plug into the %s slots.  Then the result is pushed onto the stack.
+// For example, the command sequence
+//      a b %s%s c
+// results in the stack
+//      ab c
+//
+// GeneratePostfix generates all possible command sequences.
+// Then RunPostfix turns each sequence into a regular expression
+// and passes the regexp to HandleRegexp.
+
+#include <string.h>
+#include <string>
+#include <stack>
+#include <vector>
+#include "util/test.h"
+#include "re2/testing/regexp_generator.h"
+
+namespace re2 {
+
+// Returns the operator templates for egrep-style regexps: concatenation,
+// alternation, *, +, ?, plus a trailing \C* operator.
+// The vector is a function-local static, built on first call.
+const vector<string>& RegexpGenerator::EgrepOps() {
+  static const char *kEgrepOps[] = {
+    "%s%s",
+    "%s|%s",
+    "%s*",
+    "%s+",
+    "%s?",
+    "%s\\C*",
+  };
+  static vector<string> egrep_ops(kEgrepOps, kEgrepOps + arraysize(kEgrepOps));
+  return egrep_ops;
+}
+
+// Constructs a generator over the given atoms and operator templates.
+//   maxatoms: maximum number of atoms in any generated expression
+//   maxops:   maximum number of operators in any generated expression
+//   atoms:    candidate atoms (copied)
+//   ops:      candidate operator templates containing %s slots (copied)
+RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
+                                 const vector<string>& atoms,
+                                 const vector<string>& ops)
+    : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops),
+      acm_(NULL) {  // No random source until GenerateRandom installs one.
+  // Degenerate case: with nothing to choose from, the budget is zero.
+  if (atoms_.empty())
+    maxatoms_ = 0;
+  if (ops_.empty())
+    maxops_ = 0;
+}
+
+// Exhaustively enumerates every regular expression within the configured
+// limits, starting from an empty postfix sequence.  Each expression is
+// delivered to HandleRegexp.
+void RegexpGenerator::Generate() {
+  vector<string> commands;  // Postfix command sequence, initially empty.
+  GeneratePostfix(&commands, 0, 0, 0);
+}
+
+// Makes n attempts at generating a random regular expression, drawing from
+// a random source seeded with seed.  Generated expressions are delivered
+// to HandleRegexp.
+void RegexpGenerator::GenerateRandom(int32 seed, int n) {
+  ACMRandom rng(seed);
+  acm_ = &rng;  // Published to GenerateRandomPostfix for this call only.
+
+  for (int attempt = 0; attempt < n; ++attempt) {
+    vector<string> commands;
+    GenerateRandomPostfix(&commands, 0, 0, 0);
+  }
+
+  acm_ = NULL;  // rng is a local and is about to be destroyed.
+}
+
+// Returns how many "%s" placeholders appear in s, scanning left to right
+// and resuming just past each occurrence found.
+static int CountArgs(const string& s) {
+  int count = 0;
+  for (const char* hit = strstr(s.c_str(), "%s"); hit != NULL;
+       hit = strstr(hit + 2, "%s")) {
+    count++;
+  }
+  return count;
+}
+
+// Recursively enumerates every postfix command sequence within the limits.
+// Each complete sequence is passed to RunPostfix, which turns it into a
+// regular expression.
+// Arguments:
+//   post:  the postfix sequence built so far (restored before returning)
+//   nstk:  how many values would be on the stack after running post
+//   ops:   how many operators post contains
+//   atoms: how many atoms post contains
+// Example: for post = ["a", "b", "%s%s", "c"],
+// nstk = 2, ops = 1, atoms = 3.
+//
+// Start the recursion with GeneratePostfix([empty vector], 0, 0, 0).
+//
+void RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk,
+                                      int ops, int atoms) {
+  // Exactly one value on the stack means post is a complete expression.
+  // Emit it, then carry on: it may also be a prefix of longer sequences.
+  if (nstk == 1)
+    RunPostfix(*post);
+
+  // Prune: too many operators already, or not enough operator budget left
+  // to fold the stack back down to one value with binary operators.
+  const bool out_of_budget = ops + nstk - 1 > maxops_;
+  if (out_of_budget)
+    return;
+
+  // Try appending each atom, if the atom budget allows.
+  if (atoms < maxatoms_) {
+    for (size_t a = 0; a < atoms_.size(); ++a) {
+      post->push_back(atoms_[a]);
+      GeneratePostfix(post, nstk + 1, ops, atoms + 1);
+      post->pop_back();
+    }
+  }
+
+  // Try appending each operator whose arguments are available.
+  if (ops < maxops_) {
+    for (size_t o = 0; o < ops_.size(); ++o) {
+      const string& op = ops_[o];
+      const int needed = CountArgs(op);
+      if (needed > nstk)
+        continue;
+      post->push_back(op);
+      GeneratePostfix(post, nstk - needed + 1, ops + 1, atoms);
+      post->pop_back();
+    }
+  }
+}
+
+// Generates a random postfix command sequence.
+// Stops and returns true once a single sequence has been generated
+// (and handed to RunPostfix).  Returns false if the early-out below fires
+// first, in which case the caller goes back to making random choices.
+// post, nstk, ops and atoms have the same meaning as in GeneratePostfix.
+// Dereferences acm_, which GenerateRandom sets before the initial call.
+// NOTE(review): the sequence of acm_->Uniform draws presumably determines
+// which expression a given seed yields, so the steps (including the
+// short-circuit && conditions) should not be reordered.
+bool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk,
+                                            int ops, int atoms) {
+  for (;;) {
+    // Stop if we get to a single element, but only sometimes.
+    // The random draw is made only when nstk == 1 (short-circuit &&).
+    if (nstk == 1 && acm_->Uniform(maxatoms_ + 1 - atoms) == 0) {
+      RunPostfix(*post);
+      return true;
+    }
+
+    // Early out: if used too many operators or can't
+    // get back down to a single expression on the stack
+    // using binary operators, give up.
+    if (ops + nstk - 1 > maxops_)
+      return false;
+
+    // Add operators if there are enough arguments.
+    // The operator is chosen at random before its argument count is
+    // checked; a choice needing more than nstk arguments is just dropped.
+    if (ops < maxops_ && acm_->Uniform(2) == 0) {
+      const string& fmt = ops_[acm_->Uniform(ops_.size())];
+      int nargs = CountArgs(fmt);
+      if (nargs <= nstk) {
+        post->push_back(fmt);
+        bool ret = GenerateRandomPostfix(post, nstk - nargs + 1,
+                                         ops + 1, atoms);
+        post->pop_back();
+        if (ret)
+          return true;
+      }
+    }
+
+    // Add atoms if there is room.
+    if (atoms < maxatoms_ && acm_->Uniform(2) == 0) {
+      post->push_back(atoms_[acm_->Uniform(atoms_.size())]);
+      bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
+      post->pop_back();
+      if (ret)
+        return true;
+    }
+  }
+}
+
+// Executes the postfix command sequence on a value stack to build a single
+// regular expression, then passes it to HandleRegexp four times: as is,
+// anchored at both ends, anchored at the start only, and anchored at the
+// end only.  Every operator result is wrapped in (?: ) so that no
+// precedence table is needed.
+void RegexpGenerator::RunPostfix(const vector<string>& post) {
+  stack<string> values;
+  for (size_t pc = 0; pc < post.size(); ++pc) {
+    const string& cmd = post[pc];
+    switch (CountArgs(cmd)) {
+      default:
+        LOG(FATAL) << "Bad operator: " << cmd;
+      case 0:
+        // An atom: push it unchanged.
+        values.push(cmd);
+        break;
+      case 1: {
+        // Unary operator: substitute the top of the stack.
+        string operand = values.top();
+        values.pop();
+        string expanded = StringPrintf(cmd.c_str(), operand.c_str());
+        values.push("(?:" + expanded + ")");
+        break;
+      }
+      case 2: {
+        // Binary operator: the top of the stack is the right-hand operand.
+        string rhs = values.top();
+        values.pop();
+        string lhs = values.top();
+        values.pop();
+        string expanded = StringPrintf(cmd.c_str(), lhs.c_str(), rhs.c_str());
+        values.push("(?:" + expanded + ")");
+        break;
+      }
+    }
+  }
+
+  if (values.size() != 1) {
+    // Internal error - should never happen.  Dump the program and the
+    // leftover stack before dying.
+    printf("Bad regexp program:\n");
+    for (size_t pc = 0; pc < post.size(); ++pc)
+      printf("  %s\n", CEscape(post[pc]).c_str());
+    printf("Stack after running program:\n");
+    for (; !values.empty(); values.pop())
+      printf("  %s\n", CEscape(values.top()).c_str());
+    LOG(FATAL) << "Bad regexp program.";
+  }
+
+  const string& re = values.top();
+  HandleRegexp(re);
+  HandleRegexp("^(?:" + re + ")$");
+  HandleRegexp("^(?:" + re + ")");
+  HandleRegexp("(?:" + re + ")$");
+}
+
+// Splits s into a vector of strings, one per UTF-8 character as decoded
+// by chartorune.
+vector<string> Explode(const StringPiece& s) {
+  vector<string> chars;
+
+  const char* cur = s.begin();
+  while (cur < s.end()) {
+    Rune r;
+    const int width = chartorune(&r, cur);  // Bytes consumed by this rune.
+    chars.push_back(string(cur, width));
+    cur += width;
+  }
+
+  return chars;
+}
+
+// Splits s at every occurrence of sep and returns the pieces in order.
+// An empty sep splits s into individual characters (see Explode).
+// Text after the last separator is included only if it is non-empty.
+vector<string> Split(const StringPiece& sep, const StringPiece& s) {
+  if (sep.size() == 0)
+    return Explode(s);
+
+  vector<string> pieces;
+  const char* piece_start = s.begin();
+  const char* scan = s.begin();
+  // Stop once fewer than sep.size() bytes remain: no match can start there.
+  while (scan + sep.size() <= s.end()) {
+    if (StringPiece(scan, sep.size()) == sep) {
+      pieces.push_back(string(piece_start, scan - piece_start));
+      scan += sep.size();
+      piece_start = scan;
+    } else {
+      scan++;
+    }
+  }
+  if (piece_start < s.end())
+    pieces.push_back(string(piece_start, s.end() - piece_start));
+  return pieces;
+}
+
+}  // namespace re2

diff --git a/re2/testing/regexp_generator.h b/re2/testing/regexp_generator.h
new file mode 100644
index 0000000..b4506f2
--- /dev/null
+++ b/re2/testing/regexp_generator.h

@@ -0,0 +1,70 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression generator: generates all possible
+// regular expressions within given parameters (see below for details).
+
+#ifndef RE2_TESTING_REGEXP_GENERATOR_H__
+#define RE2_TESTING_REGEXP_GENERATOR_H__
+
+#include <string>
+#include <vector>
+#include "util/random.h"
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+// Regular expression generator.
+//
+// Given a set of atom expressions like "a", "b", or "."
+// and operators like "%s*", generates all possible regular expressions
+// using at most maxatoms atoms and maxops operators.
+// For each such expression re, calls HandleRegexp(re).
+//
+// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
+//
+class RegexpGenerator {
+ public:
+  // maxatoms and maxops bound the size of each generated expression;
+  // atoms and ops are the building blocks to choose from.
+  RegexpGenerator(int maxatoms, int maxops, const vector<string>& atoms,
+                  const vector<string>& ops);
+  virtual ~RegexpGenerator() {}
+
+  // Generates all the regular expressions, calling HandleRegexp(re) for each.
+  void Generate();
+
+  // Generates n random regular expressions, calling HandleRegexp(re) for each.
+  void GenerateRandom(int32 seed, int n);
+
+  // Handles a regular expression.  Must be provided by subclass.
+  virtual void HandleRegexp(const string& regexp) = 0;
+
+  // The egrep regexp operators: * + ? | and concatenation
+  // (the table in regexp_generator.cc also includes a trailing \C*).
+  static const vector<string>& EgrepOps();
+
+ private:
+  // Evaluates a postfix command sequence and hands the result to
+  // HandleRegexp.
+  void RunPostfix(const vector<string>& post);
+  // Enumerate (exhaustively or at random) postfix command sequences.
+  // nstk is the resulting stack depth; ops and lits count the operators
+  // and atoms already in post.
+  void GeneratePostfix(vector<string>* post, int nstk, int ops, int lits);
+  bool GenerateRandomPostfix(vector<string>* post, int nstk, int ops, int lits);
+
+  int maxatoms_;           // Maximum number of atoms allowed in expr.
+  int maxops_;             // Maximum number of ops allowed in expr.
+  vector<string> atoms_;   // Possible atoms.
+  vector<string> ops_;     // Possible ops.
+  ACMRandom* acm_;         // Random generator.
+  DISALLOW_EVIL_CONSTRUCTORS(RegexpGenerator);
+};
+
+// Helpers for preparing arguments to RegexpGenerator constructor.
+
+// Returns one string for each character in s.
+vector<string> Explode(const StringPiece& s);
+
+// Splits string everywhere sep is found, returning
+// vector of pieces.
+vector<string> Split(const StringPiece& sep, const StringPiece& s);
+
+}  // namespace re2
+
+#endif  // RE2_TESTING_REGEXP_GENERATOR_H__

diff --git a/re2/testing/regexp_test.cc b/re2/testing/regexp_test.cc
new file mode 100644
index 0000000..f317cbc
--- /dev/null
+++ b/re2/testing/regexp_test.cc

@@ -0,0 +1,81 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test parse.cc, dump.cc, and tostring.cc.
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// Test that overflowed ref counts work: take and then release 100000
+// extra references, and check that exactly one reference remains.
+TEST(Regexp, BigRef) {
+  const int kExtraRefs = 100000;
+  Regexp* re = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
+  for (int n = 0; n < kExtraRefs; ++n)
+    re->Incref();
+  for (int n = 0; n < kExtraRefs; ++n)
+    re->Decref();
+  CHECK_EQ(re->Ref(), 1);
+  re->Decref();
+}
+
+// Test that very large Concats work.
+// Depends on overflowed ref counts working: the same sub-regexp is
+// referenced once per vector slot.
+TEST(Regexp, BigConcat) {
+  Regexp* x = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
+  vector<Regexp*> subs(90000, x);  // ToString bails out at 100000
+  for (size_t n = 0; n < subs.size(); ++n)
+    x->Incref();
+  CHECK_EQ(x->Ref(), 1 + subs.size()) << x->Ref();
+  Regexp* concat = Regexp::Concat(&subs[0], subs.size(), Regexp::NoParseFlags);
+  CHECK_EQ(concat->ToString(), string(subs.size(), 'x'));
+  concat->Decref();
+  // Dropping the concat must leave only the original reference to x.
+  CHECK_EQ(x->Ref(), 1) << x->Ref();
+  x->Decref();
+}
+
+// Checks the name -> index map returned by NamedCaptures for a regexp in
+// which the name g1 is used by two groups: g1 is expected to map to 1.
+TEST(Regexp, NamedCaptures) {
+  RegexpStatus status;
+  Regexp* re = Regexp::Parse(
+      "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
+  EXPECT_TRUE(status.ok());
+  EXPECT_EQ(4, re->NumCaptures());
+  const map<string, int>* actual = re->NamedCaptures();
+  EXPECT_TRUE(actual != NULL);
+  EXPECT_EQ(2, actual->size());  // there are only two named groups in
+                                 // the regexp: 'g1' and 'g2'.
+  map<string, int> expected;
+  expected["g1"] = 1;
+  expected["g2"] = 3;
+  EXPECT_EQ(expected, *actual);
+  re->Decref();
+  delete actual;  // The test deletes the returned map itself.
+}
+
+// Checks the index -> name map returned by CaptureNames: the unnamed
+// group 2 is absent, and both groups named g1 (1 and 4) are present.
+TEST(Regexp, CaptureNames) {
+  RegexpStatus status;
+  Regexp* re = Regexp::Parse(
+      "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
+  EXPECT_TRUE(status.ok());
+  EXPECT_EQ(4, re->NumCaptures());
+  const map<int, string>* actual = re->CaptureNames();
+  EXPECT_TRUE(actual != NULL);
+  EXPECT_EQ(3, actual->size());
+  map<int, string> expected;
+  expected[1] = "g1";
+  expected[3] = "g2";
+  expected[4] = "g1";
+
+  EXPECT_EQ(expected, *actual);
+  re->Decref();
+  delete actual;  // The test deletes the returned map itself.
+}
+
+}  // namespace re2

diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc
new file mode 100644
index 0000000..1f0b216
--- /dev/null
+++ b/re2/testing/required_prefix_test.cc

@@ -0,0 +1,67 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "util/test.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// One RequiredPrefix test case.  The last three fields are compared
+// against the outputs of Regexp::RequiredPrefix only when return_value
+// is true.
+struct PrefixTest {
+  const char* regexp;   // Regexp to parse.
+  bool return_value;    // Expected return value of RequiredPrefix.
+  const char* prefix;   // Expected prefix string.
+  bool foldcase;        // Expected fold-case flag.
+  const char* suffix;   // Expected ToString() of the suffix regexp.
+};
+
+// Table of RequiredPrefix expectations.  Rows whose return_value is false
+// omit the remaining fields; the test does not read them in that case.
+static PrefixTest tests[] = {
+  // If the regexp is missing a ^, there's no required prefix.
+  { "abc", false },
+  { "", false },
+  { "(?m)^", false },
+
+  // If the regexp immediately goes into
+  // something not a literal match, there's no required prefix.
+  { "^(abc)", false },
+  { "^a*",  false },
+
+  // Otherwise, it should work.
+  { "^abc$", true, "abc", false, "(?-m:$)" },
+  // return_value is a bool: use the literal true, not the string "true"
+  // (which only happened to convert to true as a non-null pointer).
+  { "^abc", true, "abc", false, "" },
+  { "^(?i)abc", true, "abc", true, "" },
+  { "^abcd*", true, "abc", false, "d*" },
+  { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
+  { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
+  { "^☺abc", true, "☺abc", false, "" },
+};
+
+// Runs every row of tests[] twice: once parsed with the Latin1 flag added
+// and once without it (labelled "latin1" and "utf" in failure messages).
+TEST(RequiredPrefix, SimpleTests) {
+  for (int idx = 0; idx < arraysize(tests); ++idx) {
+    const PrefixTest& tc = tests[idx];
+    for (int pass = 0; pass < 2; ++pass) {
+      const bool latin1 = (pass == 0);
+      const char* enc = latin1 ? "latin1" : "utf";
+      Regexp::ParseFlags flags = Regexp::LikePerl;
+      if (latin1)
+        flags = flags | Regexp::Latin1;
+      Regexp* re = Regexp::Parse(tc.regexp, flags, NULL);
+      CHECK(re) << " " << tc.regexp;
+      string prefix;
+      bool foldcase = false;
+      Regexp* suffix = NULL;
+      CHECK_EQ(tc.return_value,
+               re->RequiredPrefix(&prefix, &foldcase, &suffix))
+        << " " << tc.regexp << " " << enc << " " << re->Dump();
+      // The outputs are checked only when a prefix is expected.
+      if (tc.return_value) {
+        CHECK_EQ(prefix, string(tc.prefix))
+          << " " << tc.regexp << " " << enc;
+        CHECK_EQ(foldcase, tc.foldcase)
+          << " " << tc.regexp << " " << enc;
+        CHECK_EQ(suffix->ToString(), string(tc.suffix))
+          << " " << tc.regexp << " " << enc;
+        suffix->Decref();
+      }
+      re->Decref();
+    }
+  }
+}
+
+}  // namespace re2

diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc
new file mode 100644
index 0000000..3ab2ae3
--- /dev/null
+++ b/re2/testing/search_test.cc

@@ -0,0 +1,325 @@
+// Copyright 2006-2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdlib.h>
+#include <vector>
+#include "util/test.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+#include "re2/testing/tester.h"
+#include "re2/testing/exhaustive_tester.h"
+
+namespace re2 {
+
+// One search test case: a regular expression and the text to run it on.
+// Both fields are passed to TestRegexpOnText by the SearchTests test.
+struct RegexpTest {
+  const char* regexp;  // Pattern under test.
+  const char* text;    // Text to search; one table entry uses NULL.
+};
+
+// Table of (regexp, text) pairs.  The SearchTests test below passes each
+// pair to TestRegexpOnText.
+RegexpTest simple_tests[] = {
+  { "a", "a" },
+  { "a", "zyzzyva" },
+  { "a+", "aa" },
+  { "(a+|b)+", "ab" },
+  { "ab|cd", "xabcdx" },
+  { "h.*od?", "hello\ngoodbye\n" },
+  { "h.*o", "hello\ngoodbye\n" },
+  { "h.*o", "goodbye\nhello\n" },
+  { "h.*o", "hello world" },
+  { "h.*o", "othello, world" },
+  { "[^\\s\\S]", "aaaaaaa" },
+  { "a", "aaaaaaa" },
+  { "a*", "aaaaaaa" },
+  { "a*", "" },
+  { "a*", NULL },
+  { "ab|cd", "xabcdx" },
+  { "a", "cab" },
+  { "a*b", "cab" },
+  { "((((((((((((((((((((x))))))))))))))))))))", "x" },
+  { "[abcd]", "xxxabcdxxx" },
+  { "[^x]", "xxxabcdxxx" },
+  { "[abcd]+", "xxxabcdxxx" },
+  { "[^x]+", "xxxabcdxxx" },
+  { "(fo|foo)", "fo" },
+  { "(foo|fo)", "foo" },
+
+  { "aa", "aA" },
+  { "a", "Aa" },
+  { "a", "A" },
+  { "ABC", "abc" },
+  { "abc", "XABCY" },
+  { "ABC", "xabcy" },
+
+  // Make sure ^ and $ work.
+  // The pathological cases didn't work
+  // in the original grep code.
+  { "foo|bar|[A-Z]", "foo" },
+  { "^(foo|bar|[A-Z])", "foo" },
+  { "(foo|bar|[A-Z])$", "foo\n" },
+  { "(foo|bar|[A-Z])$", "foo" },
+  { "^(foo|bar|[A-Z])$", "foo\n" },
+  { "^(foo|bar|[A-Z])$", "foo" },
+  { "^(foo|bar|[A-Z])$", "bar" },
+  { "^(foo|bar|[A-Z])$", "X" },
+  { "^(foo|bar|[A-Z])$", "XY" },
+  { "^(fo|foo)$", "fo" },
+  { "^(fo|foo)$", "foo" },
+  { "^^(fo|foo)$", "fo" },
+  { "^^(fo|foo)$", "foo" },
+  { "^$", "" },
+  { "^$", "x" },
+  { "^^$", "" },
+  { "^$$", "" },
+  { "^^$", "x" },
+  { "^$$", "x" },
+  { "^^$$", "" },
+  { "^^$$", "x" },
+  { "^^^^^^^^$$$$$$$$", "" },
+  { "^", "x" },
+  { "$", "x" },
+
+  // Word boundaries.
+  { "\\bfoo\\b", "nofoo foo that" },
+  { "a\\b", "faoa x" },
+  { "\\bbar", "bar x" },
+  { "\\bbar", "foo\nbar x" },
+  { "bar\\b", "foobar" },
+  { "bar\\b", "foobar\nxxx" },
+  { "(foo|bar|[A-Z])\\b", "foo" },
+  { "(foo|bar|[A-Z])\\b", "foo\n" },
+  { "\\b", "" },
+  { "\\b", "x" },
+  { "\\b(foo|bar|[A-Z])", "foo" },
+  { "\\b(foo|bar|[A-Z])\\b", "X" },
+  { "\\b(foo|bar|[A-Z])\\b", "XY" },
+  { "\\b(foo|bar|[A-Z])\\b", "bar" },
+  { "\\b(foo|bar|[A-Z])\\b", "foo" },
+  { "\\b(foo|bar|[A-Z])\\b", "foo\n" },
+  { "\\b(foo|bar|[A-Z])\\b", "ffoo bbar N x" },
+  { "\\b(fo|foo)\\b", "fo" },
+  { "\\b(fo|foo)\\b", "foo" },
+  { "\\b\\b", "" },
+  { "\\b\\b", "x" },
+  { "\\b$", "" },
+  { "\\b$", "x" },
+  { "\\b$", "y x" },
+  { "\\b.$", "x" },
+  { "^\\b(fo|foo)\\b", "fo" },
+  { "^\\b(fo|foo)\\b", "foo" },
+  { "^\\b", "" },
+  { "^\\b", "x" },
+  { "^\\b\\b", "" },
+  { "^\\b\\b", "x" },
+  { "^\\b$", "" },
+  { "^\\b$", "x" },
+  { "^\\b.$", "x" },
+  { "^\\b.\\b$", "x" },
+  { "^^^^^^^^\\b$$$$$$$", "" },
+  { "^^^^^^^^\\b.$$$$$$", "x" },
+  { "^^^^^^^^\\b$$$$$$$", "x" },
+
+  // Non-word boundaries.
+  { "\\Bfoo\\B", "n foo xfoox that" },
+  { "a\\B", "faoa x" },
+  { "\\Bbar", "bar x" },
+  { "\\Bbar", "foo\nbar x" },
+  { "bar\\B", "foobar" },
+  { "bar\\B", "foobar\nxxx" },
+  { "(foo|bar|[A-Z])\\B", "foox" },
+  { "(foo|bar|[A-Z])\\B", "foo\n" },
+  { "\\B", "" },
+  { "\\B", "x" },
+  { "\\B(foo|bar|[A-Z])", "foo" },
+  { "\\B(foo|bar|[A-Z])\\B", "xXy" },
+  { "\\B(foo|bar|[A-Z])\\B", "XY" },
+  { "\\B(foo|bar|[A-Z])\\B", "XYZ" },
+  { "\\B(foo|bar|[A-Z])\\B", "abara" },
+  { "\\B(foo|bar|[A-Z])\\B", "xfoo_" },
+  { "\\B(foo|bar|[A-Z])\\B", "xfoo\n" },
+  { "\\B(foo|bar|[A-Z])\\B", "foo bar vNx" },
+  { "\\B(fo|foo)\\B", "xfoo" },
+  { "\\B(foo|fo)\\B", "xfooo" },
+  { "\\B\\B", "" },
+  { "\\B\\B", "x" },
+  { "\\B$", "" },
+  { "\\B$", "x" },
+  { "\\B$", "y x" },
+  { "\\B.$", "x" },
+  { "^\\B(fo|foo)\\B", "fo" },
+  { "^\\B(fo|foo)\\B", "foo" },
+  { "^\\B", "" },
+  { "^\\B", "x" },
+  { "^\\B\\B", "" },
+  { "^\\B\\B", "x" },
+  { "^\\B$", "" },
+  { "^\\B$", "x" },
+  { "^\\B.$", "x" },
+  { "^\\B.\\B$", "x" },
+  { "^^^^^^^^\\B$$$$$$$", "" },
+  { "^^^^^^^^\\B.$$$$$$", "x" },
+  { "^^^^^^^^\\B$$$$$$$", "x" },
+
+  // PCRE uses only ASCII for \b computation.
+  // All non-ASCII are *not* word characters.
+  { "\\bx\\b", "x" },
+  { "\\bx\\b", "x>" },
+  { "\\bx\\b", "<x" },
+  { "\\bx\\b", "<x>" },
+  { "\\bx\\b", "ax" },
+  { "\\bx\\b", "xb" },
+  { "\\bx\\b", "axb" },
+  { "\\bx\\b", "«x" },
+  { "\\bx\\b", "x»" },
+  { "\\bx\\b", "«x»" },
+  { "\\bx\\b", "axb" },
+  { "\\bx\\b", "áxβ" },
+  { "\\Bx\\B", "axb" },
+  { "\\Bx\\B", "áxβ" },
+
+  // Weird boundary cases.
+  { "^$^$", "" },
+  { "^$^", "" },
+  { "$^$", "" },
+
+  { "^$^$", "x" },
+  { "^$^", "x" },
+  { "$^$", "x" },
+
+  { "^$^$", "x\ny" },
+  { "^$^", "x\ny" },
+  { "$^$", "x\ny" },
+
+  { "^$^$", "x\n\ny" },
+  { "^$^", "x\n\ny" },
+  { "$^$", "x\n\ny" },
+
+  { "^(foo\\$)$", "foo$bar" },
+  { "(foo\\$)", "foo$bar" },
+  { "^...$", "abc" },
+
+  // UTF-8
+  { "^\xe6\x9c\xac$", "\xe6\x9c\xac" },
+  { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+  { "^...$", ".\xe6\x9c\xac." },
+
+  { "^\\C\\C\\C$", "\xe6\x9c\xac" },
+  { "^\\C$", "\xe6\x9c\xac" },
+  { "^\\C\\C\\C$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+
+  // Latin1
+  { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+  { "^.........$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+  { "^...$", ".\xe6\x9c\xac." },
+  { "^.....$", ".\xe6\x9c\xac." },
+
+  // Perl v Posix
+  { "\\B(fo|foo)\\B", "xfooo" },
+  { "(fo|foo)", "foo" },
+
+  // Octal escapes.
+  { "\\141", "a" },
+  { "\\060", "0" },
+  { "\\0600", "00" },
+  { "\\608", "08" },
+  { "\\01", "\01" },
+  { "\\018", "\01" "8" },
+
+  // Hexadecimal escapes
+  { "\\x{61}", "a" },
+  { "\\x61", "a" },
+  { "\\x{00000061}", "a" },
+
+  // Unicode scripts.
+  { "\\p{Greek}+", "aαβb" },
+  { "\\P{Greek}+", "aαβb" },
+  { "\\p{^Greek}+", "aαβb" },
+  { "\\P{^Greek}+", "aαβb" },
+
+  // Unicode properties.  Nd is decimal number.  N is any number.
+  { "[^0-9]+",  "abc123" },
+  { "\\p{Nd}+", "abc123²³¼½¾₀₉" },
+  { "\\p{^Nd}+", "abc123²³¼½¾₀₉" },
+  { "\\P{Nd}+", "abc123²³¼½¾₀₉" },
+  { "\\P{^Nd}+", "abc123²³¼½¾₀₉" },
+  { "\\pN+", "abc123²³¼½¾₀₉" },
+  { "\\p{N}+", "abc123²³¼½¾₀₉" },
+  { "\\p{^N}+", "abc123²³¼½¾₀₉" },
+
+  { "\\p{Any}+", "abc123" },
+
+  // Character classes & case folding.
+  { "(?i)[@-A]+", "@AaB" },  // matches @Aa but not B
+  { "(?i)[A-Z]+", "aAzZ" },
+  { "(?i)[^\\\\]+", "Aa\\" },  // \\ is between A-Z and a-z -
+                               // splits the ranges in an interesting way.
+
+  // would like to use, but PCRE mishandles in full-match, non-greedy mode
+  // { "(?i)[\\\\]+", "Aa" },
+
+  { "(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
+
+  // Character classes & case folding.
+  { "[@-A]+", "@AaB" },
+  { "[A-Z]+", "aAzZ" },
+  { "[^\\\\]+", "Aa\\" },
+  { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
+  
+  // Anchoring.  (^abc in aabcdef was a former bug)
+  // The tester checks for a match in the text and
+  // subpieces of the text with a byte removed on either side.
+  { "^abc", "abcdef" },
+  { "^abc", "aabcdef" },
+  { "^[ay]*[bx]+c", "abcdef" },
+  { "^[ay]*[bx]+c", "aabcdef" },
+  { "def$", "abcdef" },
+  { "def$", "abcdeff" },
+  { "d[ex][fy]$", "abcdef" },
+  { "d[ex][fy]$", "abcdeff" },
+  { "[dz][ex][fy]$", "abcdef" },
+  { "[dz][ex][fy]$", "abcdeff" },
+  { "(?m)^abc", "abcdef" },
+  { "(?m)^abc", "aabcdef" },
+  { "(?m)^[ay]*[bx]+c", "abcdef" },
+  { "(?m)^[ay]*[bx]+c", "aabcdef" },
+  { "(?m)def$", "abcdef" },
+  { "(?m)def$", "abcdeff" },
+  { "(?m)d[ex][fy]$", "abcdef" },
+  { "(?m)d[ex][fy]$", "abcdeff" },
+  { "(?m)[dz][ex][fy]$", "abcdef" },
+  { "(?m)[dz][ex][fy]$", "abcdeff" },
+  { "^", "a" },
+  { "^^", "a" },
+
+  // Context.
+  // The tester checks for a match in the text and
+  // subpieces of the text with a byte removed on either side.
+  { "a", "a" },
+  { "ab*", "a" },
+  { "a\\C*", "a" },
+  
+  // Former bugs.
+  { "a\\C*|ba\\C", "baba" },
+};
+
+// Runs TestRegexpOnText on every entry of simple_tests and expects no
+// failures.  All entries are run even after a failure, so the final count
+// covers the whole table.
+TEST(Regexp, SearchTests) {
+  int failed = 0;
+  for (int idx = 0; idx < arraysize(simple_tests); ++idx) {
+    const RegexpTest& tc = simple_tests[idx];
+    const bool passed = TestRegexpOnText(tc.regexp, tc.text);
+    if (!passed)
+      failed++;
+
+#ifdef LOGGING
+    // Build a dummy ExhaustiveTest call that will trigger just
+    // this one test, so that we log the test case.
+    vector<string> atom, alpha, ops;
+    atom.push_back(StringPiece(tc.regexp).as_string());
+    alpha.push_back(StringPiece(tc.text).as_string());
+    ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");
+#endif
+
+  }
+  EXPECT_EQ(failed, 0);
+}
+
+}  // namespace re2

diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc
new file mode 100644
index 0000000..74058a4
--- /dev/null
+++ b/re2/testing/set_test.cc

@@ -0,0 +1,114 @@
+// Copyright 2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+
+#include "util/test.h"
+#include "re2/re2.h"
+#include "re2/set.h"
+
+namespace re2 {
+
+// Unanchored set of "foo" and "bar": each pattern is reported whenever it
+// occurs anywhere in the text.
+TEST(Set, Unanchored) {
+  RE2::Set set(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  // A pattern that fails to parse is rejected with -1 and does not
+  // consume an index: "bar" still gets index 1.
+  CHECK_EQ(set.Add("foo", NULL), 0);
+  CHECK_EQ(set.Add("(", NULL), -1);
+  CHECK_EQ(set.Add("bar", NULL), 1);
+
+  CHECK_EQ(set.Compile(), true);
+
+  vector<int> matched;
+  CHECK_EQ(set.Match("foobar", &matched), true);
+  CHECK_EQ(matched.size(), 2);
+  CHECK_EQ(matched[0], 0);
+  CHECK_EQ(matched[1], 1);
+
+  matched.clear();
+  CHECK_EQ(set.Match("fooba", &matched), true);
+  CHECK_EQ(matched.size(), 1);
+  CHECK_EQ(matched[0], 0);
+
+  matched.clear();
+  CHECK_EQ(set.Match("oobar", &matched), true);
+  CHECK_EQ(matched.size(), 1);
+  CHECK_EQ(matched[0], 1);
+}
+
+// Unanchored set whose patterns share a prefix ("foo" and "foobar"):
+// both must be reported when the longer one occurs.
+TEST(Set, UnanchoredFactored) {
+  RE2::Set set(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  CHECK_EQ(set.Add("foo", NULL), 0);
+  CHECK_EQ(set.Add("(", NULL), -1);
+  CHECK_EQ(set.Add("foobar", NULL), 1);
+
+  CHECK_EQ(set.Compile(), true);
+
+  vector<int> matched;
+  CHECK_EQ(set.Match("foobar", &matched), true);
+  CHECK_EQ(matched.size(), 2);
+  CHECK_EQ(matched[0], 0);
+  CHECK_EQ(matched[1], 1);
+
+  matched.clear();
+  CHECK_EQ(set.Match("obarfoobaroo", &matched), true);
+  CHECK_EQ(matched.size(), 2);
+  CHECK_EQ(matched[0], 0);
+  CHECK_EQ(matched[1], 1);
+
+  matched.clear();
+  CHECK_EQ(set.Match("fooba", &matched), true);
+  CHECK_EQ(matched.size(), 1);
+  CHECK_EQ(matched[0], 0);
+
+  // Neither pattern occurs in "oobar".
+  matched.clear();
+  CHECK_EQ(set.Match("oobar", &matched), false);
+  CHECK_EQ(matched.size(), 0);
+}
+
+// A pattern ending in $ must still match in an unanchored set.
+TEST(Set, UnanchoredDollar) {
+  RE2::Set set(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  CHECK_EQ(set.Add("foo$", NULL), 0);
+  CHECK_EQ(set.Compile(), true);
+
+  vector<int> matched;
+  CHECK_EQ(set.Match("foo", &matched), true);
+  CHECK_EQ(matched.size(), 1);
+  CHECK_EQ(matched[0], 0);
+}
+
+// Set anchored at both ends: a pattern is reported only when it matches
+// the entire text.
+TEST(Set, Anchored) {
+  RE2::Set set(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+  CHECK_EQ(set.Add("foo", NULL), 0);
+  CHECK_EQ(set.Add("(", NULL), -1);
+  CHECK_EQ(set.Add("bar", NULL), 1);
+
+  CHECK_EQ(set.Compile(), true);
+
+  // Texts that merely contain a pattern do not match.
+  vector<int> matched;
+  CHECK_EQ(set.Match("foobar", &matched), false);
+  CHECK_EQ(matched.size(), 0);
+
+  CHECK_EQ(set.Match("fooba", &matched), false);
+  CHECK_EQ(matched.size(), 0);
+
+  CHECK_EQ(set.Match("oobar", &matched), false);
+  CHECK_EQ(matched.size(), 0);
+
+  // Exact texts match, each reporting its own index.
+  CHECK_EQ(set.Match("foo", &matched), true);
+  CHECK_EQ(matched.size(), 1);
+  CHECK_EQ(matched[0], 0);
+
+  CHECK_EQ(set.Match("bar", &matched), true);
+  CHECK_EQ(matched.size(), 1);
+  CHECK_EQ(matched[0], 1);
+
+}
+
+}  // namespace re2
+

diff --git a/re2/testing/simplify_test.cc b/re2/testing/simplify_test.cc
new file mode 100644
index 0000000..d54837c
--- /dev/null
+++ b/re2/testing/simplify_test.cc

@@ -0,0 +1,167 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test simplify.cc.
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+struct Test {
+  const char* regexp;      // Pattern handed to Regexp::Parse.
+  const char* simplified;  // Expected ToString() of the simplified regexp.
+};
+
+static Test tests[] = {
+  // Already-simple constructs
+  { "a", "a" },
+  { "ab", "ab" },
+  { "a|b", "[a-b]" },
+  { "ab|cd", "ab|cd" },
+  { "(ab)*", "(ab)*" },
+  { "(ab)+", "(ab)+" },
+  { "(ab)?", "(ab)?" },
+  { ".", "." },
+  { "^", "^" },
+  { "$", "$" },
+  { "[ac]", "[ac]" },
+  { "[^ac]", "[^ac]" },
+
+  // Posix character classes
+  { "[[:alnum:]]", "[0-9A-Za-z]" },
+  { "[[:alpha:]]", "[A-Za-z]" },
+  { "[[:blank:]]", "[\\t ]" },
+  { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },
+  { "[[:digit:]]", "[0-9]" },
+  { "[[:graph:]]", "[!-~]" },
+  { "[[:lower:]]", "[a-z]" },
+  { "[[:print:]]", "[ -~]" },
+  { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },
+  { "[[:space:]]" , "[\\t-\\r ]" },
+  { "[[:upper:]]", "[A-Z]" },
+  { "[[:xdigit:]]", "[0-9A-Fa-f]" },
+
+  // Perl character classes
+  { "\\d", "[0-9]" },
+  { "\\s", "[\\t-\\n\\f-\\r ]" },
+  { "\\w", "[0-9A-Z_a-z]" },
+  { "\\D", "[^0-9]" },
+  { "\\S", "[^\\t-\\n\\f-\\r ]" },
+  { "\\W", "[^0-9A-Z_a-z]" },
+  { "[\\d]", "[0-9]" },
+  { "[\\s]", "[\\t-\\n\\f-\\r ]" },
+  { "[\\w]", "[0-9A-Z_a-z]" },
+  { "[\\D]", "[^0-9]" },
+  { "[\\S]", "[^\\t-\\n\\f-\\r ]" },
+  { "[\\W]", "[^0-9A-Z_a-z]" },
+
+  // Posix repetitions
+  { "a{1}", "a" },
+  { "a{2}", "aa" },
+  { "a{5}", "aaaaa" },
+  { "a{0,1}", "a?" },
+  // The next three are illegible because Simplify inserts (?:)
+  // parens instead of () parens to avoid creating extra
+  // captured subexpressions.  The comments show a version with fewer parens.
+  { "(a){0,2}",                   "(?:(a)(a)?)?"     },  //       (aa?)?
+  { "(a){0,4}",       "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" },  //   (a(a(aa?)?)?)?
+  { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" },  // aa(a(a(aa?)?)?)?
+  { "a{0,2}",           "(?:aa?)?"     },  //       (aa?)?
+  { "a{0,4}",   "(?:a(?:a(?:aa?)?)?)?" },  //   (a(a(aa?)?)?)?
+  { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" },  // aa(a(a(aa?)?)?)?
+  { "a{0,}", "a*" },
+  { "a{1,}", "a+" },
+  { "a{2,}", "aa+" },
+  { "a{5,}", "aaaaa+" },
+
+  // Test that operators simplify their arguments.
+  // (Simplify used to not simplify arguments to a {} repeat.)
+  { "(?:a{1,}){1,}", "a+" },
+  { "(a{1,}b{1,})", "(a+b+)" },
+  { "a{1,}|b{1,}", "a+|b+" },
+  { "(?:a{1,})*", "(?:a+)*" },
+  { "(?:a{1,})+", "a+" },
+  { "(?:a{1,})?", "(?:a+)?" },
+  { "a{0}", "" },
+
+  // Character class simplification
+  { "[ab]", "[a-b]" },
+  { "[a-za-za-z]", "[a-z]" },
+  { "[A-Za-zA-Za-z]", "[A-Za-z]" },
+  { "[ABCDEFGH]", "[A-H]" },
+  { "[AB-CD-EF-GH]", "[A-H]" },
+  { "[W-ZP-XE-R]", "[E-Z]" },
+  { "[a-ee-gg-m]", "[a-m]" },
+  { "[a-ea-ha-m]", "[a-m]" },
+  { "[a-ma-ha-e]", "[a-m]" },
+  { "[a-zA-Z0-9 -~]", "[ -~]" },
+
+  // Empty character classes
+  { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },
+
+  // Full character classes
+  { "[[:cntrl:][:^cntrl:]]", "." },
+
+  // Unicode case folding.
+  { "(?i)A", "[Aa]" },
+  { "(?i)a", "[Aa]" },
+  { "(?i)K", "[Kk\\x{212a}]" },
+  { "(?i)k", "[Kk\\x{212a}]" },
+  { "(?i)\\x{212a}", "[Kk\\x{212a}]" },
+  { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },
+  { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },
+  { "(?i)[\\x00-\\x{10ffff}]", "." },
+
+  // Empty string as a regular expression.
+  // Empty string must be preserved inside parens in order
+  // to make submatches work right, so these are less
+  // interesting than they used to be.  ToString inserts
+  // explicit (?:) in place of non-parenthesized empty strings,
+  // to make them easier to spot for other parsers.
+  { "(a|b|)", "([a-b]|(?:))" },
+  { "(|)", "()" },
+  { "a()", "a()" },
+  { "(()|())", "(()|())" },
+  { "(a|)", "(a|(?:))" },
+  { "ab()cd()", "ab()cd()" },
+  { "()", "()" },
+  { "()*", "()*" },
+  { "()+", "()+" },
+  { "()?" , "()?" },
+  { "(){0}", "" },
+  { "(){1}", "()" },
+  { "(){1,}", "()+" },
+  { "(){0,2}", "(?:()()?)?" },
+};
+
+TEST(TestSimplify, SimpleRegexps) {
+  for (int i = 0; i < arraysize(tests); i++) {  // One pass per table entry.
+    RegexpStatus status;
+    VLOG(1) << "Testing " << tests[i].regexp;
+    Regexp* re = Regexp::Parse(tests[i].regexp,
+                               Regexp::MatchNL | (Regexp::LikePerl &
+                                                  ~Regexp::OneLine),
+                               &status);
+    CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text();
+    Regexp* sre = re->Simplify();
+    CHECK(sre != NULL);
+
+    // Check that already-simple regexps don't allocate new ones.
+    if (strcmp(tests[i].regexp, tests[i].simplified) == 0) {
+      CHECK(re == sre) << " " << tests[i].regexp
+        << " " << re->ToString() << " " << sre->ToString();
+    }
+    // The simplified regexp must print exactly as the table expects.
+    EXPECT_EQ(tests[i].simplified, sre->ToString())
+      << " " << tests[i].regexp << " " << sre->Dump();
+    // Both pointers are released, including when re and sre are the same object.
+    re->Decref();
+    sre->Decref();
+  }
+}
+
+}  // namespace re2

diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc
new file mode 100644
index 0000000..5be6d3e
--- /dev/null
+++ b/re2/testing/string_generator.cc

@@ -0,0 +1,113 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// String generator: generates all possible strings of up to
+// maxlen letters using the set of letters in alpha.
+// Fetch strings using a Java-like Next()/HasNext() interface.
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/testing/string_generator.h"
+
+namespace re2 {
+
+StringGenerator::StringGenerator(int maxlen, const vector<string>& alphabet)
+    : maxlen_(maxlen), alphabet_(alphabet),
+      generate_null_(false),
+      random_(false), nrandom_(0), acm_(NULL) {
+  // The first Next() yields the empty string, because digits_ starts empty.
+  hasnext_ = true;
+
+  // With an empty alphabet the empty string is the only one that can be
+  // generated, so cap the length at zero.
+  if (alphabet_.empty())
+    maxlen_ = 0;
+}
+
+StringGenerator::~StringGenerator() {
+  delete acm_;  // Still NULL unless Random() allocated a generator.
+}
+
+// Rewinds the generator: sequential mode, empty digits_, no pending NULL.
+void StringGenerator::Reset() {
+  generate_null_ = false;
+  random_ = false;
+  nrandom_ = 0;
+  hasnext_ = true;
+  digits_.clear();
+}
+
+// Advances digits_ (an odometer over alphabet indices) to the next string.
+// Returns true on success, false once every string up to maxlen_ was used.
+bool StringGenerator::IncrementDigits() {
+  // First try to increment the current number, carrying from the right.
+  for (int i = digits_.size() - 1; i >= 0; i--) {
+    if (++digits_[i] < static_cast<int>(alphabet_.size()))
+      return true;
+    digits_[i] = 0;
+  }
+
+  // All digits wrapped: grow by one digit.  Compare as signed so that a
+  if (static_cast<int>(digits_.size()) < maxlen_) {  // negative maxlen_ stops.
+    digits_.push_back(0);
+    return true;
+  }
+
+  return false;
+}
+
+// Generates random digits_, return true if successful.
+// Returns false if the random sequence is over.
+bool StringGenerator::RandomDigits() {
+  if (--nrandom_ <= 0)  // Counts down the budget set by Random().
+    return false;
+
+  // Pick length via Uniform(maxlen_+1); presumably in [0, maxlen_] -- confirm.
+  int len = acm_->Uniform(maxlen_+1);
+  digits_.resize(len);
+  for (int i = 0; i < len; i++)
+    digits_[i] = acm_->Uniform(alphabet_.size());  // One index per position.
+  return true;
+}
+
+// Returns the next string in the iteration, which is the one
+// currently described by digits_.  Calls IncrementDigits
+// after computing the string, so that it knows the answer
+// for subsequent HasNext() calls.
+const StringPiece& StringGenerator::Next() {
+  CHECK(hasnext_);
+  if (generate_null_) {  // One-shot request made through GenerateNULL().
+    generate_null_ = false;
+    sp_ = NULL;
+    return sp_;  // digits_ and hasnext_ are left untouched on this path.
+  }
+  s_.clear();
+  for (int i = 0; i < digits_.size(); i++) {
+    s_ += alphabet_[digits_[i]];  // Append the letter chosen for position i.
+  }
+  hasnext_ = random_ ? RandomDigits() : IncrementDigits();
+  sp_ = s_;  // The returned piece refers to s_, which the next call rewrites.
+  return sp_;
+}
+
+// Switches to random mode for n strings, (re)seeding the generator with seed.
+void StringGenerator::Random(int32 seed, int n) {
+  nrandom_ = n;
+  random_ = true;
+  hasnext_ = n > 0;
+
+  if (acm_ != NULL)
+    acm_->Reset(seed);
+  else
+    acm_ = new ACMRandom(seed);
+}
+
+void StringGenerator::GenerateNULL() {
+  hasnext_ = true;
+  generate_null_ = true;  // Consumed (and cleared) by the next call to Next().
+}
+
+}  // namespace re2
+

diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h
new file mode 100644
index 0000000..6a9ef42
--- /dev/null
+++ b/re2/testing/string_generator.h

@@ -0,0 +1,58 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// String generator: generates all possible strings of up to
+// maxlen letters using the set of letters in alpha.
+// Fetch strings using a Java-like Next()/HasNext() interface.
+
+#ifndef RE2_TESTING_STRING_GENERATOR_H__
+#define RE2_TESTING_STRING_GENERATOR_H__
+
+#include <string>
+#include <vector>
+#include "util/util.h"
+#include "util/random.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+class StringGenerator {
+ public:
+  StringGenerator(int maxlen, const vector<string>& alphabet);
+  ~StringGenerator();
+  const StringPiece& Next();           // Requires HasNext() to be true.
+  bool HasNext() { return hasnext_; }
+
+  // Resets generator to start sequence over.
+  void Reset();
+
+  // Causes generator to emit random strings for next n calls to Next().
+  void Random(int32 seed, int n);
+
+  // Causes generator to emit a NULL as the next call.
+  void GenerateNULL();
+
+ private:
+  bool IncrementDigits();    // Steps digits_ to the next sequential string.
+  bool RandomDigits();       // Fills digits_ with random indices.
+
+  // Global state.
+  int maxlen_;               // Maximum length string to generate.
+  vector<string> alphabet_;  // Alphabet, one string per letter.
+
+  // Iteration state.
+  StringPiece sp_;           // Last StringPiece returned by Next().
+  string s_;                 // String data in last StringPiece returned by Next().
+  bool hasnext_;             // Whether Next() can be called again.
+  vector<int> digits_;       // Alphabet indices for next string.
+  bool generate_null_;       // Whether to generate a NULL StringPiece next.
+  bool random_;              // Whether generated strings are random.
+  int nrandom_;              // Number of random strings left to generate.
+  ACMRandom* acm_;           // Random number generator; NULL until Random().
+  DISALLOW_EVIL_CONSTRUCTORS(StringGenerator);
+};
+
+}  // namespace re2
+
+#endif  // RE2_TESTING_STRING_GENERATOR_H__

diff --git a/re2/testing/string_generator_test.cc b/re2/testing/string_generator_test.cc
new file mode 100644
index 0000000..d13401a
--- /dev/null
+++ b/re2/testing/string_generator_test.cc

@@ -0,0 +1,109 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test StringGenerator.
+
+#include <stdlib.h>
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/testing/string_generator.h"
+#include "re2/testing/regexp_generator.h"
+
+namespace re2 {
+
+// Computes i raised to the power e by repeated multiplication (1 if e <= 0).
+static int64 IntegerPower(int i, int e) {
+  int64 result = 1;
+  for (int k = 0; k < e; k++)
+    result *= i;
+  return result;
+}
+
+// Checks that for given settings of the string generator:
+//   * it generates strings that are non-decreasing in length.
+//   * strings of the same length are sorted in alphabet order.
+//   * it doesn't generate the same string twice.
+//   * it generates the right number of strings.
+//
+// If all of these hold, the StringGenerator is behaving.
+// Assumes that the alphabet is sorted, so that the generated
+// strings can just be compared lexicographically.
+static void RunTest(int len, string alphabet, bool donull) {
+  StringGenerator g(len, Explode(alphabet));
+  // n counts generated strings; last_l and last_s describe the previous one.
+  int n = 0;
+  int last_l = -1;
+  string last_s;
+
+  if (donull) {  // The NULL piece is requested first and is not counted in n.
+    g.GenerateNULL();
+    EXPECT_TRUE(g.HasNext());
+    StringPiece sp = g.Next();
+    EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
+    EXPECT_EQ(sp.size(), 0);
+  }
+
+  while (g.HasNext()) {
+    string s = g.Next().as_string();
+    n++;
+
+    // Check that all characters in s appear in alphabet.
+    for (const char *p = s.c_str(); *p != '\0'; ) {
+      Rune r;
+      p += chartorune(&r, p);
+      EXPECT_TRUE(utfrune(alphabet.c_str(), r) != NULL);
+    }
+
+    // Check that string is properly ordered w.r.t. previous string.
+    int l = utflen(s.c_str());
+    EXPECT_LE(l, len);
+    if (last_l < l) {
+      last_l = l;
+    } else {
+      EXPECT_EQ(last_l, l);
+      EXPECT_LT(last_s, s);
+    }
+    last_s = s;
+  }
+
+  // Check total string count: the sum of alpha^i for i = 0..len.
+  int64 m = 0;
+  int alpha = utflen(alphabet.c_str());
+  if (alpha == 0)  // Degenerate case.
+    len = 0;
+  for (int i = 0; i <= len; i++)
+    m += IntegerPower(alpha, i);
+  EXPECT_EQ(n, m);
+}
+
+TEST(StringGenerator, NoLength) {
+  RunTest(0, "abc", false);  // Maximum length 0 with a non-empty alphabet.
+}
+
+TEST(StringGenerator, NoLengthNoAlphabet) {
+  RunTest(0, "", false);  // Maximum length 0 and an empty alphabet.
+}
+
+TEST(StringGenerator, NoAlphabet) {
+  RunTest(5, "", false);  // Empty alphabet: RunTest expects one string in all.
+}
+
+TEST(StringGenerator, Simple) {
+  RunTest(3, "abc", false);  // Strings of up to 3 letters over "abc".
+}
+
+TEST(StringGenerator, UTF8) {
+  RunTest(4, "abc\xE2\x98\xBA", false);  // Alphabet ends in a 3-byte sequence.
+}
+
+TEST(StringGenerator, GenNULL) {  // Same settings as above, with donull set.
+  RunTest(0, "abc", true);
+  RunTest(0, "", true);
+  RunTest(5, "", true);
+  RunTest(3, "abc", true);
+  RunTest(4, "abc\xE2\x98\xBA", true);
+}
+
+}  // namespace re2

diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
new file mode 100644
index 0000000..003dc5a
--- /dev/null
+++ b/re2/testing/tester.cc

@@ -0,0 +1,640 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression engine tester -- test all the implementations against each other.
+
+#include "util/util.h"
+#include "util/flags.h"
+#include "re2/testing/tester.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+
+DEFINE_bool(dump_prog, false, "dump regexp program");
+DEFINE_bool(log_okay, false, "log successful runs");
+DEFINE_bool(dump_rprog, false, "dump reversed regexp program");
+
+DEFINE_int32(max_regexp_failures, 100,
+             "maximum number of regexp test failures (-1 = unlimited)");
+
+DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
+
+namespace re2 {
+
+enum {
+  kMaxSubmatch = 1+16,  // $0...$16
+};
+
+const char* engine_types[kEngineMax] = {
+  "Backtrack",
+  "NFA",
+  "DFA",
+  "DFA1",
+  "OnePass",
+  "BitState",
+  "RE2",
+  "RE2a",
+  "RE2b",
+  "PCRE",
+};
+
+// Maps engine type t to its entry in engine_types, or to "type<N>" when
+// t is out of range or has no name.
+static string EngineString(Engine t) {
+  if (t >= 0 && t < arraysize(engine_types) && engine_types[t] != NULL)
+    return engine_types[t];
+  return StringPrintf("type%d", static_cast<int>(t));
+}
+
+// Returns bit mask of engines to use (bit 1<<e set for each enabled Engine e).
+static uint32 Engines() {
+  static uint32 cached_engines;
+  static bool did_parse;
+  // The mask is computed on the first call and reused afterwards.
+  if (did_parse)
+    return cached_engines;
+  // Empty flag enables everything; otherwise the flag is a substring filter.
+  if (FLAGS_regexp_engines.empty()) {
+    cached_engines = ~0;
+  } else {
+    for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
+      if (strstr(EngineString(i).c_str(), FLAGS_regexp_engines.c_str()))
+        cached_engines |= 1<<i;  // Engine name contains the flag text.
+  }
+
+  if (cached_engines == 0)
+    LOG(INFO) << "Warning: no engines enabled.";
+  if (!UsingPCRE)
+    cached_engines &= ~(1<<kEnginePCRE);
+  for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
+    if (cached_engines & (1<<i))
+      LOG(INFO) << EngineString(i) << " enabled";
+  }
+  did_parse = true;
+  return cached_engines;
+}
+
+// The result of running a match.
+struct TestInstance::Result {
+  bool skipped;         // test skipped: wasn't applicable
+  bool matched;         // found a match
+  bool untrusted;       // don't really trust the answer
+  bool have_submatch;   // computed all submatch info
+  bool have_submatch0;  // computed just submatch[0]
+  StringPiece submatch[kMaxSubmatch];  // $0 first; zeroed when not matched
+};
+
+typedef TestInstance::Result Result;
+
+// Renders capture s as "(a,b)", where a and b are the offsets of its start
+// and end measured from text.begin(); a NULL capture renders as "(?,?)".
+static string FormatCapture(const StringPiece& text, const StringPiece& s) {
+  if (s.begin() == NULL)
+    return "(?,?)";
+  const int start = static_cast<int>(s.begin() - text.begin());
+  const int limit = static_cast<int>(s.end() - text.begin());
+  return StringPrintf("(%d,%d)", start, limit);
+}
+
+// Reports whether any byte of text has its high bit set (value >= 0x80).
+static bool NonASCII(const StringPiece& text) {
+  for (int pos = 0; pos < text.size(); pos++)
+    if (static_cast<uint8>(text[pos]) & 0x80)
+      return true;
+  return false;
+}
+
+// Returns a readable name for match kind, or "???" if it is not listed below.
+static string FormatKind(Prog::MatchKind kind) {
+  switch (kind) {
+    case Prog::kFullMatch:
+      return "full match";
+    case Prog::kLongestMatch:
+      return "longest match";
+    case Prog::kFirstMatch:
+      return "first match";
+    case Prog::kManyMatch:
+      return "many match";
+  }
+  return "???";
+}
+
+// Returns a readable name for anchor, or "???" if it is not listed below.
+static string FormatAnchor(Prog::Anchor anchor) {
+  switch (anchor) {
+    case Prog::kAnchored:
+      return "anchored";
+    case Prog::kUnanchored:
+      return "unanchored";
+  }
+  return "???";
+}
+
+struct ParseMode {
+  Regexp::ParseFlags parse_flags;  // Flags a TestInstance is built with.
+  string desc;                     // Name that FormatMode() prints for them.
+};
+
+static const Regexp::ParseFlags single_line =
+  Regexp::LikePerl;
+static const Regexp::ParseFlags multi_line =
+  static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine);
+
+static ParseMode parse_modes[] = {
+  { single_line,                   "single-line"          },
+  { single_line|Regexp::Latin1,    "single-line, latin1"  },
+  { multi_line,                    "multiline"            },
+  { multi_line|Regexp::NonGreedy,  "multiline, nongreedy" },
+  { multi_line|Regexp::Latin1,     "multiline, latin1"    },
+};
+
+static string FormatMode(Regexp::ParseFlags flags) {
+  for (int i = 0; i < arraysize(parse_modes); i++)
+    if (parse_modes[i].parse_flags == flags)  // Exact match on the flag set.
+      return parse_modes[i].desc;
+  return StringPrintf("%#x", static_cast<uint>(flags));  // Unknown: print hex.
+}
+
+// Constructs and saves all the matching engines that
+// will be required for the given tests.
+TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
+                           Regexp::ParseFlags flags)
+  : regexp_str_(regexp_str),
+    kind_(kind),
+    flags_(flags),
+    error_(false),
+    regexp_(NULL),
+    num_captures_(0),
+    prog_(NULL),
+    rprog_(NULL),
+    re_(NULL),
+    re2_(NULL) {
+  // Any failure below sets error_ and returns, leaving later members NULL.
+  VLOG(1) << CEscape(regexp_str);
+
+  // Compile regexp to prog.
+  // Always required - needed for backtracking (reference implementation).
+  RegexpStatus status;
+  regexp_ = Regexp::Parse(regexp_str, flags, &status);
+  if (regexp_ == NULL) {
+    LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
+              << " mode: " << FormatMode(flags);
+    error_ = true;
+    return;
+  }
+  num_captures_ = regexp_->NumCaptures();
+  prog_ = regexp_->CompileToProg(0);
+  if (prog_ == NULL) {
+    LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
+    error_ = true;
+    return;
+  }
+  if (FLAGS_dump_prog) {
+    LOG(INFO) << "Prog for "
+              << " regexp "
+              << CEscape(regexp_str_)
+              << " (" << FormatKind(kind_)
+              << ", " << FormatMode(flags_)
+              << ")\n"
+              << prog_->Dump();
+  }
+
+  // Compile regexp to reversed prog.  Only needed for DFA engines.
+  if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
+    rprog_ = regexp_->CompileToReverseProg(0);
+    if (rprog_ == NULL) {
+      LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
+      error_ = true;
+      return;
+    }
+    if (FLAGS_dump_rprog)
+      LOG(INFO) << rprog_->Dump();
+  }
+
+  // Create re string that will be used for RE and RE2.
+  string re = regexp_str.as_string();
+  // Accommodate flags by prepending the equivalent inline flag groups.
+  // Regexp::Latin1 will be accommodated below.
+  if (!(flags & Regexp::OneLine))
+    re = "(?m)" + re;
+  if (flags & Regexp::NonGreedy)
+    re = "(?U)" + re;
+  if (flags & Regexp::DotNL)
+    re = "(?s)" + re;
+
+  // Compile regexp to RE2.
+  if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) {
+    RE2::Options options;
+    if (flags & Regexp::Latin1)
+      options.set_encoding(RE2::Options::EncodingLatin1);
+    if (kind_ == Prog::kLongestMatch)
+      options.set_longest_match(true);
+    re2_ = new RE2(re, options);
+    if (!re2_->error().empty()) {
+      LOG(INFO) << "Cannot RE2: " << CEscape(re);
+      error_ = true;
+      return;
+    }
+  }
+
+  // Compile regexp to RE.
+  // PCRE as exposed by the RE interface isn't always usable.
+  // 1. It disagrees about handling of empty-string repetitions
+  //    like matching (a*)* against "b".  PCRE treats the (a*) as
+  //    occurring once, while we treat it as occurring not at all.
+  // 2. It treats $ as this weird thing meaning end of string
+  //    or before the \n at the end of the string.
+  // 3. It doesn't implement POSIX leftmost-longest matching.
+  // MimicsPCRE() detects 1 and 2.
+  if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
+      kind_ != Prog::kLongestMatch) {
+    PCRE_Options o;
+    o.set_option(PCRE::UTF8);
+    if (flags & Regexp::Latin1)
+      o.set_option(PCRE::None);
+    // PCRE has interface bug keeping us from finding $0, so
+    // add one more layer of parens.
+    re_ = new PCRE("("+re+")", o);
+    if (!re_->error().empty()) {
+      LOG(INFO) << "Cannot PCRE: " << CEscape(re);
+      error_ = true;
+      return;
+    }
+  }
+}
+
+TestInstance::~TestInstance() {
+  if (regexp_)  // NULL when parsing failed in the constructor.
+    regexp_->Decref();
+  delete prog_;
+  delete rprog_;
+  delete re_;
+  delete re2_;
+}
+
+// Runs a single search using the named engine type.
+// This interface hides all the irregularities of the various
+// engine interfaces from the rest of this file.
+void TestInstance::RunSearch(Engine type,
+                             const StringPiece& orig_text,
+                             const StringPiece& orig_context,
+                             Prog::Anchor anchor,
+                             Result *result) {
+  memset(result, 0, sizeof *result);  // Start from an all-zero result.
+  if (regexp_ == NULL) {
+    result->skipped = true;
+    return;
+  }
+  int nsubmatch = 1 + num_captures_;  // NumCaptures doesn't count $0
+  if (nsubmatch > kMaxSubmatch)
+    nsubmatch = kMaxSubmatch;
+
+  StringPiece text = orig_text;
+  StringPiece context = orig_context;
+
+  switch (type) {
+    default:
+      LOG(FATAL) << "Bad RunSearch type: " << (int)type;
+
+    case kEngineBacktrack:
+      if (prog_ == NULL) {
+        result->skipped = true;
+        break;
+      }
+      result->matched =
+        prog_->UnsafeSearchBacktrack(text, context, anchor, kind_,
+                                     result->submatch, nsubmatch);
+      result->have_submatch = true;
+      break;
+
+    case kEngineNFA:
+      if (prog_ == NULL) {
+        result->skipped = true;
+        break;
+      }
+      result->matched =
+        prog_->SearchNFA(text, context, anchor, kind_,
+                        result->submatch, nsubmatch);
+      result->have_submatch = true;
+      break;
+
+    case kEngineDFA:  // Match/no-match only: no submatch pointer is passed.
+      if (prog_ == NULL) {
+        result->skipped = true;
+        break;
+      }
+      result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL,
+                                         &result->skipped, NULL);
+      break;
+
+    case kEngineDFA1:  // Forward search, then the reversed prog on its result.
+      if (prog_ == NULL || rprog_ == NULL) {
+        result->skipped = true;
+        break;
+      }
+      result->matched =
+        prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
+                         &result->skipped, NULL);
+      // If anchored, no need for second run,
+      // but do it anyway to find more bugs.
+      if (result->matched) {
+        if (!rprog_->SearchDFA(result->submatch[0], context,
+                               Prog::kAnchored, Prog::kLongestMatch,
+                               result->submatch,
+                               &result->skipped, NULL)) {
+          LOG(ERROR) << "Reverse DFA inconsistency: " << CEscape(regexp_str_)
+                     << " on " << CEscape(text);
+          result->matched = false;
+        }
+      }
+      result->have_submatch0 = true;
+      break;
+
+    case kEngineOnePass:
+      if (prog_ == NULL ||
+          anchor == Prog::kUnanchored ||
+          !prog_->IsOnePass() ||
+          nsubmatch > Prog::kMaxOnePassCapture) {
+        result->skipped = true;
+        break;
+      }
+      result->matched = prog_->SearchOnePass(text, context, anchor, kind_,
+                                      result->submatch, nsubmatch);
+      result->have_submatch = true;
+      break;
+
+    case kEngineBitState:
+      if (prog_ == NULL) {
+        result->skipped = true;
+        break;
+      }
+      result->matched = prog_->SearchBitState(text, context, anchor, kind_,
+                                              result->submatch, nsubmatch);
+      result->have_submatch = true;
+      break;
+
+    case kEngineRE2:
+    case kEngineRE2a:
+    case kEngineRE2b: {  // All three RE2 variants take this same path.
+      if (!re2_ || text.end() != context.end()) {
+        result->skipped = true;
+        break;
+      }
+
+      RE2::Anchor re_anchor;
+      if (anchor == Prog::kAnchored)
+        re_anchor = RE2::ANCHOR_START;
+      else
+        re_anchor = RE2::UNANCHORED;
+      if (kind_ == Prog::kFullMatch)
+        re_anchor = RE2::ANCHOR_BOTH;
+
+      result->matched = re2_->Match(context,
+                                    text.begin() - context.begin(),
+                                    text.end() - context.begin(),
+                                    re_anchor, result->submatch, nsubmatch);
+      result->have_submatch = nsubmatch > 0;
+      break;
+    }
+
+    case kEnginePCRE: {  // Only run when text spans the whole context.
+      if (!re_ || text.begin() != context.begin() ||
+          text.end() != context.end()) {
+        result->skipped = true;
+        break;
+      }
+
+      const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
+      PCRE::Arg *a = new PCRE::Arg[nsubmatch];
+      for (int i = 0; i < nsubmatch; i++) {
+        a[i] = PCRE::Arg(&result->submatch[i]);
+        argptr[i] = &a[i];
+      }
+      int consumed;
+      PCRE::Anchor pcre_anchor;
+      if (anchor == Prog::kAnchored)
+        pcre_anchor = PCRE::ANCHOR_START;
+      else
+        pcre_anchor = PCRE::UNANCHORED;
+      if (kind_ == Prog::kFullMatch)
+        pcre_anchor = PCRE::ANCHOR_BOTH;
+      re_->ClearHitLimit();
+      result->matched =
+        re_->DoMatch(text,
+                     pcre_anchor,
+                     &consumed,
+                     argptr, nsubmatch);
+      if (re_->HitLimit()) {  // Flag the answer as untrusted and stop here.
+        result->untrusted = true;
+        delete[] argptr;
+        delete[] a;
+        break;
+      }
+      result->have_submatch = true;
+
+      // Work around RE interface bug: PCRE returns -1 as the
+      // offsets for an unmatched subexpression, and RE should
+      // turn that into StringPiece(NULL) but in fact it uses
+      // StringPiece(text.begin() - 1, 0).  Oops.
+      for (int i = 0; i < nsubmatch; i++)
+        if (result->submatch[i].begin() == text.begin() - 1)
+          result->submatch[i] = NULL;
+      delete[] argptr;
+      delete[] a;
+      break;
+    }
+  }
+  // A failed search reports no submatches, whatever the engine wrote.
+  if (!result->matched)
+    memset(result->submatch, 0, sizeof result->submatch);
+}
+
+// Checks whether r is okay given that correct is the right answer.
+// Specifically, r's answers have to match (but it doesn't have to
+// claim to have all the answers).
+static bool ResultOkay(const Result& r, const Result& correct) {
+  if (r.skipped)  // A skipped engine made no claim, so it cannot be wrong.
+    return true;
+  if (r.matched != correct.matched)
+    return false;
+  if (r.have_submatch || r.have_submatch0) {
+    for (int i = 0; i < kMaxSubmatch; i++) {
+      if (correct.submatch[i].begin() != r.submatch[i].begin() ||
+          correct.submatch[i].size() != r.submatch[i].size())
+        return false;
+      if (!r.have_submatch)  // Only submatch[0] was computed: stop after it.
+        break;
+    }
+  }
+  return true;
+}
+
+// Runs one case on every enabled engine; false if a trusted result differs.
+bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
+                           Prog::Anchor anchor) {
+  // Backtracking is the gold standard.
+  Result correct;
+  RunSearch(kEngineBacktrack, text, context, anchor, &correct);
+  if (correct.skipped) {
+    if (regexp_ == NULL)  // Unparseable regexp: nothing to compare.
+      return true;
+    LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
+               << " " << FormatMode(flags_);
+    return false;
+  }
+  VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
+          << " text " << CEscape(text)
+          << " (" << FormatKind(kind_)
+          << ", " << FormatAnchor(anchor)
+          << ", " << FormatMode(flags_)
+          << ")";
+
+  // Compare the others.
+  bool all_okay = true;
+  for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) {
+    if (!(Engines() & (1<<i)))
+      continue;
+
+    Result r;
+    RunSearch(i, text, context, anchor, &r);
+    if (ResultOkay(r, correct)) {
+      if (FLAGS_log_okay)
+        LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
+      continue;
+    }
+
+    // We disagree with PCRE on the meaning of some Unicode matches.
+    // In particular, we treat all non-ASCII UTF-8 as word characters.
+    // We also treat "empty" character sets like [^\w\W] as being
+    // impossible to match, while PCRE apparently excludes some code
+    // points (e.g., 0x0080) from both \w and \W.
+    if (i == kEnginePCRE && NonASCII(text))
+      continue;
+    // Untrusted mismatches are logged but do not fail the case.
+    if (!r.untrusted)
+      all_okay = false;
+
+    LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
+             context, anchor);
+    if (r.matched != correct.matched) {
+      if (r.matched) {
+        LOG(INFO) << "   Should not match (but does).";
+      } else {
+        LOG(INFO) << "   Should match (but does not).";
+        continue;
+      }
+    }  // Result::submatch holds kMaxSubmatch entries, so stop there.
+    for (int j = 0; j < 1+num_captures_ && j < kMaxSubmatch; j++) {
+      if (r.submatch[j].begin() != correct.submatch[j].begin() ||
+          r.submatch[j].end() != correct.submatch[j].end()) {
+        LOG(INFO) <<
+          StringPrintf("   $%d: should be %s is %s",
+                       j,
+                       FormatCapture(text, correct.submatch[j]).c_str(),
+                       FormatCapture(text, r.submatch[j]).c_str());
+      } else {
+        LOG(INFO) <<
+          StringPrintf("   $%d: %s ok", j,
+                       FormatCapture(text, r.submatch[j]).c_str());
+      }
+    }
+  }
+  // Each failing case spends one unit of the --max_regexp_failures budget.
+  if (!all_okay) {
+    if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
+      LOG(QFATAL) << "Too many regexp failures.";
+  }
+
+  return all_okay;
+}
+
+void TestInstance::LogMatch(const char* prefix, Engine e,
+                            const StringPiece& text, const StringPiece& context,
+                            Prog::Anchor anchor) {
+  LOG(INFO) << prefix  // One INFO line describing engine e's run on text.
+    << EngineString(e)
+    << " regexp "
+    << CEscape(regexp_str_)
+    << " "
+    << CEscape(regexp_->ToString())  // Dereferenced: regexp_ must be non-NULL.
+    << " text "
+    << CEscape(text)
+    << " ("
+    << text.begin() - context.begin()  // Offsets of text within context.
+    << ","
+    << text.end() - context.begin()
+    << ") of context "
+    << CEscape(context)
+    << " (" << FormatKind(kind_)
+    << ", " << FormatAnchor(anchor)
+    << ", " << FormatMode(flags_)
+    << ")";
+}
+
+static Prog::MatchKind kinds[] = {
+  Prog::kFirstMatch,
+  Prog::kLongestMatch,
+  Prog::kFullMatch,
+};
+
+// Builds one TestInstance per (match kind, parse mode) pair for regexp.
+Tester::Tester(const StringPiece& regexp) {
+  error_ = false;
+  for (int i = 0; i < arraysize(kinds); i++) {
+    for (int j = 0; j < arraysize(parse_modes); j++) {
+      TestInstance* t = new TestInstance(regexp, kinds[i],
+                                         parse_modes[j].parse_flags);
+      error_ |= t->error();  // Sticky: set if any instance reported an error.
+      v_.push_back(t);       // Deleted in ~Tester().
+    }
+  }
+}
+
+Tester::~Tester() {
+  for (int i = 0; i < v_.size(); i++)  // Frees what the constructor allocated.
+    delete v_[i];
+}
+
+bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
+                         Prog::Anchor anchor) {
+  bool okay = true;
+  for (int i = 0; i < v_.size(); i++)  // No early exit after a failure.
+    okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
+  return okay;  // False if any instance had an error or failed its case.
+}
+
+static Prog::Anchor anchors[] = {
+  Prog::kAnchored,
+  Prog::kUnanchored
+};
+
+bool Tester::TestInput(const StringPiece& text) {
+  bool okay = TestInputInContext(text, text);  // Text as its own context.
+  if (text.size() > 0) {
+    StringPiece sp;
+    sp = text;
+    sp.remove_prefix(1);  // Search all but the first byte, in the full context.
+    okay &= TestInputInContext(sp, text);
+    sp = text;
+    sp.remove_suffix(1);  // Search all but the last byte, in the full context.
+    okay &= TestInputInContext(sp, text);
+  }
+  return okay;
+}
+
+bool Tester::TestInputInContext(const StringPiece& text,
+                                const StringPiece& context) {
+  bool okay = true;
+  for (int i = 0; i < arraysize(anchors); i++)  // Each entry of anchors[].
+    okay &= TestCase(text, context, anchors[i]);
+  return okay;
+}
+
+bool TestRegexpOnText(const StringPiece& regexp,
+                      const StringPiece& text) {
+  Tester t(regexp);  // Temporary Tester used for this one call.
+  return t.TestInput(text);
+}
+
+}  // namespace re2

diff --git a/re2/testing/tester.h b/re2/testing/tester.h
new file mode 100644
index 0000000..6e16e77
--- /dev/null
+++ b/re2/testing/tester.h

@@ -0,0 +1,121 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Comparative tester for regular expression matching.
+// Checks all implementations against each other.
+
+#ifndef RE2_TESTING_TESTER_H__
+#define RE2_TESTING_TESTER_H__
+
+#include "re2/stringpiece.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+#include "re2/re2.h"
+#include "util/pcre.h"
+
+namespace re2 {
+
+class Regexp;
+
+// All the supported regexp engines.
+enum Engine {
+  kEngineBacktrack = 0,    // Prog::BadSearchBacktrack
+  kEngineNFA,              // Prog::SearchNFA
+  kEngineDFA,              // Prog::SearchDFA, only ask whether it matched
+  kEngineDFA1,             // Prog::SearchDFA, ask for match[0]
+  kEngineOnePass,          // Prog::SearchOnePass, if applicable
+  kEngineBitState,         // Prog::SearchBitState
+  kEngineRE2,              // RE2, all submatches
+  kEngineRE2a,             // RE2, only ask for match[0]
+  kEngineRE2b,             // RE2, only ask whether it matched
+  kEnginePCRE,             // PCRE (util/pcre.h)
+
+  kEngineMax,              // one past the last engine (not an engine itself)
+};
+
+// Arithmetic helpers so that stepping through Engine values keeps the enum type.
+// C++ provides no built-in ++ for enums, and e+1 would otherwise be an int.
+static inline void operator++(Engine& e, int unused) {
+  e = static_cast<Engine>(static_cast<int>(e) + 1);
+}
+
+static inline Engine operator+(Engine e, int i) {
+  return static_cast<Engine>(static_cast<int>(e) + i);
+}
+
+// A TestInstance caches per-regexp state for a given
+// regular expression in a given configuration
+// (UTF-8 vs Latin1, longest vs first match, etc.).
+class TestInstance {
+ public:
+  struct Result;
+
+  TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
+               Regexp::ParseFlags flags);
+  ~TestInstance();
+  Regexp::ParseFlags flags() { return flags_; }
+  bool error() { return error_; }
+
+  // Runs a single test case: search in text, which is in context,
+  // using the given anchoring.
+  bool RunCase(const StringPiece& text, const StringPiece& context,
+               Prog::Anchor anchor);
+
+ private:
+  // Runs a single search using the named engine type.
+  void RunSearch(Engine type,
+                 const StringPiece& text, const StringPiece& context,
+                 Prog::Anchor anchor, Result *result);
+
+  void LogMatch(const char* prefix, Engine e, const StringPiece& text,
+                const StringPiece& context, Prog::Anchor anchor);
+
+  // Held by value: a reference could outlive a temporary StringPiece argument.
+  const StringPiece regexp_str_;    // regexp being tested
+  Prog::MatchKind kind_;            // kind of match
+  Regexp::ParseFlags flags_;        // flags for parsing regexp_str_
+  bool error_;                      // error during constructor?
+
+  Regexp* regexp_;                  // parsed regexp
+  int num_captures_;                // regexp_->NumCaptures() cached
+  Prog* prog_;                      // compiled program
+  Prog* rprog_;                     // compiled reverse program
+  PCRE* re_;                        // PCRE implementation
+  RE2* re2_;                        // RE2 implementation
+
+  DISALLOW_EVIL_CONSTRUCTORS(TestInstance);
+};
+
+// A group of TestInstances for all possible configurations.
+class Tester {
+ public:
+  explicit Tester(const StringPiece& regexp);
+  ~Tester();
+
+  bool error() { return error_; }
+
+  // Runs a single test case on every TestInstance: search in text,
+  // which is in context, using the given anchoring.
+  bool TestCase(const StringPiece& text, const StringPiece& context,
+                Prog::Anchor anchor);
+
+  // Runs every anchoring on text, then on text minus its first or last char.
+  bool TestInput(const StringPiece& text);
+
+  // Run TestCase(text, context, anchor) for all anchoring modes.
+  bool TestInputInContext(const StringPiece& text, const StringPiece& context);
+
+ private:
+  bool error_;                // true once any TestInstance reports error()
+  vector<TestInstance*> v_;   // owned; one per configuration
+
+  DISALLOW_EVIL_CONSTRUCTORS(Tester);
+};
+
+// Run all possible tests using regexp and text.
+bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
+
+}  // namespace re2
+
+#endif  // RE2_TESTING_TESTER_H__

diff --git a/re2/testing/unicode_test.py b/re2/testing/unicode_test.py
new file mode 100755
index 0000000..a88a3ad
--- /dev/null
+++ b/re2/testing/unicode_test.py

@@ -0,0 +1,207 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2008 The RE2 Authors.  All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Unittest for the util/regexp/re2/unicode.py module."""
+
+import os
+import StringIO
+from google3.pyglib import flags
+from google3.testing.pybase import googletest
+from google3.util.regexp.re2 import unicode
+
+_UNICODE_DIR = os.path.join(flags.FLAGS.test_srcdir, "google3", "third_party",
+                            "unicode", "ucd-5.1.0")
+
+
+class ConvertTest(googletest.TestCase):
+  """Test the conversion functions."""
+
+  def testUInt(self):
+    self.assertEquals(0x0000, unicode._UInt("0000"))
+    self.assertEquals(0x263A, unicode._UInt("263A"))
+    self.assertEquals(0x10FFFF, unicode._UInt("10FFFF"))
+    self.assertRaises(unicode.InputError, unicode._UInt, "263")  # 3 digits
+    self.assertRaises(unicode.InputError, unicode._UInt, "263AAAA")  # 7 digits
+    self.assertRaises(unicode.InputError, unicode._UInt, "110000")  # 0x10FFFF+1
+
+  def testURange(self):
+    self.assertEquals([1, 2, 3], unicode._URange("0001..0003"))
+    self.assertEquals([1], unicode._URange("0001"))
+    self.assertRaises(unicode.InputError, unicode._URange, "0001..0003..0005")
+    self.assertRaises(unicode.InputError, unicode._URange, "0003..0001")  # lo > hi
+    self.assertRaises(unicode.InputError, unicode._URange, "0001..0001")  # lo == hi
+
+  def testUStr(self):
+    self.assertEquals("0x263A", unicode._UStr(0x263a))
+    self.assertEquals("0x10FFFF", unicode._UStr(0x10FFFF))
+    self.assertRaises(unicode.InputError, unicode._UStr, 0x110000)  # 0x10FFFF+1
+    self.assertRaises(unicode.InputError, unicode._UStr, -1)
+
+
+_UNICODE_TABLE = """# Commented line, should be ignored.
+# The next line is blank and should be ignored.
+
+0041;Capital A;Line 1
+0061..007A;Lowercase;Line 2
+1F00;<Greek, First>;Ignored
+1FFE;<Greek, Last>;Line 3
+10FFFF;Runemax;Line 4
+0000;Zero;Line 5
+"""
+
+_BAD_TABLE1 = """
+111111;Not a code point;
+"""
+
+_BAD_TABLE2 = """
+0000;<Zero, First>;Missing <Zero, Last>
+"""
+
+_BAD_TABLE3 = """
+0010..0001;Bad range;
+"""
+
+
+class AbortError(Exception):
+  """Function should not have been called."""
+
+
+def Abort(*unused_args):  # tolerate the (codes, fields) a table reader passes
+  raise AbortError("Abort")
+
+
+def StringTable(s, n, f):  # wraps string s in a StringIO for ReadUnicodeTable
+  unicode.ReadUnicodeTable(StringIO.StringIO(s), n, f)
+
+
+class ReadUnicodeTableTest(googletest.TestCase):
+  """Test the ReadUnicodeTable function."""
+
+  def testSimpleTable(self):
+
+    ncall = [0]  # can't assign to ordinary int in DoLine
+
+    def DoLine(codes, fields):  # callback handed to ReadUnicodeTable
+      self.assertEquals(3, len(fields))
+      ncall[0] += 1  # 1-based index of this callback invocation
+      self.assertEquals("Line %d" % (ncall[0],), fields[2])
+      if ncall[0] == 1:
+        self.assertEquals([0x0041], codes)
+        self.assertEquals("0041", fields[0])
+        self.assertEquals("Capital A", fields[1])
+      elif ncall[0] == 2:
+        self.assertEquals(range(0x0061, 0x007A + 1), codes)
+        self.assertEquals("0061..007A", fields[0])
+        self.assertEquals("Lowercase", fields[1])
+      elif ncall[0] == 3:  # the <Greek, First>/<Greek, Last> lines, merged
+        self.assertEquals(range(0x1F00, 0x1FFE + 1), codes)
+        self.assertEquals("1F00..1FFE", fields[0])
+        self.assertEquals("Greek", fields[1])
+      elif ncall[0] == 4:
+        self.assertEquals([0x10FFFF], codes)
+        self.assertEquals("10FFFF", fields[0])
+        self.assertEquals("Runemax", fields[1])
+      elif ncall[0] == 5:
+        self.assertEquals([0x0000], codes)
+        self.assertEquals("0000", fields[0])
+        self.assertEquals("Zero", fields[1])
+
+    StringTable(_UNICODE_TABLE, 3, DoLine)
+    self.assertEquals(5, ncall[0])  # the First/Last pair counts as one call
+
+  def testErrorTables(self):  # InputError must be raised before Abort is called
+    self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 4, Abort)
+    self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 2, Abort)
+    self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE1, 3, Abort)
+    self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE2, 3, Abort)
+    self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE3, 3, Abort)
+
+
+class ParseContinueTest(googletest.TestCase):
+  """Test the _ParseContinue function."""
+
+  def testParseContinue(self):
+    self.assertEquals(("Private Use", "First"),
+                      unicode._ParseContinue("<Private Use, First>"))
+    self.assertEquals(("Private Use", "Last"),
+                      unicode._ParseContinue("<Private Use, Last>"))
+    self.assertEquals(("<Private Use, Blah>", None),  # other suffix: unchanged
+                      unicode._ParseContinue("<Private Use, Blah>"))
+
+
+class CaseGroupsTest(googletest.TestCase):
+  """Test the CaseGroups function (and the CaseFoldingReader)."""
+
+  def FindGroup(self, c):  # c: code point, or a 1-char str converted via ord
+    if isinstance(c, str):
+      c = ord(c)
+    for g in self.groups:
+      if c in g:
+        return g  # first group that contains the code point
+    return None
+
+  def testCaseGroups(self):
+    self.groups = unicode.CaseGroups(unicode_dir=_UNICODE_DIR)
+    self.assertEquals([ord("A"), ord("a")], self.FindGroup("a"))
+    self.assertEquals(None, self.FindGroup("0"))
+
+
+class ScriptsTest(googletest.TestCase):
+  """Test the Scripts function (and the ScriptsReader)."""
+
+  def FindScript(self, c):  # c: code point, or a 1-char str converted via ord
+    if isinstance(c, str):
+      c = ord(c)
+    for script, codes in self.scripts.items():
+      for code in codes:
+        if c == code:
+          return script  # first script whose code list contains c
+    return None
+
+  def testScripts(self):
+    self.scripts = unicode.Scripts(unicode_dir=_UNICODE_DIR)
+    self.assertEquals("Latin", self.FindScript("a"))
+    self.assertEquals("Common", self.FindScript("0"))
+    self.assertEquals(None, self.FindScript(0xFFFE))
+
+
+class CategoriesTest(googletest.TestCase):
+  """Test the Categories function (and the UnicodeDataReader)."""
+
+  def FindCategory(self, c):  # c: code point, or a 1-char str converted via ord
+    if isinstance(c, str):
+      c = ord(c)
+    short = None  # first one-letter category seen; used only as a fallback
+    for category, codes in self.categories.items():
+      for code in codes:
+        if code == c:
+          # prefer category Nd over N
+          if len(category) > 1:
+            return category
+          if short is None:
+            short = category
+    return short
+
+  def testCategories(self):
+    self.categories = unicode.Categories(unicode_dir=_UNICODE_DIR)
+    self.assertEquals("Ll", self.FindCategory("a"))
+    self.assertEquals("Nd", self.FindCategory("0"))
+    self.assertEquals("Lo", self.FindCategory(0xAD00))  # in First, Last range
+    self.assertEquals(None, self.FindCategory(0xFFFE))
+    self.assertEquals("Lo", self.FindCategory(0x8B5A))
+    self.assertEquals("Lo", self.FindCategory(0x6C38))
+    self.assertEquals("Lo", self.FindCategory(0x92D2))
+    self.assertTrue(ord("a") in self.categories["L"])
+    self.assertTrue(ord("0") in self.categories["N"])
+    self.assertTrue(0x8B5A in self.categories["L"])
+    self.assertTrue(0x6C38 in self.categories["L"])
+    self.assertTrue(0x92D2 in self.categories["L"])
+
+def main():
+  googletest.main()
+
+if __name__ == "__main__":
+  main()

diff --git a/re2/unicode.py b/re2/unicode.py
old mode 100755
new mode 100644


diff --git a/testinstall.cc b/testinstall.cc
index 40b7a8a..17edfb4 100644
--- a/testinstall.cc
+++ b/testinstall.cc

@@ -1,3 +1,7 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 #include <re2/re2.h>
 #include <re2/filtered_re2.h>
 #include <stdio.h>

diff --git a/util/logging.h b/util/logging.h
index c8f6604..4443f7c 100644
--- a/util/logging.h
+++ b/util/logging.h

@@ -48,17 +48,25 @@
 
 class LogMessage {
  public:
-  LogMessage(const char* file, int line) {
+  LogMessage(const char* file, int line) : flushed_(false) {
     stream() << file << ":" << line << ": ";
   }
-  ~LogMessage() {
+  void Flush() {
     stream() << "\n";
     string s = str_.str();
-    if(write(2, s.data(), s.size()) < 0) {}  // shut up gcc
+    int n = (int)s.size(); // shut up msvc
+    if(write(2, s.data(), n) < 0) {}  // shut up gcc
+    flushed_ = true;
+  }
+  ~LogMessage() {
+    if (!flushed_) {
+      Flush();
+    }
   }
   ostream& stream() { return str_; }
  
  private:
+  bool flushed_;
   std::ostringstream str_;
   DISALLOW_EVIL_CONSTRUCTORS(LogMessage);
 };
@@ -68,7 +76,7 @@
   LogMessageFatal(const char* file, int line)
     : LogMessage(file, line) { }
   ~LogMessageFatal() {
-    std::cerr << "\n";
+    Flush();
     abort();
   }
  private:

diff --git a/util/mutex.h b/util/mutex.h
index d2f69e7..9787bfb 100644
--- a/util/mutex.h
+++ b/util/mutex.h

@@ -72,7 +72,7 @@
   MutexType mutex_;
 
   // Catch the error of writing Mutex when intending MutexLock.
-  Mutex(Mutex *ignored) {}
+  Mutex(Mutex *ignored);
   // Disallow "evil" constructors
   Mutex(const Mutex&);
   void operator=(const Mutex&);
@@ -185,6 +185,27 @@
 #define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
 #define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
 
+// Provide safe way to declare and use global, linker-initialized mutex. Sigh.
+#ifdef HAVE_PTHREAD
+
+#define GLOBAL_MUTEX(name) \
+	static pthread_mutex_t (name) = PTHREAD_MUTEX_INITIALIZER
+#define GLOBAL_MUTEX_LOCK(name) \
+	pthread_mutex_lock(&(name))
+#define GLOBAL_MUTEX_UNLOCK(name) \
+	pthread_mutex_unlock(&(name))
+
+#else
+
+#define GLOBAL_MUTEX(name) \
+	static Mutex name
+#define GLOBAL_MUTEX_LOCK(name) \
+	name.Lock()
+#define GLOBAL_MUTEX_UNLOCK(name) \
+	name.Unlock()
+
+#endif
+
 }  // namespace re2
 
 #endif  /* #define RE2_UTIL_MUTEX_H_ */

diff --git a/util/sparse_array.h b/util/sparse_array.h
index c024bed..3e33f89 100644
--- a/util/sparse_array.h
+++ b/util/sparse_array.h

@@ -224,13 +224,14 @@
   int max_size_;
   int* sparse_to_dense_;
   vector<IndexValue> dense_;
+  bool valgrind_;
 
   DISALLOW_EVIL_CONSTRUCTORS(SparseArray);
 };
 
 template<typename Value>
 SparseArray<Value>::SparseArray()
-    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_() {}
+    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(), valgrind_(RunningOnValgrind()) {}
 
 // IndexValue pairs: exposed in SparseArray::iterator.
 template<typename Value>
@@ -272,7 +273,7 @@
     if (sparse_to_dense_) {
       memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
       // Don't need to zero the memory but appease Valgrind.
-      if (RunningOnValgrind()) {
+      if (valgrind_) {
         for (int i = max_size_; i < new_max_size; i++)
           a[i] = 0xababababU;
       }
@@ -417,9 +418,10 @@
 template<typename Value> SparseArray<Value>::SparseArray(int max_size) {
   max_size_ = max_size;
   sparse_to_dense_ = new int[max_size];
+  valgrind_ = RunningOnValgrind();
   dense_.resize(max_size);
   // Don't need to zero the new memory, but appease Valgrind.
-  if (RunningOnValgrind()) {
+  if (valgrind_) {
     for (int i = 0; i < max_size; i++) {
       sparse_to_dense_[i] = 0xababababU;
       dense_[i].index_ = 0xababababU;

diff --git a/util/sparse_set.h b/util/sparse_set.h
index 9cb5753..165dd09 100644
--- a/util/sparse_set.h
+++ b/util/sparse_set.h

@@ -54,15 +54,16 @@
 class SparseSet {
  public:
   SparseSet()
-    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL) {}
+    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL), valgrind_(RunningOnValgrind()) {}
 
   SparseSet(int max_size) {
     max_size_ = max_size;
     sparse_to_dense_ = new int[max_size];
     dense_ = new int[max_size];
+    valgrind_ = RunningOnValgrind();
     // Don't need to zero the memory, but do so anyway
     // to appease Valgrind.
-    if (RunningOnValgrind()) {
+    if (valgrind_) {
       for (int i = 0; i < max_size; i++) {
         dense_[i] = 0xababababU;
         sparse_to_dense_[i] = 0xababababU;
@@ -94,7 +95,7 @@
       int* a = new int[new_max_size];
       if (sparse_to_dense_) {
         memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
-        if (RunningOnValgrind()) {
+        if (valgrind_) {
           for (int i = max_size_; i < new_max_size; i++)
             a[i] = 0xababababU;
         }
@@ -105,7 +106,7 @@
       a = new int[new_max_size];
       if (dense_) {
         memmove(a, dense_, size_*sizeof a[0]);
-        if (RunningOnValgrind()) {
+        if (valgrind_) {
           for (int i = size_; i < new_max_size; i++)
             a[i] = 0xababababU;
         }
@@ -168,6 +169,7 @@
   int max_size_;
   int* sparse_to_dense_;
   int* dense_;
+  bool valgrind_;
 
   DISALLOW_EVIL_CONSTRUCTORS(SparseSet);
 };

diff --git a/util/util.h b/util/util.h
index bf897ae..463cbfb 100644
--- a/util/util.h
+++ b/util/util.h

@@ -14,6 +14,7 @@
 #include <stdarg.h>
 #include <sys/time.h>
 #include <time.h>
+#include <ctype.h>	// For isdigit, isalpha.
 
 // C++
 #include <vector>
@@ -22,7 +23,7 @@
 #include <iosfwd>
 #include <map>
 #include <stack>
-#include <iostream>
+#include <ostream>
 #include <utility>
 #include <set>
 
@@ -47,9 +48,8 @@
 #else
 #include <tr1/unordered_set>  // using gnustl
 #endif
-
 using std::tr1::unordered_set;
-
+ 
 #elif defined(__GNUC__) && !defined(USE_CXX0X)
 
 #include <tr1/unordered_set>
@@ -92,6 +92,7 @@
 
 // Fake lock annotations.  For real ones, see
 // http://code.google.com/p/data-race-test/
+#ifndef ANNOTATE_PUBLISH_MEMORY_RANGE
 #define ANNOTATE_PUBLISH_MEMORY_RANGE(a, b)
 #define ANNOTATE_IGNORE_WRITES_BEGIN()
 #define ANNOTATE_IGNORE_WRITES_END()
@@ -99,6 +100,8 @@
 #define NO_THREAD_SAFETY_ANALYSIS
 #define ANNOTATE_HAPPENS_BEFORE(x)
 #define ANNOTATE_HAPPENS_AFTER(x)
+#define ANNOTATE_UNPROTECTED_READ(x) (x)
+#endif
 
 class StringPiece;
 

diff --git a/util/valgrind.cc b/util/valgrind.cc
index 749bb59..46f804b 100644
--- a/util/valgrind.cc
+++ b/util/valgrind.cc

@@ -7,18 +7,12 @@
 
 namespace re2 {
 
-static bool checkValgrind() {
+int RunningOnValgrind() {
 #ifdef RUNNING_ON_VALGRIND
 	return RUNNING_ON_VALGRIND;
 #else
-	return false;
+	return 0;
 #endif
 }
 
-static const int valgrind = checkValgrind();
-
-int RunningOnValgrind() {
-  return valgrind;
-}
-
 }  // namespace re2
commit	0d4c52358a1af421705c54bd8a9fdd8a30558a2e	[log] [tgz]
author	Alexander Gutkin <agutkin@google.com>	Thu Feb 28 13:47:27 2013 +0000
committer	Alexander Gutkin <agutkin@google.com>	Thu Feb 28 17:22:58 2013 +0000
tree	c21f8d71d3fbbb42fc377b0ebad51b06ab5667b0
parent	c94c4501fe83e3ad77ce597b55bbbfbf533c10ee [diff]